1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <petsc/private/kernels/blockinvert.h> 5 #include <../src/mat/impls/dense/seq/dense.h> 6 #include <petscdmplex.h> 7 #include <petscblaslapack.h> 8 #include <petsc/private/sfimpl.h> 9 #include <petsc/private/dmpleximpl.h> 10 #include <petscdmda.h> 11 12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 13 14 /* if range is true, it returns B s.t. span{B} = range(A) 15 if range is false, it returns B s.t. range(B) _|_ range(A) */ 16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 17 { 18 PetscScalar *uwork, *data, *U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM, bN, lwork, lierr, di = 1; 21 PetscInt ulw, i, nr, nc, n; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork2; 24 #endif 25 26 PetscFunctionBegin; 27 PetscCall(MatGetSize(A, &nr, &nc)); 28 if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 33 PetscCall(PetscMalloc1(ulw, &uwork)); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr, nc); 39 if (!rwork) { 40 PetscCall(PetscMalloc1(n, &sing)); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 PetscCall(PetscMalloc1(nr * nr, &U)); 47 PetscCall(PetscBLASIntCast(nr, &bM)); 48 PetscCall(PetscBLASIntCast(nc, &bN)); 49 PetscCall(PetscBLASIntCast(ulw, &lwork)); 50 PetscCall(MatDenseGetArray(A, &data)); 51 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 52 #if !defined(PETSC_USE_COMPLEX) 53 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 54 #else 55 PetscCall(PetscMalloc1(5 * n, &rwork2)); 56 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 57 PetscCall(PetscFree(rwork2)); 58 #endif 59 PetscCall(PetscFPTrapPop()); 60 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 61 PetscCall(MatDenseRestoreArray(A, &data)); 62 for (i = 0; i < n; i++) 63 if (sing[i] < PETSC_SMALL) break; 64 if (!rwork) PetscCall(PetscFree(sing)); 65 if (!work) PetscCall(PetscFree(uwork)); 66 /* create B */ 67 if (!range) { 68 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 69 PetscCall(MatDenseGetArray(*B, &data)); 70 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 71 } else { 72 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 73 PetscCall(MatDenseGetArray(*B, &data)); 74 PetscCall(PetscArraycpy(data, U, i * nr)); 75 } 76 PetscCall(MatDenseRestoreArray(*B, &data)); 77 PetscCall(PetscFree(U)); 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 /* TODO REMOVE */ 82 #if defined(PRINT_GDET) 83 static int inc = 0; 84 static int lev = 0; 85 #endif 86 87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 88 { 89 Mat GE, GEd; 90 PetscInt rsize, csize, esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 PetscCall(ISGetSize(edge, &esize)); 95 if (!esize) PetscFunctionReturn(PETSC_SUCCESS); 96 PetscCall(ISGetSize(extrow, &rsize)); 97 PetscCall(ISGetSize(extcol, &csize)); 98 99 /* gradients */ 100 ptr = work + 5 * esize; 101 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 102 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 103 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 104 PetscCall(MatDestroy(&GE)); 105 106 /* constants */ 107 ptr += rsize * csize; 108 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 109 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 110 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 111 PetscCall(MatDestroy(&GE)); 112 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 113 PetscCall(MatDestroy(&GEd)); 114 115 if (corners) { 116 Mat GEc; 117 const PetscScalar *vals; 118 PetscScalar v; 119 120 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 121 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 122 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 123 /* v = PetscAbsScalar(vals[0]); */ 124 v = 1.; 125 cvals[0] = vals[0] / v; 126 cvals[1] = vals[1] / v; 127 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 128 PetscCall(MatScale(*GKins, 1. / v)); 129 #if defined(PRINT_GDET) 130 { 131 PetscViewer viewer; 132 char filename[256]; 133 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++)); 134 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 135 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 136 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 137 PetscCall(MatView(GEc, viewer)); 138 PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK")); 139 PetscCall(MatView(*GKins, viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 141 PetscCall(MatView(GEd, viewer)); 142 PetscCall(PetscViewerDestroy(&viewer)); 143 } 144 #endif 145 PetscCall(MatDestroy(&GEd)); 146 PetscCall(MatDestroy(&GEc)); 147 } 148 PetscFunctionReturn(PETSC_SUCCESS); 149 } 150 151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *); 152 153 PetscErrorCode PCBDDCNedelecSupport(PC pc) 154 { 155 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 156 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 157 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 158 PetscSF sfv; 159 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 160 MPI_Comm comm; 161 IS lned, primals, allprimals, nedfieldlocal, elements_corners = NULL; 162 IS *eedges, *extrows, *extcols, *alleedges; 163 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 164 PetscScalar *vals, *work; 165 PetscReal *rwork; 166 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 167 PetscInt ne, nv, Lv, order, n, field; 168 PetscInt i, j, extmem, cum, maxsize, nee; 169 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 170 PetscInt *sfvleaves, *sfvroots; 171 PetscInt *corners, *cedges; 172 PetscInt *ecount, **eneighs, *vcount, **vneighs; 173 PetscInt *emarks; 174 PetscBool print, eerr, done, lrc[2], conforming, global, setprimal; 175 176 PetscFunctionBegin; 177 /* If the discrete gradient is defined for a subset of dofs and global is true, 178 it assumes G is given in global ordering for all the dofs. 179 Otherwise, the ordering is global for the Nedelec field */ 180 order = pcbddc->nedorder; 181 conforming = pcbddc->conforming; 182 field = pcbddc->nedfield; 183 global = pcbddc->nedglobal; 184 setprimal = PETSC_FALSE; 185 print = PETSC_FALSE; 186 187 /* Command line customization */ 188 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 189 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 190 /* print debug info and adaptive order TODO: to be removed */ 191 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 192 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 193 PetscOptionsEnd(); 194 195 /* Return if there are no edges in the decomposition */ 196 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 197 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 198 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 199 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 200 lrc[0] = PETSC_FALSE; 201 for (i = 0; i < n; i++) { 202 if (PetscRealPart(vals[i]) > 2.) { 203 lrc[0] = PETSC_TRUE; 204 break; 205 } 206 } 207 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 208 PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm)); 209 if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS); 210 211 /* Get Nedelec field */ 212 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 213 if (pcbddc->n_ISForDofsLocal && field >= 0) { 214 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 215 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 216 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 217 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 218 ne = n; 219 nedfieldlocal = NULL; 220 global = PETSC_TRUE; 221 } else if (field == PETSC_DECIDE) { 222 PetscInt rst, ren, *idx; 223 224 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 225 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 226 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 227 for (i = rst; i < ren; i++) { 228 PetscInt nc; 229 230 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 231 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 232 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 233 } 234 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 235 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 236 PetscCall(PetscMalloc1(n, &idx)); 237 for (i = 0, ne = 0; i < n; i++) 238 if (matis->sf_leafdata[i]) idx[ne++] = i; 239 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 240 } else { 241 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 242 } 243 244 /* Sanity checks */ 245 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 246 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 247 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 248 249 /* Just set primal dofs and return */ 250 if (setprimal) { 251 IS enedfieldlocal; 252 PetscInt *eidxs; 253 254 PetscCall(PetscMalloc1(ne, &eidxs)); 255 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 256 if (nedfieldlocal) { 257 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 258 for (i = 0, cum = 0; i < ne; i++) { 259 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i]; 260 } 261 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 262 } else { 263 for (i = 0, cum = 0; i < ne; i++) { 264 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i; 265 } 266 } 267 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 268 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 269 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 270 PetscCall(PetscFree(eidxs)); 271 PetscCall(ISDestroy(&nedfieldlocal)); 272 PetscCall(ISDestroy(&enedfieldlocal)); 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 /* Compute some l2g maps */ 277 if (nedfieldlocal) { 278 IS is; 279 280 /* need to map from the local Nedelec field to local numbering */ 281 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 282 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 283 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 284 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 285 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 286 if (global) { 287 PetscCall(PetscObjectReference((PetscObject)al2g)); 288 el2g = al2g; 289 } else { 290 IS gis; 291 292 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 293 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 294 PetscCall(ISDestroy(&gis)); 295 } 296 PetscCall(ISDestroy(&is)); 297 } else { 298 /* one ref for the destruction of al2g, one for el2g */ 299 PetscCall(PetscObjectReference((PetscObject)al2g)); 300 PetscCall(PetscObjectReference((PetscObject)al2g)); 301 el2g = al2g; 302 fl2g = NULL; 303 } 304 305 /* Start communication to drop connections for interior edges (for cc analysis only) */ 306 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 307 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 308 if (nedfieldlocal) { 309 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 310 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 311 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 312 } else { 313 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 314 } 315 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 316 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 317 318 /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting 319 Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */ 320 if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners)); 321 322 /* drop connections with interior edges to avoid unneeded communications and memory movements */ 323 PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view")); 324 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 325 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 326 if (global) { 327 PetscInt rst; 328 329 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 330 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 331 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst; 332 } 333 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 334 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 335 } else { 336 PetscInt *tbz; 337 338 PetscCall(PetscMalloc1(ne, &tbz)); 339 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 340 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 341 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 342 for (i = 0, cum = 0; i < ne; i++) 343 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 344 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 345 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 346 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 347 PetscCall(PetscFree(tbz)); 348 } 349 350 /* Extract subdomain relevant rows of G */ 351 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 352 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 353 PetscCall(MatAIJExtractRows(G, lned, &lGall)); 354 /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */ 355 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 356 PetscCall(ISDestroy(&lned)); 357 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 358 PetscCall(MatDestroy(&lGall)); 359 PetscCall(MatISGetLocalMat(lGis, &lG)); 360 if (matis->allow_repeated) { /* multi-element support */ 361 Mat *lGn, B; 362 IS *is_rows, *tcols, tmap, nmap; 363 PetscInt subnv; 364 const PetscInt *subvidxs; 365 ISLocalToGlobalMapping mapn; 366 367 PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn)); 368 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows)); 369 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols)); 370 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 371 if (fl2g) { 372 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i])); 373 } else { 374 PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i])); 375 is_rows[i] = pcbddc->local_subs[i]; 376 } 377 PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)])); 378 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn)); 379 PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv)); 380 PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs)); 381 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i])); 382 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs)); 383 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 384 } 385 386 /* Create new MATIS with repeated vertices */ 387 PetscCall(MatCreate(comm, &B)); 388 PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N)); 389 PetscCall(MatSetType(B, MATIS)); 390 PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE)); 391 PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap)); 392 PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap)); 393 PetscCall(ISDestroy(&tmap)); 394 PetscCall(ISGetLocalSize(nmap, &subnv)); 395 PetscCall(ISGetIndices(nmap, &subvidxs)); 396 PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap)); 397 PetscCall(ISRestoreIndices(nmap, &subvidxs)); 398 PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn)); 399 PetscCall(ISDestroy(&tmap)); 400 PetscCall(ISDestroy(&nmap)); 401 PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn)); 402 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 403 PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG)); 404 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 405 PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)])); 406 PetscCall(ISDestroy(&is_rows[i])); 407 PetscCall(ISDestroy(&tcols[i])); 408 } 409 PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG)); 410 PetscCall(PetscFree(lGn)); 411 PetscCall(PetscFree(is_rows)); 412 PetscCall(PetscFree(tcols)); 413 PetscCall(MatISSetLocalMat(B, lG)); 414 PetscCall(MatDestroy(&lG)); 415 416 PetscCall(MatDestroy(&lGis)); 417 lGis = B; 418 419 lGis->assembled = PETSC_TRUE; 420 } 421 PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view")); 422 423 /* SF for nodal dofs communications */ 424 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 425 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 426 PetscCall(PetscObjectReference((PetscObject)vl2g)); 427 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 428 PetscCall(PetscSFCreate(comm, &sfv)); 429 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 430 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 431 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 432 433 if (elements_corners) { 434 IS tmp; 435 Vec global, local; 436 Mat_IS *tGis = (Mat_IS *)lGis->data; 437 438 PetscCall(MatCreateVecs(lGis, &global, NULL)); 439 PetscCall(MatCreateVecs(tGis->A, &local, NULL)); 440 PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp)); 441 PetscCall(VecDestroy(&global)); 442 PetscCall(VecDestroy(&local)); 443 elements_corners = tmp; 444 } 445 446 /* Destroy temporary G */ 447 PetscCall(MatISGetLocalMat(lGis, &lG)); 448 PetscCall(PetscObjectReference((PetscObject)lG)); 449 PetscCall(MatDestroy(&G)); 450 PetscCall(MatDestroy(&lGis)); 451 452 if (print) { 453 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 454 PetscCall(MatView(lG, NULL)); 455 } 456 457 /* Save lG for values insertion in change of basis */ 458 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 459 460 /* Analyze the edge-nodes connections (duplicate lG) */ 461 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 462 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 463 PetscCall(PetscBTCreate(nv, &btv)); 464 PetscCall(PetscBTCreate(ne, &bte)); 465 PetscCall(PetscBTCreate(ne, &btb)); 466 PetscCall(PetscBTCreate(ne, &btbd)); 467 /* need to import the boundary specification to ensure the 468 proper detection of coarse edges' endpoints */ 469 if (pcbddc->DirichletBoundariesLocal) { 470 IS is; 471 472 if (fl2g) { 473 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 474 } else { 475 is = pcbddc->DirichletBoundariesLocal; 476 } 477 PetscCall(ISGetLocalSize(is, &cum)); 478 PetscCall(ISGetIndices(is, &idxs)); 479 for (i = 0; i < cum; i++) { 480 if (idxs[i] >= 0 && idxs[i] < ne) { 481 PetscCall(PetscBTSet(btb, idxs[i])); 482 PetscCall(PetscBTSet(btbd, idxs[i])); 483 } 484 } 485 PetscCall(ISRestoreIndices(is, &idxs)); 486 if (fl2g) PetscCall(ISDestroy(&is)); 487 } 488 if (pcbddc->NeumannBoundariesLocal) { 489 IS is; 490 491 if (fl2g) { 492 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 493 } else { 494 is = pcbddc->NeumannBoundariesLocal; 495 } 496 PetscCall(ISGetLocalSize(is, &cum)); 497 PetscCall(ISGetIndices(is, &idxs)); 498 for (i = 0; i < cum; i++) { 499 if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i])); 500 } 501 PetscCall(ISRestoreIndices(is, &idxs)); 502 if (fl2g) PetscCall(ISDestroy(&is)); 503 } 504 505 /* Count neighs per dof */ 506 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL)); 507 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL)); 508 509 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 510 for proper detection of coarse edges' endpoints */ 511 PetscCall(PetscBTCreate(ne, &btee)); 512 for (i = 0; i < ne; i++) { 513 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i)); 514 } 515 PetscCall(PetscMalloc1(ne, &marks)); 516 if (!conforming) { 517 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 518 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 519 } 520 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 521 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 522 cum = 0; 523 for (i = 0; i < ne; i++) { 524 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 525 if (!PetscBTLookup(btee, i)) { 526 marks[cum++] = i; 527 continue; 528 } 529 /* set badly connected edge dofs as primal */ 530 if (!conforming) { 531 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 532 marks[cum++] = i; 533 PetscCall(PetscBTSet(bte, i)); 534 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 535 } else { 536 /* every edge dofs should be connected through a certain number of nodal dofs 537 to other edge dofs belonging to coarse edges 538 - at most 2 endpoints 539 - order-1 interior nodal dofs 540 - no undefined nodal dofs (nconn < order) 541 */ 542 PetscInt ends = 0, ints = 0, undef = 0; 543 for (j = ii[i]; j < ii[i + 1]; j++) { 544 PetscInt v = jj[j], k; 545 PetscInt nconn = iit[v + 1] - iit[v]; 546 for (k = iit[v]; k < iit[v + 1]; k++) 547 if (!PetscBTLookup(btee, jjt[k])) nconn--; 548 if (nconn > order) ends++; 549 else if (nconn == order) ints++; 550 else undef++; 551 } 552 if (undef || ends > 2 || ints != order - 1) { 553 marks[cum++] = i; 554 PetscCall(PetscBTSet(bte, i)); 555 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 556 } 557 } 558 } 559 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 560 if (!order && ii[i + 1] != ii[i]) { 561 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 562 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 563 } 564 } 565 PetscCall(PetscBTDestroy(&btee)); 566 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 567 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 568 if (!conforming) { 569 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 570 PetscCall(MatDestroy(&lGt)); 571 } 572 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 573 574 /* identify splitpoints and corner candidates */ 575 PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots)); 576 PetscCall(PetscBTCreate(nv, &btvcand)); 577 if (elements_corners) { 578 PetscCall(ISGetLocalSize(elements_corners, &cum)); 579 PetscCall(ISGetIndices(elements_corners, &idxs)); 580 for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i])); 581 PetscCall(ISRestoreIndices(elements_corners, &idxs)); 582 } 583 584 if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */ 585 PetscSF emlsf, vmlsf; 586 PetscInt *eleaves, *vleaves, *meleaves, *mvleaves; 587 PetscInt cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl; 588 589 PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs)); 590 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 591 PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs)); 592 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 593 594 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf)); 595 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf)); 596 597 PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL)); 598 for (i = 0, j = 0; i < ne; i++) j += ecount[i]; 599 PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne); 600 PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j); 601 602 PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL)); 603 for (i = 0, j = 0; i < nv; i++) j += vcount[i]; 604 PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv); 605 PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j); 606 607 PetscCall(PetscMalloc1(ne, &eleaves)); 608 PetscCall(PetscMalloc1(nv, &vleaves)); 609 for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX; 610 for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX; 611 PetscCall(PetscMalloc1(emnl, &meleaves)); 612 PetscCall(PetscMalloc1(vmnl, &mvleaves)); 613 614 PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm)); 615 PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 616 for (i = 0; i < n_subs; i++) { 617 const PetscInt *idxs; 618 const PetscInt subid = cum_subs + i; 619 PetscInt ns; 620 621 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns)); 622 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 623 for (j = 0; j < ns; j++) { 624 const PetscInt e = idxs[j]; 625 626 eleaves[e] = subid; 627 for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid; 628 } 629 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 630 } 631 PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 632 PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 633 PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 634 PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 635 PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 636 PetscCall(PetscFree(eleaves)); 637 PetscCall(PetscFree(vleaves)); 638 639 PetscCall(PetscMalloc1(ne + 1, &eneighs)); 640 eneighs[0] = meleaves; 641 for (i = 0; i < ne; i++) { 642 PetscCall(PetscSortInt(ecount[i], eneighs[i])); 643 eneighs[i + 1] = eneighs[i] + ecount[i]; 644 } 645 PetscCall(PetscMalloc1(nv + 1, &vneighs)); 646 vneighs[0] = mvleaves; 647 for (i = 0; i < nv; i++) { 648 PetscCall(PetscSortInt(vcount[i], vneighs[i])); 649 vneighs[i + 1] = vneighs[i] + vcount[i]; 650 } 651 } else { 652 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs)); 653 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs)); 654 } 655 656 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 657 if (print) { 658 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 659 PetscCall(MatView(lGe, NULL)); 660 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 661 PetscCall(MatView(lGt, NULL)); 662 } 663 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 664 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 665 for (i = 0; i < nv; i++) { 666 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 667 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 668 if (!order) { /* variable order */ 669 PetscReal vorder = 0.; 670 671 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 672 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 673 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 674 ord = 1; 675 } 676 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 677 const PetscInt e = jj[j]; 678 679 if (PetscBTLookup(btbd, e)) { 680 bdir = PETSC_TRUE; 681 break; 682 } 683 if (vc != ecount[e]) { 684 sneighs = PETSC_FALSE; 685 } else { 686 const PetscInt *vn = vneighs[i], *en = eneighs[e]; 687 688 for (PetscInt k = 0; k < vc; k++) { 689 if (vn[k] != en[k]) { 690 sneighs = PETSC_FALSE; 691 break; 692 } 693 } 694 } 695 } 696 if (elements_corners) test = 0; 697 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 698 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir])); 699 PetscCall(PetscBTSet(btv, i)); 700 } else if (test == ord) { 701 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 702 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i)); 703 PetscCall(PetscBTSet(btv, i)); 704 } else if (!elements_corners) { 705 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i)); 706 PetscCall(PetscBTSet(btvcand, i)); 707 } 708 } 709 } 710 PetscCall(PetscBTDestroy(&btbd)); 711 712 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 713 if (order != 1) { 714 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n")); 715 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 716 for (i = 0; i < nv; i++) { 717 if (PetscBTLookup(btvcand, i)) { 718 PetscBool found = PETSC_FALSE; 719 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 720 PetscInt k, e = jj[j]; 721 if (PetscBTLookup(bte, e)) continue; 722 for (k = iit[e]; k < iit[e + 1]; k++) { 723 PetscInt v = jjt[k]; 724 if (v != i && PetscBTLookup(btvcand, v)) { 725 found = PETSC_TRUE; 726 break; 727 } 728 } 729 } 730 if (!found) { 731 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i)); 732 PetscCall(PetscBTClear(btvcand, i)); 733 } else { 734 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i)); 735 } 736 } 737 } 738 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 739 } 740 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 741 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 742 PetscCall(MatDestroy(&lGe)); 743 744 /* Get the local G^T explicitly */ 745 PetscCall(MatDestroy(&lGt)); 746 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 747 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 748 749 /* Mark shared nodal dofs */ 750 PetscCall(PetscBTCreate(nv, &btvi)); 751 for (i = 0; i < nv; i++) { 752 if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i)); 753 } 754 755 if (matis->allow_repeated) { 756 PetscCall(PetscFree(eneighs[0])); 757 PetscCall(PetscFree(vneighs[0])); 758 PetscCall(PetscFree(eneighs)); 759 PetscCall(PetscFree(vneighs)); 760 } 761 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 762 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 763 764 /* communicate corners and splitpoints */ 765 PetscCall(PetscMalloc1(nv, &vmarks)); 766 PetscCall(PetscArrayzero(sfvleaves, nv)); 767 PetscCall(PetscArrayzero(sfvroots, Lv)); 768 for (i = 0; i < nv; i++) 769 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 770 771 if (print) { 772 IS tbz; 773 774 cum = 0; 775 for (i = 0; i < nv; i++) 776 if (sfvleaves[i]) vmarks[cum++] = i; 777 778 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 779 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 780 PetscCall(ISView(tbz, NULL)); 781 PetscCall(ISDestroy(&tbz)); 782 } 783 784 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 785 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 786 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 787 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 788 789 /* Zero rows of lGt corresponding to identified corners 790 and interior nodal dofs */ 791 cum = 0; 792 for (i = 0; i < nv; i++) { 793 if (sfvleaves[i]) { 794 vmarks[cum++] = i; 795 PetscCall(PetscBTSet(btv, i)); 796 } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 797 } 798 PetscCall(PetscBTDestroy(&btvi)); 799 if (print) { 800 IS tbz; 801 802 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 803 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 804 PetscCall(ISView(tbz, NULL)); 805 PetscCall(ISDestroy(&tbz)); 806 } 807 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 808 PetscCall(PetscFree(vmarks)); 809 PetscCall(PetscSFDestroy(&sfv)); 810 PetscCall(PetscFree2(sfvleaves, sfvroots)); 811 812 /* Recompute G */ 813 PetscCall(MatDestroy(&lG)); 814 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 815 if (print) { 816 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 817 PetscCall(MatView(lG, NULL)); 818 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 819 PetscCall(MatView(lGt, NULL)); 820 } 821 822 /* Get primal dofs (if any) */ 823 cum = 0; 824 for (i = 0; i < ne; i++) { 825 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 826 } 827 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 828 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 829 if (print) { 830 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 831 PetscCall(ISView(primals, NULL)); 832 } 833 PetscCall(PetscBTDestroy(&bte)); 834 /* TODO: what if the user passed in some of them ? */ 835 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 836 PetscCall(ISDestroy(&primals)); 837 838 /* Compute edge connectivity */ 839 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 840 841 /* Symbolic conn = lG*lGt */ 842 if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */ 843 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 844 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 845 PetscCall(MatProductSetAlgorithm(conn, "default")); 846 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 847 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 848 PetscCall(MatProductSetFromOptions(conn)); 849 PetscCall(MatProductSymbolic(conn)); 850 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 851 if (fl2g) { 852 PetscBT btf; 853 PetscInt *iia, *jja, *iiu, *jju; 854 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 855 856 /* create CSR for all local dofs */ 857 PetscCall(PetscMalloc1(n + 1, &iia)); 858 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 859 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 860 iiu = pcbddc->mat_graph->xadj; 861 jju = pcbddc->mat_graph->adjncy; 862 } else if (pcbddc->use_local_adj) { 863 rest = PETSC_TRUE; 864 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 865 } else { 866 free = PETSC_TRUE; 867 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 868 iiu[0] = 0; 869 for (i = 0; i < n; i++) { 870 iiu[i + 1] = i + 1; 871 jju[i] = -1; 872 } 873 } 874 875 /* import sizes of CSR */ 876 iia[0] = 0; 877 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 878 879 /* overwrite entries corresponding to the Nedelec field */ 880 PetscCall(PetscBTCreate(n, &btf)); 881 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 882 for (i = 0; i < ne; i++) { 883 PetscCall(PetscBTSet(btf, idxs[i])); 884 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 885 } 886 887 /* iia in CSR */ 888 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 889 890 /* jja in CSR */ 891 PetscCall(PetscMalloc1(iia[n], &jja)); 892 for (i = 0; i < n; i++) 893 if (!PetscBTLookup(btf, i)) 894 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 895 896 /* map edge dofs connectivity */ 897 if (jj) { 898 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 899 for (i = 0; i < ne; i++) { 900 PetscInt e = idxs[i]; 901 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 902 } 903 } 904 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 905 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES)); 906 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 907 if (free) PetscCall(PetscFree2(iiu, jju)); 908 PetscCall(PetscBTDestroy(&btf)); 909 } else { 910 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES)); 911 } 912 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 913 PetscCall(MatDestroy(&conn)); 914 } 915 916 /* Analyze interface for edge dofs */ 917 PetscCall(PCBDDCAnalyzeInterface(pc)); 918 pcbddc->mat_graph->twodim = PETSC_FALSE; 919 920 /* Get coarse edges in the edge space */ 921 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 922 923 if (fl2g) { 924 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 925 PetscCall(PetscMalloc1(nee, &eedges)); 926 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 927 } else { 928 eedges = alleedges; 929 primals = allprimals; 930 } 931 932 /* Mark fine edge dofs with their coarse edge id */ 933 PetscCall(PetscArrayzero(marks, ne)); 934 PetscCall(ISGetLocalSize(primals, &cum)); 935 PetscCall(ISGetIndices(primals, &idxs)); 936 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 937 PetscCall(ISRestoreIndices(primals, &idxs)); 938 if (print) { 939 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 940 PetscCall(ISView(primals, NULL)); 941 } 942 943 maxsize = 0; 944 for (i = 0; i < nee; i++) { 945 PetscInt size, mark = i + 1; 946 947 PetscCall(ISGetLocalSize(eedges[i], &size)); 948 PetscCall(ISGetIndices(eedges[i], &idxs)); 949 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 950 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 951 maxsize = PetscMax(maxsize, size); 952 } 953 954 /* Find coarse edge endpoints */ 955 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 956 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 957 for (i = 0; i < nee; i++) { 958 PetscInt mark = i + 1, size; 959 960 PetscCall(ISGetLocalSize(eedges[i], &size)); 961 if (!size && nedfieldlocal) continue; 962 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 963 PetscCall(ISGetIndices(eedges[i], &idxs)); 964 if (print) { 965 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 966 PetscCall(ISView(eedges[i], NULL)); 967 } 968 for (j = 0; j < size; j++) { 969 PetscInt k, ee = idxs[j]; 970 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee)); 971 for (k = ii[ee]; k < ii[ee + 1]; k++) { 972 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k])); 973 if (PetscBTLookup(btv, jj[k])) { 974 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k])); 975 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 976 PetscInt k2; 977 PetscBool corner = PETSC_FALSE; 978 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 979 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]))); 980 /* it's a corner if either is connected with an edge dof belonging to a different cc or 981 if the edge dof lie on the natural part of the boundary */ 982 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 983 corner = PETSC_TRUE; 984 break; 985 } 986 } 987 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 988 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k])); 989 PetscCall(PetscBTSet(btv, jj[k])); 990 } else { 991 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n")); 992 } 993 } 994 } 995 } 996 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 997 } 998 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 999 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1000 PetscCall(PetscBTDestroy(&btb)); 1001 1002 /* Reset marked primal dofs */ 1003 PetscCall(ISGetLocalSize(primals, &cum)); 1004 PetscCall(ISGetIndices(primals, &idxs)); 1005 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 1006 PetscCall(ISRestoreIndices(primals, &idxs)); 1007 1008 /* Now use the initial lG */ 1009 PetscCall(MatDestroy(&lG)); 1010 PetscCall(MatDestroy(&lGt)); 1011 lG = lGinit; 1012 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 1013 1014 /* Compute extended cols indices */ 1015 PetscCall(PetscBTCreate(nv, &btvc)); 1016 PetscCall(PetscBTCreate(nee, &bter)); 1017 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1018 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 1019 i *= maxsize; 1020 PetscCall(PetscCalloc1(nee, &extcols)); 1021 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 1022 eerr = PETSC_FALSE; 1023 for (i = 0; i < nee; i++) { 1024 PetscInt size, found = 0; 1025 1026 cum = 0; 1027 PetscCall(ISGetLocalSize(eedges[i], &size)); 1028 if (!size && nedfieldlocal) continue; 1029 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1030 PetscCall(ISGetIndices(eedges[i], &idxs)); 1031 PetscCall(PetscBTMemzero(nv, btvc)); 1032 for (j = 0; j < size; j++) { 1033 PetscInt k, ee = idxs[j]; 1034 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1035 PetscInt vv = jj[k]; 1036 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 1037 else if (!PetscBTLookupSet(btvc, vv)) found++; 1038 } 1039 } 1040 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1041 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1042 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1043 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1044 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1045 /* it may happen that endpoints are not defined at this point 1046 if it is the case, mark this edge for a second pass */ 1047 if (cum != size - 1 || found != 2) { 1048 PetscCall(PetscBTSet(bter, i)); 1049 if (print) { 1050 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 1051 PetscCall(ISView(eedges[i], NULL)); 1052 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 1053 PetscCall(ISView(extcols[i], NULL)); 1054 } 1055 eerr = PETSC_TRUE; 1056 } 1057 } 1058 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 1059 PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm)); 1060 if (done) { 1061 PetscInt *newprimals; 1062 1063 PetscCall(PetscMalloc1(ne, &newprimals)); 1064 PetscCall(ISGetLocalSize(primals, &cum)); 1065 PetscCall(ISGetIndices(primals, &idxs)); 1066 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 1067 PetscCall(ISRestoreIndices(primals, &idxs)); 1068 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1069 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr])); 1070 for (i = 0; i < nee; i++) { 1071 PetscBool has_candidates = PETSC_FALSE; 1072 if (PetscBTLookup(bter, i)) { 1073 PetscInt size, mark = i + 1; 1074 1075 PetscCall(ISGetLocalSize(eedges[i], &size)); 1076 PetscCall(ISGetIndices(eedges[i], &idxs)); 1077 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1078 for (j = 0; j < size; j++) { 1079 PetscInt k, ee = idxs[j]; 1080 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1])); 1081 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1082 /* set all candidates located on the edge as corners */ 1083 if (PetscBTLookup(btvcand, jj[k])) { 1084 PetscInt k2, vv = jj[k]; 1085 has_candidates = PETSC_TRUE; 1086 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv)); 1087 PetscCall(PetscBTSet(btv, vv)); 1088 /* set all edge dofs connected to candidate as primals */ 1089 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 1090 if (marks[jjt[k2]] == mark) { 1091 PetscInt k3, ee2 = jjt[k2]; 1092 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2)); 1093 newprimals[cum++] = ee2; 1094 /* finally set the new corners */ 1095 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 1096 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3])); 1097 PetscCall(PetscBTSet(btv, jj[k3])); 1098 } 1099 } 1100 } 1101 } else { 1102 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k])); 1103 } 1104 } 1105 } 1106 if (!has_candidates) { /* circular edge */ 1107 PetscInt k, ee = idxs[0], *tmarks; 1108 1109 PetscCall(PetscCalloc1(ne, &tmarks)); 1110 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i)); 1111 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1112 PetscInt k2; 1113 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k])); 1114 PetscCall(PetscBTSet(btv, jj[k])); 1115 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 1116 } 1117 for (j = 0; j < size; j++) { 1118 if (tmarks[idxs[j]] > 1) { 1119 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j])); 1120 newprimals[cum++] = idxs[j]; 1121 } 1122 } 1123 PetscCall(PetscFree(tmarks)); 1124 } 1125 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1126 } 1127 PetscCall(ISDestroy(&extcols[i])); 1128 } 1129 PetscCall(PetscFree(extcols)); 1130 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1131 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 1132 if (fl2g) { 1133 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 1134 PetscCall(ISDestroy(&primals)); 1135 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1136 PetscCall(PetscFree(eedges)); 1137 } 1138 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1139 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 1140 PetscCall(PetscFree(newprimals)); 1141 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 1142 PetscCall(ISDestroy(&primals)); 1143 PetscCall(PCBDDCAnalyzeInterface(pc)); 1144 pcbddc->mat_graph->twodim = PETSC_FALSE; 1145 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1146 if (fl2g) { 1147 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 1148 PetscCall(PetscMalloc1(nee, &eedges)); 1149 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 1150 } else { 1151 eedges = alleedges; 1152 primals = allprimals; 1153 } 1154 PetscCall(PetscCalloc1(nee, &extcols)); 1155 1156 /* Mark again */ 1157 PetscCall(PetscArrayzero(marks, ne)); 1158 for (i = 0; i < nee; i++) { 1159 PetscInt size, mark = i + 1; 1160 1161 PetscCall(ISGetLocalSize(eedges[i], &size)); 1162 PetscCall(ISGetIndices(eedges[i], &idxs)); 1163 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1164 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1165 } 1166 if (print) { 1167 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1168 PetscCall(ISView(primals, NULL)); 1169 } 1170 1171 /* Recompute extended cols */ 1172 eerr = PETSC_FALSE; 1173 for (i = 0; i < nee; i++) { 1174 PetscInt size; 1175 1176 cum = 0; 1177 PetscCall(ISGetLocalSize(eedges[i], &size)); 1178 if (!size && nedfieldlocal) continue; 1179 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1180 PetscCall(ISGetIndices(eedges[i], &idxs)); 1181 for (j = 0; j < size; j++) { 1182 PetscInt k, ee = idxs[j]; 1183 for (k = ii[ee]; k < ii[ee + 1]; k++) 1184 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1185 } 1186 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1187 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1188 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1189 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1190 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1191 if (cum != size - 1) { 1192 if (print) { 1193 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1194 PetscCall(ISView(eedges[i], NULL)); 1195 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1196 PetscCall(ISView(extcols[i], NULL)); 1197 } 1198 eerr = PETSC_TRUE; 1199 } 1200 } 1201 } 1202 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1203 PetscCall(PetscFree2(extrow, gidxs)); 1204 PetscCall(PetscBTDestroy(&bter)); 1205 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1206 /* an error should not occur at this point */ 1207 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1208 1209 /* Check the number of endpoints */ 1210 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1211 PetscCall(PetscMalloc1(2 * nee, &corners)); 1212 PetscCall(PetscMalloc1(nee, &cedges)); 1213 for (i = 0; i < nee; i++) { 1214 PetscInt size, found = 0, gc[2]; 1215 1216 /* init with defaults */ 1217 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1218 PetscCall(ISGetLocalSize(eedges[i], &size)); 1219 if (!size && nedfieldlocal) continue; 1220 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1221 PetscCall(ISGetIndices(eedges[i], &idxs)); 1222 PetscCall(PetscBTMemzero(nv, btvc)); 1223 for (j = 0; j < size; j++) { 1224 PetscInt k, ee = idxs[j]; 1225 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1226 PetscInt vv = jj[k]; 1227 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1228 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i); 1229 corners[i * 2 + found++] = vv; 1230 } 1231 } 1232 } 1233 if (found != 2) { 1234 PetscInt e; 1235 if (fl2g) { 1236 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1237 } else { 1238 e = idxs[0]; 1239 } 1240 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1241 } 1242 1243 /* get primal dof index on this coarse edge */ 1244 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1245 if (gc[0] > gc[1]) { 1246 PetscInt swap = corners[2 * i]; 1247 corners[2 * i] = corners[2 * i + 1]; 1248 corners[2 * i + 1] = swap; 1249 } 1250 cedges[i] = idxs[size - 1]; 1251 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1252 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1])); 1253 } 1254 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1255 PetscCall(PetscBTDestroy(&btvc)); 1256 1257 if (PetscDefined(USE_DEBUG)) { 1258 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1259 not interfere with neighbouring coarse edges */ 1260 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1261 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1262 for (i = 0; i < nv; i++) { 1263 PetscInt emax = 0, eemax = 0; 1264 1265 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1266 PetscCall(PetscArrayzero(emarks, nee + 1)); 1267 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1268 for (j = 1; j < nee + 1; j++) { 1269 if (emax < emarks[j]) { 1270 emax = emarks[j]; 1271 eemax = j; 1272 } 1273 } 1274 /* not relevant for edges */ 1275 if (!eemax) continue; 1276 1277 for (j = ii[i]; j < ii[i + 1]; j++) { 1278 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1279 } 1280 } 1281 PetscCall(PetscFree(emarks)); 1282 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1283 } 1284 1285 /* Compute extended rows indices for edge blocks of the change of basis */ 1286 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1287 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1288 extmem *= maxsize; 1289 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1290 PetscCall(PetscMalloc1(nee, &extrows)); 1291 PetscCall(PetscCalloc1(nee, &extrowcum)); 1292 for (i = 0; i < nv; i++) { 1293 PetscInt mark = 0, size, start; 1294 1295 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1296 for (j = ii[i]; j < ii[i + 1]; j++) 1297 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1298 1299 /* not relevant */ 1300 if (!mark) continue; 1301 1302 /* import extended row */ 1303 mark--; 1304 start = mark * extmem + extrowcum[mark]; 1305 size = ii[i + 1] - ii[i]; 1306 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1307 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1308 extrowcum[mark] += size; 1309 } 1310 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1311 PetscCall(MatDestroy(&lGt)); 1312 PetscCall(PetscFree(marks)); 1313 1314 /* Compress extrows */ 1315 cum = 0; 1316 for (i = 0; i < nee; i++) { 1317 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1318 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1319 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1320 cum = PetscMax(cum, size); 1321 } 1322 PetscCall(PetscFree(extrowcum)); 1323 PetscCall(PetscBTDestroy(&btv)); 1324 PetscCall(PetscBTDestroy(&btvcand)); 1325 1326 /* Workspace for lapack inner calls and VecSetValues */ 1327 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1328 1329 /* Create change of basis matrix (no preallocation) */ 1330 PetscCall(MatCreate(comm, &T)); 1331 PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap)); 1332 PetscCall(MatSetType(T, MATAIJ)); 1333 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1334 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1335 PetscCall(MatSetOption(T, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 1336 //PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL)); 1337 //PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL)); 1338 //PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1339 1340 /* Defaults to identity */ 1341 { 1342 Vec w; 1343 const PetscScalar *wa; 1344 1345 PetscCall(MatCreateVecs(T, &w, NULL)); 1346 PetscCall(VecSetLocalToGlobalMapping(w, al2g)); 1347 PetscCall(VecSet(w, 1.0)); 1348 for (i = 0; i < nee; i++) { 1349 const PetscInt *idxs; 1350 PetscInt nl; 1351 1352 PetscCall(ISGetLocalSize(eedges[i], &nl)); 1353 PetscCall(ISGetIndices(eedges[i], &idxs)); 1354 PetscCall(VecSetValuesLocal(w, nl, idxs, NULL, INSERT_VALUES)); 1355 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1356 } 1357 PetscCall(VecAssemblyBegin(w)); 1358 PetscCall(VecAssemblyEnd(w)); 1359 PetscCall(VecGetArrayRead(w, &wa)); 1360 for (i = T->rmap->rstart; i < T->rmap->rend; i++) 1361 if (PetscAbsScalar(wa[i - T->rmap->rstart])) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES)); 1362 PetscCall(VecRestoreArrayRead(w, &wa)); 1363 PetscCall(VecDestroy(&w)); 1364 } 1365 1366 /* Create discrete gradient for the coarser level if needed */ 1367 PetscCall(MatDestroy(&pcbddc->nedcG)); 1368 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1369 if (pcbddc->current_level < pcbddc->max_levels) { 1370 ISLocalToGlobalMapping cel2g, cvl2g; 1371 IS wis, gwis; 1372 PetscInt cnv, cne; 1373 1374 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1375 if (fl2g) { 1376 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1377 } else { 1378 PetscCall(PetscObjectReference((PetscObject)wis)); 1379 pcbddc->nedclocal = wis; 1380 } 1381 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1382 PetscCall(ISDestroy(&wis)); 1383 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1384 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1385 PetscCall(ISDestroy(&wis)); 1386 PetscCall(ISDestroy(&gwis)); 1387 1388 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1389 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1390 PetscCall(ISDestroy(&wis)); 1391 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1392 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1393 PetscCall(ISDestroy(&wis)); 1394 PetscCall(ISDestroy(&gwis)); 1395 1396 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1397 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1398 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1399 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1400 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1401 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1402 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1403 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1404 } 1405 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1406 1407 #if defined(PRINT_GDET) 1408 inc = 0; 1409 lev = pcbddc->current_level; 1410 #endif 1411 1412 /* Insert values in the change of basis matrix */ 1413 for (i = 0; i < nee; i++) { 1414 Mat Gins = NULL, GKins = NULL; 1415 IS cornersis = NULL; 1416 PetscScalar cvals[2]; 1417 1418 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); 1419 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1420 if (Gins && GKins) { 1421 const PetscScalar *data; 1422 const PetscInt *rows, *cols; 1423 PetscInt nrh, nch, nrc, ncc; 1424 1425 PetscCall(ISGetIndices(eedges[i], &cols)); 1426 /* H1 */ 1427 PetscCall(ISGetIndices(extrows[i], &rows)); 1428 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1429 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1430 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1431 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1432 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1433 /* complement */ 1434 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1435 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1436 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1437 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1438 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1439 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1440 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1441 1442 /* coarse discrete gradient */ 1443 if (pcbddc->nedcG) { 1444 PetscInt cols[2]; 1445 1446 cols[0] = 2 * i; 1447 cols[1] = 2 * i + 1; 1448 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1449 } 1450 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1451 } 1452 PetscCall(ISDestroy(&extrows[i])); 1453 PetscCall(ISDestroy(&extcols[i])); 1454 PetscCall(ISDestroy(&cornersis)); 1455 PetscCall(MatDestroy(&Gins)); 1456 PetscCall(MatDestroy(&GKins)); 1457 } 1458 1459 /* for FDM element-by-element: first dof on the edge only constraint. Why? */ 1460 if (elements_corners && pcbddc->mat_graph->multi_element) { 1461 MatNullSpace nnsp; 1462 Vec quad_vec; 1463 1464 PetscCall(MatCreateVecs(pc->pmat, &quad_vec, NULL)); 1465 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, 1, &quad_vec, &nnsp)); 1466 PetscCall(VecLockReadPop(quad_vec)); 1467 PetscCall(VecSetLocalToGlobalMapping(quad_vec, al2g)); 1468 for (i = 0; i < nee; i++) { 1469 const PetscInt *idxs; 1470 PetscScalar one = 1.0; 1471 1472 PetscCall(ISGetLocalSize(eedges[i], &cum)); 1473 if (!cum) continue; 1474 PetscCall(ISGetIndices(eedges[i], &idxs)); 1475 PetscCall(VecSetValuesLocal(quad_vec, 1, idxs, &one, INSERT_VALUES)); 1476 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1477 } 1478 PetscCall(VecLockReadPush(quad_vec)); 1479 PetscCall(VecDestroy(&quad_vec)); 1480 PetscCall(MatSetNearNullSpace(pc->pmat, nnsp)); 1481 PetscCall(MatNullSpaceDestroy(&nnsp)); 1482 } 1483 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1484 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1485 1486 /* Start assembling */ 1487 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1488 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1489 1490 /* Free */ 1491 if (fl2g) { 1492 PetscCall(ISDestroy(&primals)); 1493 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1494 PetscCall(PetscFree(eedges)); 1495 } 1496 1497 /* hack mat_graph with primal dofs on the coarse edges */ 1498 { 1499 PCBDDCGraph graph = pcbddc->mat_graph; 1500 PetscInt *oqueue = graph->queue; 1501 PetscInt *ocptr = graph->cptr; 1502 PetscInt ncc, *idxs; 1503 1504 /* find first primal edge */ 1505 if (pcbddc->nedclocal) { 1506 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1507 } else { 1508 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1509 idxs = cedges; 1510 } 1511 cum = 0; 1512 while (cum < nee && cedges[cum] < 0) cum++; 1513 1514 /* adapt connected components */ 1515 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1516 graph->cptr[0] = 0; 1517 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1518 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1519 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1520 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1521 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1522 ncc++; 1523 lc--; 1524 cum++; 1525 while (cum < nee && cedges[cum] < 0) cum++; 1526 } 1527 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1528 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1529 ncc++; 1530 } 1531 graph->ncc = ncc; 1532 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1533 PetscCall(PetscFree2(ocptr, oqueue)); 1534 } 1535 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1536 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1537 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1538 1539 PetscCall(ISDestroy(&nedfieldlocal)); 1540 PetscCall(PetscFree(extrow)); 1541 PetscCall(PetscFree2(work, rwork)); 1542 PetscCall(PetscFree(corners)); 1543 PetscCall(PetscFree(cedges)); 1544 PetscCall(PetscFree(extrows)); 1545 PetscCall(PetscFree(extcols)); 1546 PetscCall(MatDestroy(&lG)); 1547 1548 /* Complete assembling */ 1549 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1550 PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view")); 1551 if (pcbddc->nedcG) { 1552 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1553 PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view")); 1554 } 1555 1556 PetscCall(ISDestroy(&elements_corners)); 1557 1558 /* set change of basis */ 1559 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE)); 1560 PetscCall(MatDestroy(&T)); 1561 PetscFunctionReturn(PETSC_SUCCESS); 1562 } 1563 1564 /* the near-null space of BDDC carries information on quadrature weights, 1565 and these can be collinear -> so cheat with MatNullSpaceCreate 1566 and create a suitable set of basis vectors first */ 1567 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1568 { 1569 PetscInt i; 1570 1571 PetscFunctionBegin; 1572 for (i = 0; i < nvecs; i++) { 1573 PetscInt first, last; 1574 1575 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1576 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1577 if (i >= first && i < last) { 1578 PetscScalar *data; 1579 PetscCall(VecGetArray(quad_vecs[i], &data)); 1580 if (!has_const) { 1581 data[i - first] = 1.; 1582 } else { 1583 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1584 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1585 } 1586 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1587 } 1588 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1589 } 1590 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1591 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1592 PetscInt first, last; 1593 PetscCall(VecLockReadPop(quad_vecs[i])); 1594 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1595 if (i >= first && i < last) { 1596 PetscScalar *data; 1597 PetscCall(VecGetArray(quad_vecs[i], &data)); 1598 if (!has_const) { 1599 data[i - first] = 0.; 1600 } else { 1601 data[2 * i - first] = 0.; 1602 data[2 * i - first + 1] = 0.; 1603 } 1604 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1605 } 1606 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1607 PetscCall(VecLockReadPush(quad_vecs[i])); 1608 } 1609 PetscFunctionReturn(PETSC_SUCCESS); 1610 } 1611 1612 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1613 { 1614 Mat loc_divudotp; 1615 Vec p, v, quad_vec; 1616 ISLocalToGlobalMapping map; 1617 PetscScalar *array; 1618 1619 PetscFunctionBegin; 1620 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1621 if (!transpose) { 1622 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1623 } else { 1624 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1625 } 1626 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp)); 1627 PetscCall(VecLockReadPop(quad_vec)); 1628 PetscCall(VecSetLocalToGlobalMapping(quad_vec, map)); 1629 1630 /* compute local quad vec */ 1631 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1632 if (!transpose) { 1633 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1634 } else { 1635 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1636 } 1637 /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */ 1638 PetscCall(VecSet(p, 1.)); 1639 if (!transpose) { 1640 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1641 } else { 1642 PetscCall(MatMult(loc_divudotp, p, v)); 1643 } 1644 PetscCall(VecDestroy(&p)); 1645 if (vl2l) { 1646 Mat lA; 1647 VecScatter sc; 1648 Vec vins; 1649 1650 PetscCall(MatISGetLocalMat(A, &lA)); 1651 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1652 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1653 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1654 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1655 PetscCall(VecScatterDestroy(&sc)); 1656 PetscCall(VecDestroy(&v)); 1657 v = vins; 1658 } 1659 1660 /* mask summation of interface values */ 1661 PetscInt n, *mmask, *mask, *idxs, nmr, nr; 1662 const PetscInt *degree; 1663 PetscSF msf; 1664 1665 PetscCall(VecGetLocalSize(v, &n)); 1666 PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL)); 1667 PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf)); 1668 PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL)); 1669 PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs)); 1670 PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, °ree)); 1671 PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, °ree)); 1672 for (PetscInt i = 0, c = 0; i < nr; i++) { 1673 mmask[c] = 1; 1674 c += degree[i]; 1675 } 1676 PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1677 PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1678 PetscCall(VecGetArray(v, &array)); 1679 for (PetscInt i = 0; i < n; i++) { 1680 array[i] *= mask[i]; 1681 idxs[i] = i; 1682 } 1683 PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES)); 1684 PetscCall(VecRestoreArray(v, &array)); 1685 PetscCall(PetscFree3(mmask, mask, idxs)); 1686 PetscCall(VecDestroy(&v)); 1687 PetscCall(VecAssemblyBegin(quad_vec)); 1688 PetscCall(VecAssemblyEnd(quad_vec)); 1689 PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view")); 1690 PetscCall(VecLockReadPush(quad_vec)); 1691 PetscCall(VecDestroy(&quad_vec)); 1692 PetscFunctionReturn(PETSC_SUCCESS); 1693 } 1694 1695 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1696 { 1697 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1698 1699 PetscFunctionBegin; 1700 if (primalv) { 1701 if (pcbddc->user_primal_vertices_local) { 1702 IS list[2], newp; 1703 1704 list[0] = primalv; 1705 list[1] = pcbddc->user_primal_vertices_local; 1706 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1707 PetscCall(ISSortRemoveDups(newp)); 1708 PetscCall(ISDestroy(&list[1])); 1709 pcbddc->user_primal_vertices_local = newp; 1710 } else { 1711 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1712 } 1713 } 1714 PetscFunctionReturn(PETSC_SUCCESS); 1715 } 1716 1717 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1718 { 1719 PetscInt f, *comp = (PetscInt *)ctx; 1720 1721 PetscFunctionBegin; 1722 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1723 PetscFunctionReturn(PETSC_SUCCESS); 1724 } 1725 1726 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1727 { 1728 Vec local, global; 1729 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1730 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1731 PetscBool monolithic = PETSC_FALSE; 1732 1733 PetscFunctionBegin; 1734 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1735 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1736 PetscOptionsEnd(); 1737 /* need to convert from global to local topology information and remove references to information in global ordering */ 1738 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1739 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1740 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1741 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1742 if (monolithic) { /* just get block size to properly compute vertices */ 1743 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); 1744 goto boundary; 1745 } 1746 1747 if (pcbddc->user_provided_isfordofs) { 1748 if (pcbddc->n_ISForDofs) { 1749 PetscInt i; 1750 1751 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1752 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1753 PetscInt bs; 1754 1755 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1756 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1757 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1758 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1759 } 1760 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1761 pcbddc->n_ISForDofs = 0; 1762 PetscCall(PetscFree(pcbddc->ISForDofs)); 1763 } 1764 } else { 1765 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1766 DM dm; 1767 1768 PetscCall(MatGetDM(pc->pmat, &dm)); 1769 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1770 if (dm) { 1771 IS *fields; 1772 PetscInt nf, i; 1773 1774 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1775 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1776 for (i = 0; i < nf; i++) { 1777 PetscInt bs; 1778 1779 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1780 PetscCall(ISGetBlockSize(fields[i], &bs)); 1781 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1782 PetscCall(ISDestroy(&fields[i])); 1783 } 1784 PetscCall(PetscFree(fields)); 1785 pcbddc->n_ISForDofsLocal = nf; 1786 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1787 PetscContainer c; 1788 1789 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1790 if (c) { 1791 MatISLocalFields lf; 1792 PetscCall(PetscContainerGetPointer(c, (void **)&lf)); 1793 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1794 } else { /* fallback, create the default fields if bs > 1 */ 1795 PetscInt i, n = matis->A->rmap->n; 1796 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1797 if (i > 1) { 1798 pcbddc->n_ISForDofsLocal = i; 1799 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1800 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); 1801 } 1802 } 1803 } 1804 } else { 1805 PetscInt i; 1806 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); 1807 } 1808 } 1809 1810 boundary: 1811 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1812 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1813 } else if (pcbddc->DirichletBoundariesLocal) { 1814 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1815 } 1816 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1817 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1818 } else if (pcbddc->NeumannBoundariesLocal) { 1819 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1820 } 1821 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); 1822 PetscCall(VecDestroy(&global)); 1823 PetscCall(VecDestroy(&local)); 1824 /* detect local disconnected subdomains if requested or needed */ 1825 if (pcbddc->detect_disconnected || matis->allow_repeated) { 1826 IS primalv = NULL; 1827 PetscInt nel; 1828 PetscBool filter = pcbddc->detect_disconnected_filter; 1829 1830 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1831 PetscCall(PetscFree(pcbddc->local_subs)); 1832 PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL)); 1833 if (matis->allow_repeated && nel) { 1834 const PetscInt *elsizes; 1835 1836 pcbddc->n_local_subs = nel; 1837 PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes)); 1838 PetscCall(PetscMalloc1(nel, &pcbddc->local_subs)); 1839 for (PetscInt i = 0, c = 0; i < nel; i++) { 1840 PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i])); 1841 c += elsizes[i]; 1842 } 1843 } else { 1844 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1845 } 1846 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1847 PetscCall(ISDestroy(&primalv)); 1848 } 1849 /* early stage corner detection */ 1850 { 1851 DM dm; 1852 1853 PetscCall(MatGetDM(pc->pmat, &dm)); 1854 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1855 if (dm) { 1856 PetscBool isda; 1857 1858 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1859 if (isda) { 1860 ISLocalToGlobalMapping l2l; 1861 IS corners; 1862 Mat lA; 1863 PetscBool gl, lo; 1864 1865 { 1866 Vec cvec; 1867 const PetscScalar *coords; 1868 PetscInt dof, n, cdim; 1869 PetscBool memc = PETSC_TRUE; 1870 1871 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1872 PetscCall(DMGetCoordinates(dm, &cvec)); 1873 PetscCall(VecGetLocalSize(cvec, &n)); 1874 PetscCall(VecGetBlockSize(cvec, &cdim)); 1875 n /= cdim; 1876 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1877 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1878 PetscCall(VecGetArrayRead(cvec, &coords)); 1879 #if defined(PETSC_USE_COMPLEX) 1880 memc = PETSC_FALSE; 1881 #endif 1882 if (dof != 1) memc = PETSC_FALSE; 1883 if (memc) { 1884 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1885 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1886 PetscReal *bcoords = pcbddc->mat_graph->coords; 1887 PetscInt i, b, d; 1888 1889 for (i = 0; i < n; i++) { 1890 for (b = 0; b < dof; b++) { 1891 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); 1892 } 1893 } 1894 } 1895 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1896 pcbddc->mat_graph->cdim = cdim; 1897 pcbddc->mat_graph->cnloc = dof * n; 1898 pcbddc->mat_graph->cloc = PETSC_FALSE; 1899 } 1900 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 1901 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 1902 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 1903 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 1904 lo = (PetscBool)(l2l && corners); 1905 PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 1906 if (gl) { /* From PETSc's DMDA */ 1907 const PetscInt *idx; 1908 PetscInt dof, bs, *idxout, n; 1909 1910 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1911 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 1912 PetscCall(ISGetLocalSize(corners, &n)); 1913 PetscCall(ISGetIndices(corners, &idx)); 1914 if (bs == dof) { 1915 PetscCall(PetscMalloc1(n, &idxout)); 1916 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 1917 } else { /* the original DMDA local-to-local map have been modified */ 1918 PetscInt i, d; 1919 1920 PetscCall(PetscMalloc1(dof * n, &idxout)); 1921 for (i = 0; i < n; i++) 1922 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 1923 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 1924 1925 bs = 1; 1926 n *= dof; 1927 } 1928 PetscCall(ISRestoreIndices(corners, &idx)); 1929 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1930 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 1931 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 1932 PetscCall(ISDestroy(&corners)); 1933 pcbddc->corner_selected = PETSC_TRUE; 1934 pcbddc->corner_selection = PETSC_TRUE; 1935 } 1936 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1937 } 1938 } 1939 } 1940 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1941 DM dm; 1942 1943 PetscCall(MatGetDM(pc->pmat, &dm)); 1944 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1945 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1946 Vec vcoords; 1947 PetscSection section; 1948 PetscReal *coords; 1949 PetscInt d, cdim, nl, nf, **ctxs; 1950 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1951 /* debug coordinates */ 1952 PetscViewer viewer; 1953 PetscBool flg; 1954 PetscViewerFormat format; 1955 const char *prefix; 1956 1957 PetscCall(DMGetCoordinateDim(dm, &cdim)); 1958 PetscCall(DMGetLocalSection(dm, §ion)); 1959 PetscCall(PetscSectionGetNumFields(section, &nf)); 1960 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 1961 PetscCall(VecGetLocalSize(vcoords, &nl)); 1962 PetscCall(PetscMalloc1(nl * cdim, &coords)); 1963 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 1964 PetscCall(PetscMalloc1(nf, &ctxs[0])); 1965 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 1966 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 1967 1968 /* debug coordinates */ 1969 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1970 PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 1971 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 1972 for (d = 0; d < cdim; d++) { 1973 PetscInt i; 1974 const PetscScalar *v; 1975 char name[16]; 1976 1977 for (i = 0; i < nf; i++) ctxs[i][0] = d; 1978 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d)); 1979 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 1980 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 1981 if (flg) PetscCall(VecView(vcoords, viewer)); 1982 PetscCall(VecGetArrayRead(vcoords, &v)); 1983 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 1984 PetscCall(VecRestoreArrayRead(vcoords, &v)); 1985 } 1986 PetscCall(VecDestroy(&vcoords)); 1987 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 1988 PetscCall(PetscFree(coords)); 1989 PetscCall(PetscFree(ctxs[0])); 1990 PetscCall(PetscFree2(funcs, ctxs)); 1991 if (flg) { 1992 PetscCall(PetscViewerPopFormat(viewer)); 1993 PetscCall(PetscViewerDestroy(&viewer)); 1994 } 1995 } 1996 } 1997 PetscFunctionReturn(PETSC_SUCCESS); 1998 } 1999 2000 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 2001 { 2002 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2003 IS nis; 2004 const PetscInt *idxs; 2005 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 2006 2007 PetscFunctionBegin; 2008 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 2009 if (mop == MPI_LAND) { 2010 /* init rootdata with true */ 2011 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 2012 } else { 2013 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 2014 } 2015 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 2016 PetscCall(ISGetLocalSize(*is, &nd)); 2017 PetscCall(ISGetIndices(*is, &idxs)); 2018 for (i = 0; i < nd; i++) 2019 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 2020 PetscCall(ISRestoreIndices(*is, &idxs)); 2021 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2022 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2023 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2024 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2025 if (mop == MPI_LAND) { 2026 PetscCall(PetscMalloc1(nd, &nidxs)); 2027 } else { 2028 PetscCall(PetscMalloc1(n, &nidxs)); 2029 } 2030 for (i = 0, nnd = 0; i < n; i++) 2031 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 2032 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 2033 PetscCall(ISDestroy(is)); 2034 *is = nis; 2035 PetscFunctionReturn(PETSC_SUCCESS); 2036 } 2037 2038 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) 2039 { 2040 PC_IS *pcis = (PC_IS *)pc->data; 2041 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2042 2043 PetscFunctionBegin; 2044 if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS); 2045 if (pcbddc->ChangeOfBasisMatrix) { 2046 Vec swap; 2047 2048 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 2049 swap = pcbddc->work_change; 2050 pcbddc->work_change = r; 2051 r = swap; 2052 } 2053 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2054 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2055 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2056 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 2057 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2058 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 2059 PetscCall(VecSet(z, 0.)); 2060 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2061 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2062 if (pcbddc->ChangeOfBasisMatrix) { 2063 pcbddc->work_change = r; 2064 PetscCall(VecCopy(z, pcbddc->work_change)); 2065 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 2066 } 2067 PetscFunctionReturn(PETSC_SUCCESS); 2068 } 2069 2070 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 2071 { 2072 PCBDDCBenignMatMult_ctx ctx; 2073 PetscBool apply_right, apply_left, reset_x; 2074 2075 PetscFunctionBegin; 2076 PetscCall(MatShellGetContext(A, &ctx)); 2077 if (transpose) { 2078 apply_right = ctx->apply_left; 2079 apply_left = ctx->apply_right; 2080 } else { 2081 apply_right = ctx->apply_right; 2082 apply_left = ctx->apply_left; 2083 } 2084 reset_x = PETSC_FALSE; 2085 if (apply_right) { 2086 const PetscScalar *ax; 2087 PetscInt nl, i; 2088 2089 PetscCall(VecGetLocalSize(x, &nl)); 2090 PetscCall(VecGetArrayRead(x, &ax)); 2091 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 2092 PetscCall(VecRestoreArrayRead(x, &ax)); 2093 for (i = 0; i < ctx->benign_n; i++) { 2094 PetscScalar sum, val; 2095 const PetscInt *idxs; 2096 PetscInt nz, j; 2097 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2098 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2099 sum = 0.; 2100 if (ctx->apply_p0) { 2101 val = ctx->work[idxs[nz - 1]]; 2102 for (j = 0; j < nz - 1; j++) { 2103 sum += ctx->work[idxs[j]]; 2104 ctx->work[idxs[j]] += val; 2105 } 2106 } else { 2107 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]]; 2108 } 2109 ctx->work[idxs[nz - 1]] -= sum; 2110 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2111 } 2112 PetscCall(VecPlaceArray(x, ctx->work)); 2113 reset_x = PETSC_TRUE; 2114 } 2115 if (transpose) { 2116 PetscCall(MatMultTranspose(ctx->A, x, y)); 2117 } else { 2118 PetscCall(MatMult(ctx->A, x, y)); 2119 } 2120 if (reset_x) PetscCall(VecResetArray(x)); 2121 if (apply_left) { 2122 PetscScalar *ay; 2123 PetscInt i; 2124 2125 PetscCall(VecGetArray(y, &ay)); 2126 for (i = 0; i < ctx->benign_n; i++) { 2127 PetscScalar sum, val; 2128 const PetscInt *idxs; 2129 PetscInt nz, j; 2130 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2131 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2132 val = -ay[idxs[nz - 1]]; 2133 if (ctx->apply_p0) { 2134 sum = 0.; 2135 for (j = 0; j < nz - 1; j++) { 2136 sum += ay[idxs[j]]; 2137 ay[idxs[j]] += val; 2138 } 2139 ay[idxs[nz - 1]] += sum; 2140 } else { 2141 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val; 2142 ay[idxs[nz - 1]] = 0.; 2143 } 2144 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2145 } 2146 PetscCall(VecRestoreArray(y, &ay)); 2147 } 2148 PetscFunctionReturn(PETSC_SUCCESS); 2149 } 2150 2151 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2152 { 2153 PetscFunctionBegin; 2154 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 2155 PetscFunctionReturn(PETSC_SUCCESS); 2156 } 2157 2158 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2159 { 2160 PetscFunctionBegin; 2161 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 2162 PetscFunctionReturn(PETSC_SUCCESS); 2163 } 2164 2165 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2166 { 2167 PC_IS *pcis = (PC_IS *)pc->data; 2168 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2169 PCBDDCBenignMatMult_ctx ctx; 2170 2171 PetscFunctionBegin; 2172 if (!restore) { 2173 Mat A_IB, A_BI; 2174 PetscScalar *work; 2175 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2176 2177 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 2178 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS); 2179 PetscCall(PetscMalloc1(pcis->n, &work)); 2180 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 2181 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 2182 PetscCall(MatSetType(A_IB, MATSHELL)); 2183 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private)); 2184 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 2185 PetscCall(PetscNew(&ctx)); 2186 PetscCall(MatShellSetContext(A_IB, ctx)); 2187 ctx->apply_left = PETSC_TRUE; 2188 ctx->apply_right = PETSC_FALSE; 2189 ctx->apply_p0 = PETSC_FALSE; 2190 ctx->benign_n = pcbddc->benign_n; 2191 if (reuse) { 2192 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2193 ctx->free = PETSC_FALSE; 2194 } else { /* TODO: could be optimized for successive solves */ 2195 ISLocalToGlobalMapping N_to_D; 2196 PetscInt i; 2197 2198 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 2199 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 2200 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); 2201 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2202 ctx->free = PETSC_TRUE; 2203 } 2204 ctx->A = pcis->A_IB; 2205 ctx->work = work; 2206 PetscCall(MatSetUp(A_IB)); 2207 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 2208 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 2209 pcis->A_IB = A_IB; 2210 2211 /* A_BI as A_IB^T */ 2212 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 2213 pcbddc->benign_original_mat = pcis->A_BI; 2214 pcis->A_BI = A_BI; 2215 } else { 2216 if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS); 2217 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2218 PetscCall(MatDestroy(&pcis->A_IB)); 2219 pcis->A_IB = ctx->A; 2220 ctx->A = NULL; 2221 PetscCall(MatDestroy(&pcis->A_BI)); 2222 pcis->A_BI = pcbddc->benign_original_mat; 2223 pcbddc->benign_original_mat = NULL; 2224 if (ctx->free) { 2225 PetscInt i; 2226 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2227 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2228 } 2229 PetscCall(PetscFree(ctx->work)); 2230 PetscCall(PetscFree(ctx)); 2231 } 2232 PetscFunctionReturn(PETSC_SUCCESS); 2233 } 2234 2235 /* used just in bddc debug mode */ 2236 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2237 { 2238 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2239 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2240 Mat An; 2241 2242 PetscFunctionBegin; 2243 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2244 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2245 if (is1) { 2246 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2247 PetscCall(MatDestroy(&An)); 2248 } else { 2249 *B = An; 2250 } 2251 PetscFunctionReturn(PETSC_SUCCESS); 2252 } 2253 2254 /* TODO: add reuse flag */ 2255 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2256 { 2257 Mat Bt; 2258 PetscScalar *a, *bdata; 2259 const PetscInt *ii, *ij; 2260 PetscInt m, n, i, nnz, *bii, *bij; 2261 PetscBool flg_row; 2262 2263 PetscFunctionBegin; 2264 PetscCall(MatGetSize(A, &n, &m)); 2265 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2266 PetscCall(MatSeqAIJGetArray(A, &a)); 2267 nnz = n; 2268 for (i = 0; i < ii[n]; i++) { 2269 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2270 } 2271 PetscCall(PetscMalloc1(n + 1, &bii)); 2272 PetscCall(PetscMalloc1(nnz, &bij)); 2273 PetscCall(PetscMalloc1(nnz, &bdata)); 2274 nnz = 0; 2275 bii[0] = 0; 2276 for (i = 0; i < n; i++) { 2277 PetscInt j; 2278 for (j = ii[i]; j < ii[i + 1]; j++) { 2279 PetscScalar entry = a[j]; 2280 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2281 bij[nnz] = ij[j]; 2282 bdata[nnz] = entry; 2283 nnz++; 2284 } 2285 } 2286 bii[i + 1] = nnz; 2287 } 2288 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2289 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2290 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2291 { 2292 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data; 2293 b->free_a = PETSC_TRUE; 2294 b->free_ij = PETSC_TRUE; 2295 } 2296 if (*B == A) PetscCall(MatDestroy(&A)); 2297 *B = Bt; 2298 PetscFunctionReturn(PETSC_SUCCESS); 2299 } 2300 2301 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) 2302 { 2303 Mat B = NULL; 2304 DM dm; 2305 IS is_dummy, *cc_n; 2306 ISLocalToGlobalMapping l2gmap_dummy; 2307 PCBDDCGraph graph; 2308 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2309 PetscInt i, n; 2310 PetscInt *xadj, *adjncy; 2311 PetscBool isplex = PETSC_FALSE; 2312 2313 PetscFunctionBegin; 2314 if (ncc) *ncc = 0; 2315 if (cc) *cc = NULL; 2316 if (primalv) *primalv = NULL; 2317 PetscCall(PCBDDCGraphCreate(&graph)); 2318 PetscCall(MatGetDM(pc->pmat, &dm)); 2319 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2320 if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, "")); 2321 if (filter) isplex = PETSC_FALSE; 2322 2323 if (isplex) { /* this code has been modified from plexpartition.c */ 2324 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2325 PetscInt *adj = NULL; 2326 IS cellNumbering; 2327 const PetscInt *cellNum; 2328 PetscBool useCone, useClosure; 2329 PetscSection section; 2330 PetscSegBuffer adjBuffer; 2331 PetscSF sfPoint; 2332 2333 PetscCall(DMConvert(dm, DMPLEX, &dm)); 2334 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2335 PetscCall(DMGetPointSF(dm, &sfPoint)); 2336 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2337 /* Build adjacency graph via a section/segbuffer */ 2338 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2339 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2340 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2341 /* Always use FVM adjacency to create partitioner graph */ 2342 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2343 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2344 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2345 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2346 for (n = 0, p = pStart; p < pEnd; p++) { 2347 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2348 if (nroots > 0) { 2349 if (cellNum[p] < 0) continue; 2350 } 2351 adjSize = PETSC_DETERMINE; 2352 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2353 for (a = 0; a < adjSize; ++a) { 2354 const PetscInt point = adj[a]; 2355 if (pStart <= point && point < pEnd) { 2356 PetscInt *PETSC_RESTRICT pBuf; 2357 PetscCall(PetscSectionAddDof(section, p, 1)); 2358 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2359 *pBuf = point; 2360 } 2361 } 2362 n++; 2363 } 2364 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2365 /* Derive CSR graph from section/segbuffer */ 2366 PetscCall(PetscSectionSetUp(section)); 2367 PetscCall(PetscSectionGetStorageSize(section, &size)); 2368 PetscCall(PetscMalloc1(n + 1, &xadj)); 2369 for (idx = 0, p = pStart; p < pEnd; p++) { 2370 if (nroots > 0) { 2371 if (cellNum[p] < 0) continue; 2372 } 2373 PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++])); 2374 } 2375 xadj[n] = size; 2376 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2377 /* Clean up */ 2378 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2379 PetscCall(PetscSectionDestroy(§ion)); 2380 PetscCall(PetscFree(adj)); 2381 graph->xadj = xadj; 2382 graph->adjncy = adjncy; 2383 } else { 2384 Mat A; 2385 PetscBool isseqaij, flg_row; 2386 2387 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2388 if (!A->rmap->N || !A->cmap->N) { 2389 PetscCall(PCBDDCGraphDestroy(&graph)); 2390 PetscFunctionReturn(PETSC_SUCCESS); 2391 } 2392 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2393 if (!isseqaij && filter) { 2394 PetscBool isseqdense; 2395 2396 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2397 if (!isseqdense) { 2398 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2399 } else { /* TODO: rectangular case and LDA */ 2400 PetscScalar *array; 2401 PetscReal chop = 1.e-6; 2402 2403 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2404 PetscCall(MatDenseGetArray(B, &array)); 2405 PetscCall(MatGetSize(B, &n, NULL)); 2406 for (i = 0; i < n; i++) { 2407 PetscInt j; 2408 for (j = i + 1; j < n; j++) { 2409 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2410 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2411 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2412 } 2413 } 2414 PetscCall(MatDenseRestoreArray(B, &array)); 2415 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2416 } 2417 } else { 2418 PetscCall(PetscObjectReference((PetscObject)A)); 2419 B = A; 2420 } 2421 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2422 2423 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2424 if (filter) { 2425 PetscScalar *data; 2426 PetscInt j, cum; 2427 2428 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2429 PetscCall(MatSeqAIJGetArray(B, &data)); 2430 cum = 0; 2431 for (i = 0; i < n; i++) { 2432 PetscInt t; 2433 2434 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2435 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue; 2436 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2437 } 2438 t = xadj_filtered[i]; 2439 xadj_filtered[i] = cum; 2440 cum += t; 2441 } 2442 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2443 graph->xadj = xadj_filtered; 2444 graph->adjncy = adjncy_filtered; 2445 } else { 2446 graph->xadj = xadj; 2447 graph->adjncy = adjncy; 2448 } 2449 } 2450 /* compute local connected components using PCBDDCGraph */ 2451 graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */ 2452 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2453 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2454 PetscCall(ISDestroy(&is_dummy)); 2455 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX)); 2456 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2457 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2458 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2459 2460 /* partial clean up */ 2461 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2462 if (B) { 2463 PetscBool flg_row; 2464 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2465 PetscCall(MatDestroy(&B)); 2466 } 2467 if (isplex) { 2468 PetscCall(PetscFree(xadj)); 2469 PetscCall(PetscFree(adjncy)); 2470 } 2471 2472 /* get back data */ 2473 if (isplex) { 2474 if (ncc) *ncc = graph->ncc; 2475 if (cc || primalv) { 2476 Mat A; 2477 PetscBT btv, btvt, btvc; 2478 PetscSection subSection; 2479 PetscInt *ids, cum, cump, *cids, *pids; 2480 PetscInt dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd; 2481 2482 PetscCall(DMGetDimension(dm, &dim)); 2483 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2484 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 2485 PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd)); 2486 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 2487 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 2488 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2489 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2490 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2491 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2492 PetscCall(PetscBTCreate(pEnd - pStart, &btvc)); 2493 2494 /* First see if we find corners for the subdomains, i.e. a vertex 2495 shared by at least dim subdomain boundary faces. This does not 2496 cover all the possible cases with simplices but it is enough 2497 for tensor cells */ 2498 if (vStart != fStart && dim <= 3) { 2499 for (PetscInt c = cStart; c < cEnd; c++) { 2500 PetscInt nf, cnt = 0, mcnt = dim, *cfaces; 2501 const PetscInt *faces; 2502 2503 PetscCall(DMPlexGetConeSize(dm, c, &nf)); 2504 PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces)); 2505 PetscCall(DMPlexGetCone(dm, c, &faces)); 2506 for (PetscInt f = 0; f < nf; f++) { 2507 PetscInt nc, ff; 2508 2509 PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc)); 2510 PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL)); 2511 if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f]; 2512 } 2513 if (cnt >= mcnt) { 2514 PetscInt size, *closure = NULL; 2515 2516 PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2517 for (PetscInt k = 0; k < 2 * size; k += 2) { 2518 PetscInt v = closure[k]; 2519 if (v >= vStart && v < vEnd) { 2520 PetscInt vsize, *vclosure = NULL; 2521 2522 cnt = 0; 2523 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2524 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) { 2525 PetscInt f = vclosure[vk]; 2526 if (f >= fStart && f < fEnd) { 2527 PetscInt nc, ff; 2528 PetscBool valid = PETSC_FALSE; 2529 2530 for (PetscInt fk = 0; fk < nf; fk++) 2531 if (f == cfaces[fk]) valid = PETSC_TRUE; 2532 if (!valid) continue; 2533 PetscCall(DMPlexGetSupportSize(dm, f, &nc)); 2534 PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL)); 2535 if (nc == 1 && f == ff) cnt++; 2536 } 2537 } 2538 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart)); 2539 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2540 } 2541 } 2542 PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2543 } 2544 PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces)); 2545 } 2546 } 2547 2548 cids[0] = 0; 2549 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2550 PetscInt j; 2551 2552 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2553 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2554 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2555 2556 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2557 for (k = 0; k < 2 * size; k += 2) { 2558 PetscInt s, pp, p = closure[k], off, dof, cdof; 2559 2560 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2561 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2562 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2563 for (s = 0; s < dof - cdof; s++) { 2564 if (PetscBTLookupSet(btvt, off + s)) continue; 2565 if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2566 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2567 else pids[cump++] = off + s; /* cross-vertex */ 2568 } 2569 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2570 if (pp != p) { 2571 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2572 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2573 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2574 for (s = 0; s < dof - cdof; s++) { 2575 if (PetscBTLookupSet(btvt, off + s)) continue; 2576 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2577 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2578 else pids[cump++] = off + s; /* cross-vertex */ 2579 } 2580 } 2581 } 2582 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2583 } 2584 cids[i + 1] = cum; 2585 /* mark dofs as already assigned */ 2586 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j])); 2587 } 2588 if (cc) { 2589 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2590 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); 2591 *cc = cc_n; 2592 } 2593 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2594 PetscCall(PetscFree3(ids, cids, pids)); 2595 PetscCall(PetscBTDestroy(&btv)); 2596 PetscCall(PetscBTDestroy(&btvt)); 2597 PetscCall(PetscBTDestroy(&btvc)); 2598 PetscCall(DMDestroy(&dm)); 2599 } 2600 } else { 2601 if (ncc) *ncc = graph->ncc; 2602 if (cc) { 2603 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2604 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); 2605 *cc = cc_n; 2606 } 2607 } 2608 /* clean up graph */ 2609 graph->xadj = NULL; 2610 graph->adjncy = NULL; 2611 PetscCall(PCBDDCGraphDestroy(&graph)); 2612 PetscFunctionReturn(PETSC_SUCCESS); 2613 } 2614 2615 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2616 { 2617 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2618 PC_IS *pcis = (PC_IS *)pc->data; 2619 IS dirIS = NULL; 2620 PetscInt i; 2621 2622 PetscFunctionBegin; 2623 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2624 if (zerodiag) { 2625 Mat A; 2626 Vec vec3_N; 2627 PetscScalar *vals; 2628 const PetscInt *idxs; 2629 PetscInt nz, *count; 2630 2631 /* p0 */ 2632 PetscCall(VecSet(pcis->vec1_N, 0.)); 2633 PetscCall(PetscMalloc1(pcis->n, &vals)); 2634 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2635 PetscCall(ISGetIndices(zerodiag, &idxs)); 2636 for (i = 0; i < nz; i++) vals[i] = 1.; 2637 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2638 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2639 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2640 /* v_I */ 2641 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2642 for (i = 0; i < nz; i++) vals[i] = 0.; 2643 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2644 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2645 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2646 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2647 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2648 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2649 if (dirIS) { 2650 PetscInt n; 2651 2652 PetscCall(ISGetLocalSize(dirIS, &n)); 2653 PetscCall(ISGetIndices(dirIS, &idxs)); 2654 for (i = 0; i < n; i++) vals[i] = 0.; 2655 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2656 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2657 } 2658 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2659 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2660 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2661 PetscCall(VecSet(vec3_N, 0.)); 2662 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2663 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2664 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2665 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2666 PetscCall(PetscFree(vals)); 2667 PetscCall(VecDestroy(&vec3_N)); 2668 2669 /* there should not be any pressure dofs lying on the interface */ 2670 PetscCall(PetscCalloc1(pcis->n, &count)); 2671 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2672 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2673 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2674 PetscCall(ISGetIndices(zerodiag, &idxs)); 2675 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2676 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2677 PetscCall(PetscFree(count)); 2678 } 2679 PetscCall(ISDestroy(&dirIS)); 2680 2681 /* check PCBDDCBenignGetOrSetP0 */ 2682 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2683 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2684 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2685 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2686 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2687 for (i = 0; i < pcbddc->benign_n; i++) { 2688 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2689 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2690 } 2691 PetscFunctionReturn(PETSC_SUCCESS); 2692 } 2693 2694 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2695 { 2696 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2697 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2698 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2699 PetscInt nz, n, benign_n, bsp = 1; 2700 PetscInt *interior_dofs, n_interior_dofs, nneu; 2701 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2702 2703 PetscFunctionBegin; 2704 if (reuse) goto project_b0; 2705 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2706 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2707 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2708 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2709 has_null_pressures = PETSC_TRUE; 2710 have_null = PETSC_TRUE; 2711 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2712 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2713 Checks if all the pressure dofs in each subdomain have a zero diagonal 2714 If not, a change of basis on pressures is not needed 2715 since the local Schur complements are already SPD 2716 */ 2717 if (pcbddc->n_ISForDofsLocal) { 2718 IS iP = NULL; 2719 PetscInt p, *pp; 2720 PetscBool flg, blocked = PETSC_FALSE; 2721 2722 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2723 n = pcbddc->n_ISForDofsLocal; 2724 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2725 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2726 PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL)); 2727 PetscOptionsEnd(); 2728 if (!flg) { 2729 n = 1; 2730 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2731 } 2732 2733 bsp = 0; 2734 for (p = 0; p < n; p++) { 2735 PetscInt bs = 1; 2736 2737 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2738 if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2739 bsp += bs; 2740 } 2741 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2742 bsp = 0; 2743 for (p = 0; p < n; p++) { 2744 const PetscInt *idxs; 2745 PetscInt b, bs = 1, npl, *bidxs; 2746 2747 if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2748 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2749 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2750 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2751 for (b = 0; b < bs; b++) { 2752 PetscInt i; 2753 2754 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2755 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2756 bsp++; 2757 } 2758 PetscCall(PetscFree(bidxs)); 2759 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2760 } 2761 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2762 2763 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2764 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2765 if (iP) { 2766 IS newpressures; 2767 2768 PetscCall(ISDifference(pressures, iP, &newpressures)); 2769 PetscCall(ISDestroy(&pressures)); 2770 pressures = newpressures; 2771 } 2772 PetscCall(ISSorted(pressures, &sorted)); 2773 if (!sorted) PetscCall(ISSort(pressures)); 2774 PetscCall(PetscFree(pp)); 2775 } 2776 2777 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2778 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2779 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2780 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2781 PetscCall(ISSorted(zerodiag, &sorted)); 2782 if (!sorted) PetscCall(ISSort(zerodiag)); 2783 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2784 zerodiag_save = zerodiag; 2785 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2786 if (!nz) { 2787 if (n) have_null = PETSC_FALSE; 2788 has_null_pressures = PETSC_FALSE; 2789 PetscCall(ISDestroy(&zerodiag)); 2790 } 2791 recompute_zerodiag = PETSC_FALSE; 2792 2793 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2794 zerodiag_subs = NULL; 2795 benign_n = 0; 2796 n_interior_dofs = 0; 2797 interior_dofs = NULL; 2798 nneu = 0; 2799 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); 2800 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2801 if (checkb) { /* need to compute interior nodes */ 2802 PetscInt n, i; 2803 PetscInt *count; 2804 ISLocalToGlobalMapping mapping; 2805 2806 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL)); 2807 PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL)); 2808 PetscCall(PetscMalloc1(n, &interior_dofs)); 2809 for (i = 0; i < n; i++) 2810 if (count[i] < 2) interior_dofs[n_interior_dofs++] = i; 2811 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL)); 2812 } 2813 if (has_null_pressures) { 2814 IS *subs; 2815 PetscInt nsubs, i, j, nl; 2816 const PetscInt *idxs; 2817 PetscScalar *array; 2818 Vec *work; 2819 2820 subs = pcbddc->local_subs; 2821 nsubs = pcbddc->n_local_subs; 2822 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2823 if (checkb) { 2824 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2825 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2826 PetscCall(ISGetIndices(zerodiag, &idxs)); 2827 /* work[0] = 1_p */ 2828 PetscCall(VecSet(work[0], 0.)); 2829 PetscCall(VecGetArray(work[0], &array)); 2830 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2831 PetscCall(VecRestoreArray(work[0], &array)); 2832 /* work[0] = 1_v */ 2833 PetscCall(VecSet(work[1], 1.)); 2834 PetscCall(VecGetArray(work[1], &array)); 2835 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2836 PetscCall(VecRestoreArray(work[1], &array)); 2837 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2838 } 2839 2840 if (nsubs > 1 || bsp > 1) { 2841 IS *is; 2842 PetscInt b, totb; 2843 2844 totb = bsp; 2845 is = bsp > 1 ? bzerodiag : &zerodiag; 2846 nsubs = PetscMax(nsubs, 1); 2847 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2848 for (b = 0; b < totb; b++) { 2849 for (i = 0; i < nsubs; i++) { 2850 ISLocalToGlobalMapping l2g; 2851 IS t_zerodiag_subs; 2852 PetscInt nl; 2853 2854 if (subs) { 2855 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2856 } else { 2857 IS tis; 2858 2859 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2860 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2861 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2862 PetscCall(ISDestroy(&tis)); 2863 } 2864 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2865 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2866 if (nl) { 2867 PetscBool valid = PETSC_TRUE; 2868 2869 if (checkb) { 2870 PetscCall(VecSet(matis->x, 0)); 2871 PetscCall(ISGetLocalSize(subs[i], &nl)); 2872 PetscCall(ISGetIndices(subs[i], &idxs)); 2873 PetscCall(VecGetArray(matis->x, &array)); 2874 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2875 PetscCall(VecRestoreArray(matis->x, &array)); 2876 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2877 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2878 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2879 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2880 PetscCall(VecGetArray(matis->y, &array)); 2881 for (j = 0; j < n_interior_dofs; j++) { 2882 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2883 valid = PETSC_FALSE; 2884 break; 2885 } 2886 } 2887 PetscCall(VecRestoreArray(matis->y, &array)); 2888 } 2889 if (valid && nneu) { 2890 const PetscInt *idxs; 2891 PetscInt nzb; 2892 2893 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2894 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2895 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2896 if (nzb) valid = PETSC_FALSE; 2897 } 2898 if (valid && pressures) { 2899 IS t_pressure_subs, tmp; 2900 PetscInt i1, i2; 2901 2902 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 2903 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 2904 PetscCall(ISGetLocalSize(tmp, &i1)); 2905 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 2906 if (i2 != i1) valid = PETSC_FALSE; 2907 PetscCall(ISDestroy(&t_pressure_subs)); 2908 PetscCall(ISDestroy(&tmp)); 2909 } 2910 if (valid) { 2911 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 2912 benign_n++; 2913 } else recompute_zerodiag = PETSC_TRUE; 2914 } 2915 PetscCall(ISDestroy(&t_zerodiag_subs)); 2916 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2917 } 2918 } 2919 } else { /* there's just one subdomain (or zero if they have not been detected */ 2920 PetscBool valid = PETSC_TRUE; 2921 2922 if (nneu) valid = PETSC_FALSE; 2923 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid)); 2924 if (valid && checkb) { 2925 PetscCall(MatMult(matis->A, work[0], matis->x)); 2926 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 2927 PetscCall(VecGetArray(matis->x, &array)); 2928 for (j = 0; j < n_interior_dofs; j++) { 2929 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2930 valid = PETSC_FALSE; 2931 break; 2932 } 2933 } 2934 PetscCall(VecRestoreArray(matis->x, &array)); 2935 } 2936 if (valid) { 2937 benign_n = 1; 2938 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 2939 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2940 zerodiag_subs[0] = zerodiag; 2941 } 2942 } 2943 if (checkb) PetscCall(VecDestroyVecs(2, &work)); 2944 } 2945 PetscCall(PetscFree(interior_dofs)); 2946 2947 if (!benign_n) { 2948 PetscInt n; 2949 2950 PetscCall(ISDestroy(&zerodiag)); 2951 recompute_zerodiag = PETSC_FALSE; 2952 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2953 if (n) have_null = PETSC_FALSE; 2954 } 2955 2956 /* final check for null pressures */ 2957 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null)); 2958 2959 if (recompute_zerodiag) { 2960 PetscCall(ISDestroy(&zerodiag)); 2961 if (benign_n == 1) { 2962 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2963 zerodiag = zerodiag_subs[0]; 2964 } else { 2965 PetscInt i, nzn, *new_idxs; 2966 2967 nzn = 0; 2968 for (i = 0; i < benign_n; i++) { 2969 PetscInt ns; 2970 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2971 nzn += ns; 2972 } 2973 PetscCall(PetscMalloc1(nzn, &new_idxs)); 2974 nzn = 0; 2975 for (i = 0; i < benign_n; i++) { 2976 PetscInt ns, *idxs; 2977 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2978 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2979 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 2980 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2981 nzn += ns; 2982 } 2983 PetscCall(PetscSortInt(nzn, new_idxs)); 2984 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 2985 } 2986 have_null = PETSC_FALSE; 2987 } 2988 2989 /* determines if the coarse solver will be singular or not */ 2990 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2991 2992 /* Prepare matrix to compute no-net-flux */ 2993 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2994 Mat A, loc_divudotp; 2995 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 2996 IS row, col, isused = NULL; 2997 PetscInt M, N, n, st, n_isused; 2998 2999 if (pressures) { 3000 isused = pressures; 3001 } else { 3002 isused = zerodiag_save; 3003 } 3004 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 3005 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 3006 PetscCall(MatGetLocalSize(A, &n, NULL)); 3007 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 3008 n_isused = 0; 3009 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused)); 3010 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 3011 st = st - n_isused; 3012 if (n) { 3013 const PetscInt *gidxs; 3014 3015 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 3016 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 3017 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 3018 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 3019 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 3020 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 3021 } else { 3022 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 3023 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 3024 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 3025 } 3026 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 3027 PetscCall(ISGetSize(row, &M)); 3028 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 3029 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 3030 PetscCall(ISDestroy(&row)); 3031 PetscCall(ISDestroy(&col)); 3032 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 3033 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 3034 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3035 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 3036 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 3037 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 3038 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 3039 PetscCall(MatDestroy(&loc_divudotp)); 3040 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3041 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3042 } 3043 PetscCall(ISDestroy(&zerodiag_save)); 3044 PetscCall(ISDestroy(&pressures)); 3045 if (bzerodiag) { 3046 PetscInt i; 3047 3048 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i])); 3049 PetscCall(PetscFree(bzerodiag)); 3050 } 3051 pcbddc->benign_n = benign_n; 3052 pcbddc->benign_zerodiag_subs = zerodiag_subs; 3053 3054 /* determines if the problem has subdomains with 0 pressure block */ 3055 have_null = (PetscBool)(!!pcbddc->benign_n); 3056 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 3057 3058 project_b0: 3059 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 3060 /* change of basis and p0 dofs */ 3061 if (pcbddc->benign_n) { 3062 PetscInt i, s, *nnz; 3063 3064 /* local change of basis for pressures */ 3065 PetscCall(MatDestroy(&pcbddc->benign_change)); 3066 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 3067 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 3068 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 3069 PetscCall(PetscMalloc1(n, &nnz)); 3070 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 3071 for (i = 0; i < pcbddc->benign_n; i++) { 3072 const PetscInt *idxs; 3073 PetscInt nzs, j; 3074 3075 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 3076 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3077 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 3078 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 3079 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3080 } 3081 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 3082 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3083 PetscCall(PetscFree(nnz)); 3084 /* set identity by default */ 3085 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); 3086 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3087 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 3088 /* set change on pressures */ 3089 for (s = 0; s < pcbddc->benign_n; s++) { 3090 PetscScalar *array; 3091 const PetscInt *idxs; 3092 PetscInt nzs; 3093 3094 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 3095 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3096 for (i = 0; i < nzs - 1; i++) { 3097 PetscScalar vals[2]; 3098 PetscInt cols[2]; 3099 3100 cols[0] = idxs[i]; 3101 cols[1] = idxs[nzs - 1]; 3102 vals[0] = 1.; 3103 vals[1] = 1.; 3104 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 3105 } 3106 PetscCall(PetscMalloc1(nzs, &array)); 3107 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 3108 array[nzs - 1] = 1.; 3109 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 3110 /* store local idxs for p0 */ 3111 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 3112 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3113 PetscCall(PetscFree(array)); 3114 } 3115 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3116 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3117 3118 /* project if needed */ 3119 if (pcbddc->benign_change_explicit) { 3120 Mat M; 3121 3122 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 3123 PetscCall(MatDestroy(&pcbddc->local_mat)); 3124 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 3125 PetscCall(MatDestroy(&M)); 3126 } 3127 /* store global idxs for p0 */ 3128 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 3129 } 3130 *zerodiaglocal = zerodiag; 3131 PetscFunctionReturn(PETSC_SUCCESS); 3132 } 3133 3134 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3135 { 3136 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3137 PetscScalar *array; 3138 3139 PetscFunctionBegin; 3140 if (!pcbddc->benign_sf) { 3141 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 3142 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 3143 } 3144 if (get) { 3145 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 3146 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3147 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3148 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 3149 } else { 3150 PetscCall(VecGetArray(v, &array)); 3151 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3152 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3153 PetscCall(VecRestoreArray(v, &array)); 3154 } 3155 PetscFunctionReturn(PETSC_SUCCESS); 3156 } 3157 3158 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3159 { 3160 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3161 3162 PetscFunctionBegin; 3163 /* TODO: add error checking 3164 - avoid nested pop (or push) calls. 3165 - cannot push before pop. 3166 - cannot call this if pcbddc->local_mat is NULL 3167 */ 3168 if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 3169 if (pop) { 3170 if (pcbddc->benign_change_explicit) { 3171 IS is_p0; 3172 MatReuse reuse; 3173 3174 /* extract B_0 */ 3175 reuse = MAT_INITIAL_MATRIX; 3176 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX; 3177 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 3178 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 3179 /* remove rows and cols from local problem */ 3180 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 3181 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 3182 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 3183 PetscCall(ISDestroy(&is_p0)); 3184 } else { 3185 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 3186 PetscScalar *vals; 3187 PetscInt i, n, *idxs_ins; 3188 3189 PetscCall(VecGetLocalSize(matis->y, &n)); 3190 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 3191 if (!pcbddc->benign_B0) { 3192 PetscInt *nnz; 3193 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 3194 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 3195 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 3196 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 3197 for (i = 0; i < pcbddc->benign_n; i++) { 3198 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 3199 nnz[i] = n - nnz[i]; 3200 } 3201 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 3202 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3203 PetscCall(PetscFree(nnz)); 3204 } 3205 3206 for (i = 0; i < pcbddc->benign_n; i++) { 3207 PetscScalar *array; 3208 PetscInt *idxs, j, nz, cum; 3209 3210 PetscCall(VecSet(matis->x, 0.)); 3211 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 3212 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3213 for (j = 0; j < nz; j++) vals[j] = 1.; 3214 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 3215 PetscCall(VecAssemblyBegin(matis->x)); 3216 PetscCall(VecAssemblyEnd(matis->x)); 3217 PetscCall(VecSet(matis->y, 0.)); 3218 PetscCall(MatMult(matis->A, matis->x, matis->y)); 3219 PetscCall(VecGetArray(matis->y, &array)); 3220 cum = 0; 3221 for (j = 0; j < n; j++) { 3222 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3223 vals[cum] = array[j]; 3224 idxs_ins[cum] = j; 3225 cum++; 3226 } 3227 } 3228 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 3229 PetscCall(VecRestoreArray(matis->y, &array)); 3230 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3231 } 3232 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3233 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3234 PetscCall(PetscFree2(idxs_ins, vals)); 3235 } 3236 } else { /* push */ 3237 3238 PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 3239 for (PetscInt i = 0; i < pcbddc->benign_n; i++) { 3240 PetscScalar *B0_vals; 3241 PetscInt *B0_cols, B0_ncol; 3242 3243 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3244 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 3245 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 3246 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 3247 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3248 } 3249 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3250 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3251 } 3252 PetscFunctionReturn(PETSC_SUCCESS); 3253 } 3254 3255 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3256 { 3257 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3258 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3259 PetscBLASInt B_neigs, B_ierr, B_lwork; 3260 PetscBLASInt *B_iwork, *B_ifail; 3261 PetscScalar *work, lwork; 3262 PetscScalar *St, *S, *eigv; 3263 PetscScalar *Sarray, *Starray; 3264 PetscReal *eigs, thresh, lthresh, uthresh; 3265 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 3266 PetscBool allocated_S_St, upart; 3267 #if defined(PETSC_USE_COMPLEX) 3268 PetscReal *rwork; 3269 #endif 3270 3271 PetscFunctionBegin; 3272 if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS); 3273 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 3274 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3275 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 3276 sub_schurs->is_posdef); 3277 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3278 3279 if (pcbddc->dbg_flag) { 3280 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3281 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3282 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 3283 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 3284 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3285 } 3286 3287 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); 3288 3289 /* max size of subsets */ 3290 mss = 0; 3291 for (i = 0; i < sub_schurs->n_subs; i++) { 3292 PetscInt subset_size; 3293 3294 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3295 mss = PetscMax(mss, subset_size); 3296 } 3297 3298 /* min/max and threshold */ 3299 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3300 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3301 nmax = PetscMax(nmin, nmax); 3302 allocated_S_St = PETSC_FALSE; 3303 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3304 allocated_S_St = PETSC_TRUE; 3305 } 3306 3307 /* allocate lapack workspace */ 3308 cum = cum2 = 0; 3309 maxneigs = 0; 3310 for (i = 0; i < sub_schurs->n_subs; i++) { 3311 PetscInt n, subset_size; 3312 3313 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3314 n = PetscMin(subset_size, nmax); 3315 cum += subset_size; 3316 cum2 += subset_size * n; 3317 maxneigs = PetscMax(maxneigs, n); 3318 } 3319 lwork = 0; 3320 if (mss) { 3321 PetscScalar sdummy = 0.; 3322 PetscBLASInt B_itype = 1; 3323 PetscBLASInt B_N, idummy = 0; 3324 PetscReal rdummy = 0., zero = 0.0; 3325 PetscReal eps = 0.0; /* dlamch? */ 3326 3327 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3328 PetscCall(PetscBLASIntCast(mss, &B_N)); 3329 B_lwork = -1; 3330 /* some implementations may complain about NULL pointers, even if we are querying */ 3331 S = &sdummy; 3332 St = &sdummy; 3333 eigs = &rdummy; 3334 eigv = &sdummy; 3335 B_iwork = &idummy; 3336 B_ifail = &idummy; 3337 #if defined(PETSC_USE_COMPLEX) 3338 rwork = &rdummy; 3339 #endif 3340 thresh = 1.0; 3341 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3342 #if defined(PETSC_USE_COMPLEX) 3343 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3344 #else 3345 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3346 #endif 3347 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr); 3348 PetscCall(PetscFPTrapPop()); 3349 } 3350 3351 nv = 0; 3352 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3353 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3354 } 3355 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3356 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); 3357 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3358 #if defined(PETSC_USE_COMPLEX) 3359 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3360 #endif 3361 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3362 &pcbddc->adaptive_constraints_data)); 3363 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3364 3365 maxneigs = 0; 3366 cum = cumarray = 0; 3367 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3368 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3369 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3370 const PetscInt *idxs; 3371 3372 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3373 for (cum = 0; cum < nv; cum++) { 3374 pcbddc->adaptive_constraints_n[cum] = 1; 3375 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3376 pcbddc->adaptive_constraints_data[cum] = 1.0; 3377 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3378 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3379 } 3380 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3381 } 3382 3383 if (mss) { /* multilevel */ 3384 if (sub_schurs->gdsw) { 3385 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3386 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3387 } else { 3388 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3389 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3390 } 3391 } 3392 3393 lthresh = pcbddc->adaptive_threshold[0]; 3394 uthresh = pcbddc->adaptive_threshold[1]; 3395 upart = pcbddc->use_deluxe_scaling; 3396 for (i = 0; i < sub_schurs->n_subs; i++) { 3397 const PetscInt *idxs; 3398 PetscReal upper, lower; 3399 PetscInt j, subset_size, eigs_start = 0; 3400 PetscBLASInt B_N; 3401 PetscBool same_data = PETSC_FALSE; 3402 PetscBool scal = PETSC_FALSE; 3403 3404 if (upart) { 3405 upper = PETSC_MAX_REAL; 3406 lower = uthresh; 3407 } else { 3408 if (sub_schurs->gdsw) { 3409 upper = uthresh; 3410 lower = PETSC_MIN_REAL; 3411 } else { 3412 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3413 upper = 1. / uthresh; 3414 lower = 0.; 3415 } 3416 } 3417 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3418 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3419 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3420 /* this is experimental: we assume the dofs have been properly grouped to have 3421 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3422 if (!sub_schurs->is_posdef) { 3423 Mat T; 3424 3425 for (j = 0; j < subset_size; j++) { 3426 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3427 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3428 PetscCall(MatScale(T, -1.0)); 3429 PetscCall(MatDestroy(&T)); 3430 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3431 PetscCall(MatScale(T, -1.0)); 3432 PetscCall(MatDestroy(&T)); 3433 if (sub_schurs->change_primal_sub) { 3434 PetscInt nz, k; 3435 const PetscInt *idxs; 3436 3437 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3438 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3439 for (k = 0; k < nz; k++) { 3440 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3441 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3442 } 3443 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3444 } 3445 scal = PETSC_TRUE; 3446 break; 3447 } 3448 } 3449 } 3450 3451 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3452 if (sub_schurs->is_symmetric) { 3453 PetscInt j, k; 3454 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3455 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3456 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3457 } 3458 for (j = 0; j < subset_size; j++) { 3459 for (k = j; k < subset_size; k++) { 3460 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3461 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3462 } 3463 } 3464 } else { 3465 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3466 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3467 } 3468 } else { 3469 S = Sarray + cumarray; 3470 St = Starray + cumarray; 3471 } 3472 /* see if we can save some work */ 3473 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); 3474 3475 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3476 B_neigs = 0; 3477 } else { 3478 PetscBLASInt B_itype = 1, B_IL = 1, B_IU = 0; 3479 PetscReal eps = -1.0; /* dlamch? */ 3480 PetscInt nmin_s; 3481 PetscBool compute_range; 3482 3483 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3484 B_neigs = 0; 3485 compute_range = (PetscBool)!same_data; 3486 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3487 3488 if (pcbddc->dbg_flag) { 3489 PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof; 3490 3491 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); 3492 PetscCall( 3493 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc)); 3494 } 3495 3496 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3497 if (compute_range) { 3498 /* ask for eigenvalues larger than thresh */ 3499 if (sub_schurs->is_posdef) { 3500 #if defined(PETSC_USE_COMPLEX) 3501 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3502 #else 3503 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3504 #endif 3505 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3506 } else { /* no theory so far, but it works nicely */ 3507 PetscInt recipe = 0, recipe_m = 1; 3508 PetscReal bb[2]; 3509 3510 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3511 switch (recipe) { 3512 case 0: 3513 if (scal) { 3514 bb[0] = PETSC_MIN_REAL; 3515 bb[1] = lthresh; 3516 } else { 3517 bb[0] = uthresh; 3518 bb[1] = PETSC_MAX_REAL; 3519 } 3520 #if defined(PETSC_USE_COMPLEX) 3521 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3522 #else 3523 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3524 #endif 3525 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3526 break; 3527 case 1: 3528 bb[0] = PETSC_MIN_REAL; 3529 bb[1] = lthresh * lthresh; 3530 #if defined(PETSC_USE_COMPLEX) 3531 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3532 #else 3533 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3534 #endif 3535 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3536 if (!scal) { 3537 PetscBLASInt B_neigs2 = 0; 3538 3539 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3540 bb[1] = PETSC_MAX_REAL; 3541 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3542 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3543 #if defined(PETSC_USE_COMPLEX) 3544 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3545 #else 3546 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3547 #endif 3548 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3549 B_neigs += B_neigs2; 3550 } 3551 break; 3552 case 2: 3553 if (scal) { 3554 bb[0] = PETSC_MIN_REAL; 3555 bb[1] = 0; 3556 #if defined(PETSC_USE_COMPLEX) 3557 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3558 #else 3559 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3560 #endif 3561 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3562 } else { 3563 PetscBLASInt B_neigs2 = 0; 3564 PetscBool do_copy = PETSC_FALSE; 3565 3566 lthresh = PetscMax(lthresh, 0.0); 3567 if (lthresh > 0.0) { 3568 bb[0] = PETSC_MIN_REAL; 3569 bb[1] = lthresh * lthresh; 3570 3571 do_copy = PETSC_TRUE; 3572 #if defined(PETSC_USE_COMPLEX) 3573 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3574 #else 3575 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3576 #endif 3577 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3578 } 3579 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3580 bb[1] = PETSC_MAX_REAL; 3581 if (do_copy) { 3582 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3583 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3584 } 3585 #if defined(PETSC_USE_COMPLEX) 3586 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3587 #else 3588 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3589 #endif 3590 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3591 B_neigs += B_neigs2; 3592 } 3593 break; 3594 case 3: 3595 if (scal) { 3596 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3597 } else { 3598 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3599 } 3600 if (!scal) { 3601 bb[0] = uthresh; 3602 bb[1] = PETSC_MAX_REAL; 3603 #if defined(PETSC_USE_COMPLEX) 3604 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3605 #else 3606 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3607 #endif 3608 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3609 } 3610 if (recipe_m > 0 && B_N - B_neigs > 0) { 3611 PetscBLASInt B_neigs2 = 0; 3612 3613 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3614 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3615 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3616 #if defined(PETSC_USE_COMPLEX) 3617 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3618 #else 3619 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3620 #endif 3621 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3622 B_neigs += B_neigs2; 3623 } 3624 break; 3625 case 4: 3626 bb[0] = PETSC_MIN_REAL; 3627 bb[1] = lthresh; 3628 #if defined(PETSC_USE_COMPLEX) 3629 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3630 #else 3631 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3632 #endif 3633 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3634 { 3635 PetscBLASInt B_neigs2 = 0; 3636 3637 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3638 bb[1] = PETSC_MAX_REAL; 3639 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3640 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3641 #if defined(PETSC_USE_COMPLEX) 3642 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3643 #else 3644 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3645 #endif 3646 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3647 B_neigs += B_neigs2; 3648 } 3649 break; 3650 case 5: /* same as before: first compute all eigenvalues, then filter */ 3651 #if defined(PETSC_USE_COMPLEX) 3652 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3653 #else 3654 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3655 #endif 3656 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3657 { 3658 PetscInt e, k, ne; 3659 for (e = 0, ne = 0; e < B_neigs; e++) { 3660 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3661 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3662 eigs[ne] = eigs[e]; 3663 ne++; 3664 } 3665 } 3666 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3667 PetscCall(PetscBLASIntCast(ne, &B_neigs)); 3668 } 3669 break; 3670 default: 3671 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3672 } 3673 } 3674 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3675 PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU)); 3676 #if defined(PETSC_USE_COMPLEX) 3677 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3678 #else 3679 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3680 #endif 3681 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3682 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3683 PetscInt k; 3684 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3685 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3686 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3687 nmin = nmax; 3688 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3689 for (k = 0; k < nmax; k++) { 3690 eigs[k] = 1. / PETSC_SMALL; 3691 eigv[k * (subset_size + 1)] = 1.0; 3692 } 3693 } 3694 PetscCall(PetscFPTrapPop()); 3695 if (B_ierr) { 3696 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3697 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3698 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3699 } 3700 3701 if (B_neigs > nmax) { 3702 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); 3703 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3704 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3705 } 3706 3707 nmin_s = PetscMin(nmin, B_N); 3708 if (B_neigs < nmin_s) { 3709 PetscBLASInt B_neigs2 = 0; 3710 3711 if (upart) { 3712 if (scal) { 3713 PetscCall(PetscBLASIntCast(nmin_s, &B_IU)); 3714 B_IL = B_neigs + 1; 3715 } else { 3716 PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL)); 3717 B_IU = B_N - B_neigs; 3718 } 3719 } else { 3720 B_IL = B_neigs + 1; 3721 PetscCall(PetscBLASIntCast(nmin_s, &B_IU)); 3722 } 3723 if (pcbddc->dbg_flag) { 3724 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3725 } 3726 if (sub_schurs->is_symmetric) { 3727 PetscInt j, k; 3728 for (j = 0; j < subset_size; j++) { 3729 for (k = j; k < subset_size; k++) { 3730 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3731 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3732 } 3733 } 3734 } else { 3735 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3736 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3737 } 3738 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3739 #if defined(PETSC_USE_COMPLEX) 3740 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3741 #else 3742 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3743 #endif 3744 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3745 PetscCall(PetscFPTrapPop()); 3746 B_neigs += B_neigs2; 3747 } 3748 if (B_ierr) { 3749 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3750 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3751 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3752 } 3753 if (pcbddc->dbg_flag) { 3754 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3755 for (j = 0; j < B_neigs; j++) { 3756 if (!sub_schurs->gdsw) { 3757 if (eigs[j] == 0.0) { 3758 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3759 } else { 3760 if (upart) { 3761 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3762 } else { 3763 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1 / eigs[j + eigs_start]))); 3764 } 3765 } 3766 } else { 3767 double pg = (double)eigs[j + eigs_start]; 3768 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3769 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3770 } 3771 } 3772 } 3773 } 3774 /* change the basis back to the original one */ 3775 if (sub_schurs->change) { 3776 Mat change, phi, phit; 3777 3778 if (pcbddc->dbg_flag > 2) { 3779 PetscInt ii; 3780 for (ii = 0; ii < B_neigs; ii++) { 3781 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3782 for (j = 0; j < B_N; j++) { 3783 #if defined(PETSC_USE_COMPLEX) 3784 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3785 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3786 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3787 #else 3788 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3789 #endif 3790 } 3791 } 3792 } 3793 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3794 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3795 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi)); 3796 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3797 PetscCall(MatDestroy(&phit)); 3798 PetscCall(MatDestroy(&phi)); 3799 } 3800 maxneigs = PetscMax(B_neigs, maxneigs); 3801 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3802 if (B_neigs) { 3803 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3804 3805 if (pcbddc->dbg_flag > 1) { 3806 PetscInt ii; 3807 for (ii = 0; ii < B_neigs; ii++) { 3808 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3809 for (j = 0; j < B_N; j++) { 3810 #if defined(PETSC_USE_COMPLEX) 3811 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3812 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3813 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3814 #else 3815 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3816 #endif 3817 } 3818 } 3819 } 3820 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3821 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3822 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3823 cum++; 3824 } 3825 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3826 /* shift for next computation */ 3827 cumarray += subset_size * subset_size; 3828 } 3829 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3830 3831 if (mss) { 3832 if (sub_schurs->gdsw) { 3833 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3834 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3835 } else { 3836 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3837 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3838 /* destroy matrices (junk) */ 3839 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3840 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3841 } 3842 } 3843 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3844 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3845 #if defined(PETSC_USE_COMPLEX) 3846 PetscCall(PetscFree(rwork)); 3847 #endif 3848 if (pcbddc->dbg_flag) { 3849 PetscInt maxneigs_r; 3850 PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3851 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3852 } 3853 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3854 PetscFunctionReturn(PETSC_SUCCESS); 3855 } 3856 3857 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3858 { 3859 Mat coarse_submat; 3860 3861 PetscFunctionBegin; 3862 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3863 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3864 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3865 3866 /* Setup local neumann solver ksp_R */ 3867 /* PCBDDCSetUpLocalScatters should be called first! */ 3868 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3869 3870 /* 3871 Setup local correction and local part of coarse basis. 3872 Gives back the dense local part of the coarse matrix in column major ordering 3873 */ 3874 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat)); 3875 3876 /* Compute total number of coarse nodes and setup coarse solver */ 3877 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat)); 3878 PetscCall(MatDestroy(&coarse_submat)); 3879 PetscFunctionReturn(PETSC_SUCCESS); 3880 } 3881 3882 PetscErrorCode PCBDDCResetCustomization(PC pc) 3883 { 3884 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3885 3886 PetscFunctionBegin; 3887 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3888 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3889 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3890 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3891 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3892 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3893 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3894 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3895 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3896 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3897 PetscFunctionReturn(PETSC_SUCCESS); 3898 } 3899 3900 PetscErrorCode PCBDDCResetTopography(PC pc) 3901 { 3902 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3903 PetscInt i; 3904 3905 PetscFunctionBegin; 3906 PetscCall(MatDestroy(&pcbddc->nedcG)); 3907 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3908 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3909 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3910 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3911 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3912 PetscCall(VecDestroy(&pcbddc->work_change)); 3913 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3914 PetscCall(MatDestroy(&pcbddc->divudotp)); 3915 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3916 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3917 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3918 pcbddc->n_local_subs = 0; 3919 PetscCall(PetscFree(pcbddc->local_subs)); 3920 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3921 pcbddc->graphanalyzed = PETSC_FALSE; 3922 pcbddc->recompute_topography = PETSC_TRUE; 3923 pcbddc->corner_selected = PETSC_FALSE; 3924 PetscFunctionReturn(PETSC_SUCCESS); 3925 } 3926 3927 PetscErrorCode PCBDDCResetSolvers(PC pc) 3928 { 3929 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3930 3931 PetscFunctionBegin; 3932 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3933 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3934 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3935 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3936 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3937 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3938 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3939 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3940 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3941 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3942 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3943 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3944 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3945 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3946 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3947 PetscCall(KSPReset(pcbddc->ksp_D)); 3948 PetscCall(KSPReset(pcbddc->ksp_R)); 3949 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3950 PetscCall(MatDestroy(&pcbddc->local_mat)); 3951 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3952 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 3953 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3954 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3955 PetscCall(MatDestroy(&pcbddc->benign_change)); 3956 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3957 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 3958 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3959 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3960 if (pcbddc->benign_zerodiag_subs) { 3961 PetscInt i; 3962 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3963 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3964 } 3965 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3966 PetscFunctionReturn(PETSC_SUCCESS); 3967 } 3968 3969 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3970 { 3971 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3972 PC_IS *pcis = (PC_IS *)pc->data; 3973 VecType impVecType; 3974 PetscInt n_constraints, n_R, old_size; 3975 3976 PetscFunctionBegin; 3977 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3978 n_R = pcis->n - pcbddc->n_vertices; 3979 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 3980 /* local work vectors (try to avoid unneeded work)*/ 3981 /* R nodes */ 3982 old_size = -1; 3983 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); 3984 if (n_R != old_size) { 3985 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3986 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3987 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 3988 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 3989 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 3990 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 3991 } 3992 /* local primal dofs */ 3993 old_size = -1; 3994 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); 3995 if (pcbddc->local_primal_size != old_size) { 3996 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3997 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 3998 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 3999 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 4000 } 4001 /* local explicit constraints */ 4002 old_size = -1; 4003 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); 4004 if (n_constraints && n_constraints != old_size) { 4005 PetscCall(VecDestroy(&pcbddc->vec1_C)); 4006 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 4007 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 4008 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 4009 } 4010 PetscFunctionReturn(PETSC_SUCCESS); 4011 } 4012 4013 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode) 4014 { 4015 PetscBool flg; 4016 const PetscScalar *a; 4017 4018 PetscFunctionBegin; 4019 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg)); 4020 if (flg) { 4021 PetscCall(MatDenseGetArrayRead(S, &a)); 4022 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE)); 4023 PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode)); 4024 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE)); 4025 PetscCall(MatDenseRestoreArrayRead(S, &a)); 4026 } else { 4027 const PetscInt *ii, *jj; 4028 PetscInt n; 4029 PetscInt buf[8192], *bufc = NULL; 4030 PetscBool freeb = PETSC_FALSE; 4031 Mat Sm = S; 4032 4033 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg)); 4034 if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm)); 4035 else PetscCall(PetscObjectReference((PetscObject)S)); 4036 PetscCall(MatSeqAIJGetArrayRead(Sm, &a)); 4037 PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4038 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure"); 4039 if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) { 4040 bufc = buf; 4041 } else { 4042 PetscCall(PetscMalloc1(nc, &bufc)); 4043 freeb = PETSC_TRUE; 4044 } 4045 4046 for (PetscInt i = 0; i < n; i++) { 4047 const PetscInt nci = ii[i + 1] - ii[i]; 4048 4049 for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]]; 4050 PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode)); 4051 } 4052 PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4053 PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a)); 4054 PetscCall(MatDestroy(&Sm)); 4055 if (freeb) PetscCall(PetscFree(bufc)); 4056 } 4057 PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY)); 4058 PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY)); 4059 PetscFunctionReturn(PETSC_SUCCESS); 4060 } 4061 4062 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat) 4063 { 4064 Mat_SeqAIJ *aij; 4065 PetscInt *ii, *jj; 4066 PetscScalar *aa; 4067 PetscInt nnz = 0, m, nc; 4068 const PetscScalar *a; 4069 const PetscScalar zero = 0.0; 4070 4071 PetscFunctionBegin; 4072 PetscCall(MatGetLocalSize(D, &m, &nc)); 4073 PetscCall(MatDenseGetArrayRead(D, &a)); 4074 PetscCall(PetscMalloc1(m + 1, &ii)); 4075 PetscCall(PetscMalloc1(m * nc, &jj)); 4076 PetscCall(PetscMalloc1(m * nc, &aa)); 4077 ii[0] = 0; 4078 for (PetscInt k = 0; k < m; k++) { 4079 for (PetscInt s = 0; s < nc; s++) { 4080 const PetscInt c = s + k * nc; 4081 const PetscScalar v = a[k + s * m]; 4082 4083 if (PetscUnlikely(j[c] < 0 || v == zero)) continue; 4084 jj[nnz] = j[c]; 4085 aa[nnz] = a[k + s * m]; 4086 nnz++; 4087 } 4088 ii[k + 1] = nnz; 4089 } 4090 4091 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat)); 4092 PetscCall(MatDenseRestoreArrayRead(D, &a)); 4093 4094 aij = (Mat_SeqAIJ *)(*mat)->data; 4095 aij->free_a = PETSC_TRUE; 4096 aij->free_ij = PETSC_TRUE; 4097 PetscFunctionReturn(PETSC_SUCCESS); 4098 } 4099 4100 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */ 4101 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B) 4102 { 4103 PetscInt n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL; 4104 const PetscBool allowzeropivot = PETSC_FALSE; 4105 PetscBool zeropivotdetected = PETSC_FALSE; 4106 const PetscReal shift = 0.0; 4107 PetscInt ipvt[5], *ii, *jj, *indi, *indj; 4108 PetscScalar work[25], *v_work = NULL, *aa, *diag; 4109 PetscLogDouble flops = 0.0; 4110 4111 PetscFunctionBegin; 4112 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices"); 4113 for (PetscInt i = 0; i < nblocks; i++) { 4114 ncnt += bsizes[i]; 4115 ncnt2 += PetscSqr(bsizes[i]); 4116 } 4117 PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n); 4118 for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]); 4119 if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots)); 4120 4121 PetscCall(PetscMalloc1(n + 1, &ii)); 4122 PetscCall(PetscMalloc1(ncnt2, &jj)); 4123 PetscCall(PetscCalloc1(ncnt2, &aa)); 4124 4125 ncnt = 0; 4126 ii[0] = 0; 4127 indi = ii; 4128 indj = jj; 4129 diag = aa; 4130 for (PetscInt i = 0; i < nblocks; i++) { 4131 const PetscInt bs = bsizes[i]; 4132 4133 for (PetscInt k = 0; k < bs; k++) { 4134 indi[k + 1] = indi[k] + bs; 4135 for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j; 4136 } 4137 PetscCall(MatGetValues(A, bs, indj, bs, indj, diag)); 4138 switch (bs) { 4139 case 1: 4140 *diag = 1.0 / (*diag); 4141 break; 4142 case 2: 4143 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 4144 break; 4145 case 3: 4146 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 4147 break; 4148 case 4: 4149 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 4150 break; 4151 case 5: 4152 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 4153 break; 4154 case 6: 4155 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 4156 break; 4157 case 7: 4158 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 4159 break; 4160 default: 4161 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4162 } 4163 ncnt += bs; 4164 flops += 2.0 * PetscPowInt(bs, 3) / 3.0; 4165 diag += bs * bs; 4166 indj += bs * bs; 4167 indi += bs; 4168 } 4169 PetscCall(PetscLogFlops(flops)); 4170 PetscCall(PetscFree2(v_work, v_pivots)); 4171 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B)); 4172 { 4173 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data; 4174 aij->free_a = PETSC_TRUE; 4175 aij->free_ij = PETSC_TRUE; 4176 } 4177 PetscFunctionReturn(PETSC_SUCCESS); 4178 } 4179 4180 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B) 4181 { 4182 const PetscScalar *rarr; 4183 PetscScalar *larr; 4184 PetscSF vsf; 4185 PetscInt n, rld, lld; 4186 4187 PetscFunctionBegin; 4188 PetscCall(MatGetSize(A, NULL, &n)); 4189 PetscCall(MatDenseGetLDA(A, &rld)); 4190 PetscCall(MatDenseGetLDA(B, &lld)); 4191 PetscCall(MatDenseGetArrayRead(A, &rarr)); 4192 PetscCall(MatDenseGetArrayWrite(B, &larr)); 4193 PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf)); 4194 PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4195 PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4196 PetscCall(MatDenseRestoreArrayRead(A, &rarr)); 4197 PetscCall(MatDenseRestoreArrayWrite(B, &larr)); 4198 PetscCall(PetscSFDestroy(&vsf)); 4199 PetscFunctionReturn(PETSC_SUCCESS); 4200 } 4201 4202 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat) 4203 { 4204 PC_IS *pcis = (PC_IS *)pc->data; 4205 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4206 PCBDDCGraph graph = pcbddc->mat_graph; 4207 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4208 /* submatrices of local problem */ 4209 Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL; 4210 /* submatrices of local coarse problem */ 4211 Mat S_CV = NULL, S_VC = NULL, S_CC = NULL; 4212 /* working matrices */ 4213 Mat C_CR; 4214 4215 /* additional working stuff */ 4216 PC pc_R; 4217 IS is_R, is_V, is_C; 4218 const PetscInt *idx_V, *idx_C; 4219 Mat F, Brhs = NULL; 4220 Vec dummy_vec; 4221 PetscBool isLU, isCHOL, need_benign_correction, sparserhs; 4222 PetscInt *idx_V_B; 4223 PetscInt lda_rhs, n_vertices, n_constraints, *p0_lidx_I; 4224 PetscInt n_eff_vertices, n_eff_constraints; 4225 PetscInt i, n_R, n_D, n_B; 4226 PetscScalar one = 1.0, m_one = -1.0; 4227 4228 /* Multi-element support */ 4229 PetscBool multi_element = graph->multi_element; 4230 PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL; 4231 PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL; 4232 IS is_C_perm = NULL; 4233 PetscInt n_C_bss = 0, *C_bss = NULL; 4234 Mat coarse_phi_multi; 4235 4236 PetscFunctionBegin; 4237 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 4238 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4239 4240 /* Set Non-overlapping dimensions */ 4241 n_vertices = pcbddc->n_vertices; 4242 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 4243 n_B = pcis->n_B; 4244 n_D = pcis->n - n_B; 4245 n_R = pcis->n - n_vertices; 4246 4247 /* vertices in boundary numbering */ 4248 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 4249 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 4250 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 4251 4252 /* these two cases still need to be optimized */ 4253 if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE; 4254 4255 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 4256 if (multi_element) { 4257 PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 4258 4259 PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat)); 4260 PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size)); 4261 PetscCall(MatSetType(*coarse_submat, MATSEQAIJ)); 4262 PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 4263 PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 4264 4265 /* group vertices and constraints by subdomain id */ 4266 const PetscInt *vidxs = pcbddc->primal_indices_local_idxs; 4267 const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices; 4268 PetscInt *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz; 4269 PetscInt n_el = PetscMax(graph->n_local_subs, 1); 4270 4271 PetscCall(PetscCalloc1(2 * n_el, &count_eff)); 4272 PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V)); 4273 PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C)); 4274 for (PetscInt i = 0; i < n_vertices; i++) { 4275 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4276 4277 V_to_eff_V[i] = count_eff[s]; 4278 count_eff[s] += 1; 4279 } 4280 for (PetscInt i = 0; i < n_constraints; i++) { 4281 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1; 4282 4283 C_to_eff_C[i] = count_eff[s]; 4284 count_eff[s] += 1; 4285 } 4286 4287 /* preallocation */ 4288 PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz)); 4289 for (PetscInt i = 0; i < n_vertices; i++) { 4290 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4291 4292 nnz[i] = count_eff[s] + count_eff[s + 1]; 4293 } 4294 for (PetscInt i = 0; i < n_constraints; i++) { 4295 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub; 4296 4297 nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1]; 4298 } 4299 PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz)); 4300 PetscCall(PetscFree(nnz)); 4301 4302 n_eff_vertices = 0; 4303 n_eff_constraints = 0; 4304 for (PetscInt i = 0; i < n_el; i++) { 4305 n_eff_vertices = PetscMax(n_eff_vertices, count_eff[2 * i]); 4306 n_eff_constraints = PetscMax(n_eff_constraints, count_eff[2 * i + 1]); 4307 count_eff[2 * i] = 0; 4308 count_eff[2 * i + 1] = 0; 4309 } 4310 4311 const PetscInt *idx; 4312 PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C)); 4313 4314 for (PetscInt i = 0; i < n_vertices; i++) { 4315 const PetscInt e = graph->nodes[vidxs[i]].local_sub; 4316 const PetscInt s = 2 * e; 4317 4318 V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i; 4319 count_eff[s] += 1; 4320 } 4321 for (PetscInt i = 0; i < n_constraints; i++) { 4322 const PetscInt e = graph->nodes[cidxs[i]].local_sub; 4323 const PetscInt s = 2 * e + 1; 4324 4325 C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i; 4326 count_eff[s] += 1; 4327 } 4328 4329 PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J)); 4330 PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J)); 4331 PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J)); 4332 PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J)); 4333 for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1; 4334 for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1; 4335 for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1; 4336 for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1; 4337 4338 PetscCall(ISGetIndices(pcbddc->is_R_local, &idx)); 4339 for (PetscInt i = 0; i < n_R; i++) { 4340 const PetscInt e = graph->nodes[idx[i]].local_sub; 4341 const PetscInt s = 2 * e; 4342 PetscInt j; 4343 4344 for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4345 for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4346 } 4347 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx)); 4348 PetscCall(ISGetIndices(pcis->is_B_local, &idx)); 4349 for (PetscInt i = 0; i < n_B; i++) { 4350 const PetscInt e = graph->nodes[idx[i]].local_sub; 4351 const PetscInt s = 2 * e; 4352 PetscInt j; 4353 4354 for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4355 for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4356 } 4357 PetscCall(ISRestoreIndices(pcis->is_B_local, &idx)); 4358 4359 /* permutation and blocksizes for block invert of S_CC */ 4360 PetscInt *idxp; 4361 4362 PetscCall(PetscMalloc1(n_constraints, &idxp)); 4363 PetscCall(PetscMalloc1(n_el, &C_bss)); 4364 n_C_bss = 0; 4365 for (PetscInt e = 0, cnt = 0; e < n_el; e++) { 4366 const PetscInt nc = count_eff[2 * e + 1]; 4367 4368 if (nc) C_bss[n_C_bss++] = nc; 4369 for (PetscInt c = 0; c < nc; c++) idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c]; 4370 cnt += nc; 4371 } 4372 4373 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm)); 4374 4375 PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C)); 4376 PetscCall(PetscFree(count_eff)); 4377 } else { 4378 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat)); 4379 n_eff_constraints = n_constraints; 4380 n_eff_vertices = n_vertices; 4381 } 4382 4383 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 4384 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 4385 PetscCall(PCSetUp(pc_R)); 4386 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 4387 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 4388 lda_rhs = n_R; 4389 need_benign_correction = PETSC_FALSE; 4390 if (isLU || isCHOL) { 4391 PetscCall(PCFactorGetMatrix(pc_R, &F)); 4392 } else if (sub_schurs && sub_schurs->reuse_solver) { 4393 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4394 MatFactorType type; 4395 4396 F = reuse_solver->F; 4397 PetscCall(MatGetFactorType(F, &type)); 4398 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 4399 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 4400 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 4401 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 4402 } else F = NULL; 4403 4404 /* determine if we can use a sparse right-hand side */ 4405 sparserhs = PETSC_FALSE; 4406 if (F && !multi_element) { 4407 MatSolverType solver; 4408 4409 PetscCall(MatFactorGetSolverType(F, &solver)); 4410 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 4411 } 4412 4413 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 4414 dummy_vec = NULL; 4415 if (need_benign_correction && lda_rhs != n_R && F) { 4416 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 4417 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 4418 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 4419 } 4420 4421 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 4422 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4423 4424 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R)); 4425 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V)); 4426 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C)); 4427 PetscCall(ISGetIndices(is_V, &idx_V)); 4428 PetscCall(ISGetIndices(is_C, &idx_C)); 4429 4430 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4431 if (n_constraints) { 4432 Mat C_B; 4433 4434 /* Extract constraints on R nodes: C_{CR} */ 4435 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 4436 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4437 4438 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4439 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4440 if (!sparserhs) { 4441 PetscScalar *marr; 4442 4443 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs)); 4444 PetscCall(MatDenseGetArrayWrite(Brhs, &marr)); 4445 for (i = 0; i < n_constraints; i++) { 4446 const PetscScalar *row_cmat_values; 4447 const PetscInt *row_cmat_indices; 4448 PetscInt size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i; 4449 4450 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4451 for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j]; 4452 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4453 } 4454 PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr)); 4455 } else { 4456 Mat tC_CR; 4457 4458 PetscCall(MatScale(C_CR, -1.0)); 4459 if (lda_rhs != n_R) { 4460 PetscScalar *aa; 4461 PetscInt r, *ii, *jj; 4462 PetscBool done; 4463 4464 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4465 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4466 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 4467 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 4468 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4469 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4470 } else { 4471 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4472 tC_CR = C_CR; 4473 } 4474 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 4475 PetscCall(MatDestroy(&tC_CR)); 4476 } 4477 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R)); 4478 if (F) { 4479 if (need_benign_correction) { 4480 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4481 4482 /* rhs is already zero on interior dofs, no need to change the rhs */ 4483 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 4484 } 4485 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 4486 if (need_benign_correction) { 4487 PetscScalar *marr; 4488 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4489 4490 /* XXX multi_element? */ 4491 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4492 if (lda_rhs != n_R) { 4493 for (i = 0; i < n_eff_constraints; i++) { 4494 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4495 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4496 PetscCall(VecResetArray(dummy_vec)); 4497 } 4498 } else { 4499 for (i = 0; i < n_eff_constraints; i++) { 4500 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4501 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4502 PetscCall(VecResetArray(pcbddc->vec1_R)); 4503 } 4504 } 4505 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4506 } 4507 } else { 4508 const PetscScalar *barr; 4509 PetscScalar *marr; 4510 4511 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4512 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4513 for (i = 0; i < n_eff_constraints; i++) { 4514 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4515 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4516 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4517 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4518 PetscCall(VecResetArray(pcbddc->vec1_R)); 4519 PetscCall(VecResetArray(pcbddc->vec2_R)); 4520 } 4521 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4522 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4523 } 4524 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 4525 PetscCall(MatDestroy(&Brhs)); 4526 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4527 if (!pcbddc->switch_static) { 4528 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2)); 4529 for (i = 0; i < n_eff_constraints; i++) { 4530 Vec r, b; 4531 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r)); 4532 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b)); 4533 PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4534 PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4535 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b)); 4536 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r)); 4537 } 4538 if (multi_element) { 4539 Mat T; 4540 4541 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4542 PetscCall(MatDestroy(&local_auxmat2_R)); 4543 local_auxmat2_R = T; 4544 PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T)); 4545 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4546 pcbddc->local_auxmat2 = T; 4547 } 4548 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4549 } else { 4550 if (multi_element) { 4551 Mat T; 4552 4553 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4554 PetscCall(MatDestroy(&local_auxmat2_R)); 4555 local_auxmat2_R = T; 4556 } 4557 if (lda_rhs != n_R) { 4558 PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 4559 } else { 4560 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4561 pcbddc->local_auxmat2 = local_auxmat2_R; 4562 } 4563 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4564 } 4565 PetscCall(MatScale(S_CC, m_one)); 4566 if (multi_element) { 4567 Mat T, T2; 4568 IS isp, ispi; 4569 4570 isp = is_C_perm; 4571 4572 PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi)); 4573 PetscCall(MatPermute(S_CC, isp, isp, &T)); 4574 PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2)); 4575 PetscCall(MatDestroy(&T)); 4576 PetscCall(MatDestroy(&S_CC)); 4577 PetscCall(MatPermute(T2, ispi, ispi, &S_CC)); 4578 PetscCall(MatDestroy(&T2)); 4579 PetscCall(ISDestroy(&ispi)); 4580 } else { 4581 if (isCHOL) { 4582 PetscCall(MatCholeskyFactor(S_CC, NULL, NULL)); 4583 } else { 4584 PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL)); 4585 } 4586 PetscCall(MatSeqDenseInvertFactors_Private(S_CC)); 4587 } 4588 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4589 PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1)); 4590 PetscCall(MatDestroy(&C_B)); 4591 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES)); 4592 } 4593 4594 /* Get submatrices from subdomain matrix */ 4595 if (n_vertices) { 4596 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4597 PetscBool oldpin; 4598 #endif 4599 IS is_aux; 4600 4601 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4602 IS tis; 4603 4604 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 4605 PetscCall(ISSort(tis)); 4606 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 4607 PetscCall(ISDestroy(&tis)); 4608 } else { 4609 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 4610 } 4611 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4612 oldpin = pcbddc->local_mat->boundtocpu; 4613 #endif 4614 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 4615 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 4616 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 4617 /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4618 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4619 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 4620 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4621 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 4622 #endif 4623 PetscCall(ISDestroy(&is_aux)); 4624 } 4625 PetscCall(ISDestroy(&is_C_perm)); 4626 PetscCall(PetscFree(C_bss)); 4627 4628 p0_lidx_I = NULL; 4629 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4630 const PetscInt *idxs; 4631 4632 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4633 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4634 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); 4635 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4636 } 4637 4638 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4639 4640 /* Matrices of coarse basis functions (local) */ 4641 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4642 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4643 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4644 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4645 if (!multi_element) { 4646 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B)); 4647 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D)); 4648 coarse_phi_multi = NULL; 4649 } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */ 4650 IS is_rows[2] = {pcbddc->is_R_local, NULL}; 4651 IS is_cols[2] = {is_V, is_C}; 4652 4653 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1])); 4654 PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi)); 4655 PetscCall(ISDestroy(&is_rows[1])); 4656 } 4657 4658 /* vertices */ 4659 if (n_vertices) { 4660 PetscBool restoreavr = PETSC_FALSE; 4661 Mat A_RRmA_RV = NULL; 4662 4663 PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4664 PetscCall(MatDestroy(&A_VV)); 4665 4666 if (n_R) { 4667 Mat A_RV_bcorr = NULL, S_VV; 4668 4669 PetscCall(MatScale(A_RV, m_one)); 4670 if (need_benign_correction) { 4671 ISLocalToGlobalMapping RtoN; 4672 IS is_p0; 4673 PetscInt *idxs_p0, n; 4674 4675 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4676 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4677 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4678 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4679 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4680 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4681 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4682 PetscCall(ISDestroy(&is_p0)); 4683 } 4684 4685 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV)); 4686 if (!sparserhs || need_benign_correction) { 4687 if (lda_rhs == n_R && !multi_element) { 4688 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4689 } else { 4690 Mat T; 4691 PetscScalar *av, *array; 4692 const PetscInt *xadj, *adjncy; 4693 PetscInt n; 4694 PetscBool flg_row; 4695 4696 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T)); 4697 PetscCall(MatDenseGetArrayWrite(T, &array)); 4698 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4699 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4700 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4701 for (i = 0; i < n; i++) { 4702 PetscInt j; 4703 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j]; 4704 } 4705 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4706 PetscCall(MatDenseRestoreArrayWrite(T, &array)); 4707 PetscCall(MatDestroy(&A_RV)); 4708 A_RV = T; 4709 } 4710 if (need_benign_correction) { 4711 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4712 PetscScalar *marr; 4713 4714 /* XXX multi_element */ 4715 PetscCall(MatDenseGetArray(A_RV, &marr)); 4716 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4717 4718 | 0 0 0 | (V) 4719 L = | 0 0 -1 | (P-p0) 4720 | 0 0 -1 | (p0) 4721 4722 */ 4723 for (i = 0; i < reuse_solver->benign_n; i++) { 4724 const PetscScalar *vals; 4725 const PetscInt *idxs, *idxs_zero; 4726 PetscInt n, j, nz; 4727 4728 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4729 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4730 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4731 for (j = 0; j < n; j++) { 4732 PetscScalar val = vals[j]; 4733 PetscInt k, col = idxs[j]; 4734 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4735 } 4736 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4737 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4738 } 4739 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4740 } 4741 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4742 Brhs = A_RV; 4743 } else { 4744 Mat tA_RVT, A_RVT; 4745 4746 if (!pcbddc->symmetric_primal) { 4747 /* A_RV already scaled by -1 */ 4748 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4749 } else { 4750 restoreavr = PETSC_TRUE; 4751 PetscCall(MatScale(A_VR, -1.0)); 4752 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4753 A_RVT = A_VR; 4754 } 4755 if (lda_rhs != n_R) { 4756 PetscScalar *aa; 4757 PetscInt r, *ii, *jj; 4758 PetscBool done; 4759 4760 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4761 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4762 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4763 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4764 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4765 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4766 } else { 4767 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4768 tA_RVT = A_RVT; 4769 } 4770 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4771 PetscCall(MatDestroy(&tA_RVT)); 4772 PetscCall(MatDestroy(&A_RVT)); 4773 } 4774 if (F) { 4775 /* need to correct the rhs */ 4776 if (need_benign_correction) { 4777 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4778 PetscScalar *marr; 4779 4780 PetscCall(MatDenseGetArray(Brhs, &marr)); 4781 if (lda_rhs != n_R) { 4782 for (i = 0; i < n_eff_vertices; i++) { 4783 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4784 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4785 PetscCall(VecResetArray(dummy_vec)); 4786 } 4787 } else { 4788 for (i = 0; i < n_eff_vertices; i++) { 4789 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4790 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4791 PetscCall(VecResetArray(pcbddc->vec1_R)); 4792 } 4793 } 4794 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4795 } 4796 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4797 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4798 /* need to correct the solution */ 4799 if (need_benign_correction) { 4800 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4801 PetscScalar *marr; 4802 4803 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4804 if (lda_rhs != n_R) { 4805 for (i = 0; i < n_eff_vertices; i++) { 4806 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4807 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4808 PetscCall(VecResetArray(dummy_vec)); 4809 } 4810 } else { 4811 for (i = 0; i < n_eff_vertices; i++) { 4812 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4813 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4814 PetscCall(VecResetArray(pcbddc->vec1_R)); 4815 } 4816 } 4817 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4818 } 4819 } else { 4820 const PetscScalar *barr; 4821 PetscScalar *marr; 4822 4823 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4824 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4825 for (i = 0; i < n_eff_vertices; i++) { 4826 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4827 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4828 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4829 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4830 PetscCall(VecResetArray(pcbddc->vec1_R)); 4831 PetscCall(VecResetArray(pcbddc->vec2_R)); 4832 } 4833 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4834 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4835 } 4836 PetscCall(MatDestroy(&A_RV)); 4837 PetscCall(MatDestroy(&Brhs)); 4838 /* S_VV and S_CV */ 4839 if (n_constraints) { 4840 Mat B; 4841 4842 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B)); 4843 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B)); 4844 4845 /* S_CV = pcbddc->local_auxmat1 * B */ 4846 if (multi_element) { 4847 Mat T; 4848 4849 PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T)); 4850 PetscCall(MatDestroy(&B)); 4851 B = T; 4852 } 4853 PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV)); 4854 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4855 PetscCall(MatProductSetFromOptions(S_CV)); 4856 PetscCall(MatProductSymbolic(S_CV)); 4857 PetscCall(MatProductNumeric(S_CV)); 4858 PetscCall(MatProductClear(S_CV)); 4859 PetscCall(MatDestroy(&B)); 4860 4861 /* B = local_auxmat2_R * S_CV */ 4862 PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B)); 4863 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4864 PetscCall(MatProductSetFromOptions(B)); 4865 PetscCall(MatProductSymbolic(B)); 4866 PetscCall(MatProductNumeric(B)); 4867 4868 PetscCall(MatScale(S_CV, m_one)); 4869 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES)); 4870 4871 if (multi_element) { 4872 Mat T; 4873 4874 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4875 PetscCall(MatDestroy(&A_RRmA_RV)); 4876 A_RRmA_RV = T; 4877 } 4878 PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */ 4879 PetscCall(MatDestroy(&B)); 4880 } else if (multi_element) { 4881 Mat T; 4882 4883 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4884 PetscCall(MatDestroy(&A_RRmA_RV)); 4885 A_RRmA_RV = T; 4886 } 4887 4888 if (lda_rhs != n_R) { 4889 Mat T; 4890 4891 PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T)); 4892 PetscCall(MatDestroy(&A_RRmA_RV)); 4893 A_RRmA_RV = T; 4894 } 4895 4896 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4897 if (need_benign_correction) { /* XXX SPARSE */ 4898 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4899 PetscScalar *sums; 4900 const PetscScalar *marr; 4901 4902 PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr)); 4903 PetscCall(PetscMalloc1(n_vertices, &sums)); 4904 for (i = 0; i < reuse_solver->benign_n; i++) { 4905 const PetscScalar *vals; 4906 const PetscInt *idxs, *idxs_zero; 4907 PetscInt n, j, nz; 4908 4909 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4910 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4911 for (j = 0; j < n_vertices; j++) { 4912 sums[j] = 0.; 4913 for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R]; 4914 } 4915 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4916 for (j = 0; j < n; j++) { 4917 PetscScalar val = vals[j]; 4918 for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES)); 4919 } 4920 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4921 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4922 } 4923 PetscCall(PetscFree(sums)); 4924 PetscCall(MatDestroy(&A_RV_bcorr)); 4925 PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr)); 4926 } 4927 4928 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV)); 4929 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4930 PetscCall(MatDestroy(&S_VV)); 4931 } 4932 4933 /* coarse basis functions */ 4934 if (coarse_phi_multi) { 4935 Mat Vid; 4936 4937 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid)); 4938 PetscCall(MatShift_Basic(Vid, 1.0)); 4939 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV)); 4940 PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid)); 4941 PetscCall(MatDestroy(&Vid)); 4942 } else { 4943 if (A_RRmA_RV) { 4944 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B)); 4945 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4946 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D)); 4947 if (pcbddc->benign_n) { 4948 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); 4949 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY)); 4950 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY)); 4951 } 4952 } 4953 } 4954 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES)); 4955 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 4956 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 4957 } 4958 PetscCall(MatDestroy(&A_RRmA_RV)); 4959 } 4960 PetscCall(MatDestroy(&A_RV)); 4961 PetscCall(VecDestroy(&dummy_vec)); 4962 4963 if (n_constraints) { 4964 Mat B, B2; 4965 4966 PetscCall(MatScale(S_CC, m_one)); 4967 PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B)); 4968 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4969 PetscCall(MatProductSetFromOptions(B)); 4970 PetscCall(MatProductSymbolic(B)); 4971 PetscCall(MatProductNumeric(B)); 4972 4973 if (n_vertices) { 4974 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4975 PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC)); 4976 } else { 4977 if (lda_rhs != n_R) { 4978 Mat tB; 4979 4980 PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB)); 4981 PetscCall(MatDestroy(&B)); 4982 B = tB; 4983 } 4984 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC)); 4985 } 4986 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES)); 4987 } 4988 4989 /* coarse basis functions */ 4990 if (coarse_phi_multi) { 4991 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B)); 4992 } else { 4993 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 4994 PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2)); 4995 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2)); 4996 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4997 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 4998 PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2)); 4999 if (pcbddc->benign_n) { 5000 for (i = 0; i < n_constraints; i++) PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); 5001 } 5002 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2)); 5003 } 5004 } 5005 PetscCall(MatDestroy(&B)); 5006 } 5007 5008 /* assemble sparse coarse basis functions */ 5009 if (coarse_phi_multi) { 5010 Mat T; 5011 5012 PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T)); 5013 PetscCall(MatDestroy(&coarse_phi_multi)); 5014 PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B)); 5015 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D)); 5016 PetscCall(MatDestroy(&T)); 5017 } 5018 PetscCall(MatDestroy(&local_auxmat2_R)); 5019 PetscCall(PetscFree(p0_lidx_I)); 5020 5021 /* coarse matrix entries relative to B_0 */ 5022 if (pcbddc->benign_n) { 5023 Mat B0_B, B0_BPHI; 5024 IS is_dummy; 5025 const PetscScalar *data; 5026 PetscInt j; 5027 5028 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5029 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5030 PetscCall(ISDestroy(&is_dummy)); 5031 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5032 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5033 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 5034 for (j = 0; j < pcbddc->benign_n; j++) { 5035 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5036 for (i = 0; i < pcbddc->local_primal_size; i++) { 5037 PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5038 PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5039 } 5040 } 5041 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 5042 PetscCall(MatDestroy(&B0_B)); 5043 PetscCall(MatDestroy(&B0_BPHI)); 5044 } 5045 5046 /* compute other basis functions for non-symmetric problems */ 5047 if (!pcbddc->symmetric_primal) { 5048 Mat B_V = NULL, B_C = NULL; 5049 PetscScalar *marray, *work; 5050 5051 /* TODO multi_element MatDenseScatter */ 5052 if (n_constraints) { 5053 Mat S_CCT, C_CRT; 5054 5055 PetscCall(MatScale(S_CC, m_one)); 5056 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 5057 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 5058 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C)); 5059 PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C)); 5060 PetscCall(MatDestroy(&S_CCT)); 5061 if (n_vertices) { 5062 Mat S_VCT; 5063 5064 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 5065 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V)); 5066 PetscCall(MatDestroy(&S_VCT)); 5067 PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V)); 5068 } 5069 PetscCall(MatDestroy(&C_CRT)); 5070 } else { 5071 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 5072 } 5073 if (n_vertices && n_R) { 5074 PetscScalar *av, *marray; 5075 const PetscInt *xadj, *adjncy; 5076 PetscInt n; 5077 PetscBool flg_row; 5078 5079 /* B_V = B_V - A_VR^T */ 5080 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 5081 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5082 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 5083 PetscCall(MatDenseGetArray(B_V, &marray)); 5084 for (i = 0; i < n; i++) { 5085 PetscInt j; 5086 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 5087 } 5088 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5089 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5090 PetscCall(MatDestroy(&A_VR)); 5091 } 5092 5093 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 5094 PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work)); 5095 if (n_vertices) { 5096 PetscCall(MatDenseGetArray(B_V, &marray)); 5097 for (i = 0; i < n_vertices; i++) { 5098 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 5099 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5100 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5101 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5102 PetscCall(VecResetArray(pcbddc->vec1_R)); 5103 PetscCall(VecResetArray(pcbddc->vec2_R)); 5104 } 5105 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5106 } 5107 if (B_C) { 5108 PetscCall(MatDenseGetArray(B_C, &marray)); 5109 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 5110 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 5111 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5112 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5113 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5114 PetscCall(VecResetArray(pcbddc->vec1_R)); 5115 PetscCall(VecResetArray(pcbddc->vec2_R)); 5116 } 5117 PetscCall(MatDenseRestoreArray(B_C, &marray)); 5118 } 5119 /* coarse basis functions */ 5120 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B)); 5121 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D)); 5122 for (i = 0; i < pcbddc->local_primal_size; i++) { 5123 Vec v; 5124 5125 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 5126 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 5127 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5128 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5129 if (i < n_vertices) { 5130 PetscScalar one = 1.0; 5131 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 5132 PetscCall(VecAssemblyBegin(v)); 5133 PetscCall(VecAssemblyEnd(v)); 5134 } 5135 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 5136 5137 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5138 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 5139 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5140 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5141 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 5142 } 5143 PetscCall(VecResetArray(pcbddc->vec1_R)); 5144 } 5145 PetscCall(MatDestroy(&B_V)); 5146 PetscCall(MatDestroy(&B_C)); 5147 PetscCall(PetscFree(work)); 5148 } else { 5149 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 5150 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 5151 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 5152 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 5153 } 5154 PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5155 PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5156 5157 /* free memory */ 5158 PetscCall(PetscFree(V_to_eff_V)); 5159 PetscCall(PetscFree(C_to_eff_C)); 5160 PetscCall(PetscFree(R_eff_V_J)); 5161 PetscCall(PetscFree(R_eff_C_J)); 5162 PetscCall(PetscFree(B_eff_V_J)); 5163 PetscCall(PetscFree(B_eff_C_J)); 5164 PetscCall(ISDestroy(&is_R)); 5165 PetscCall(ISRestoreIndices(is_V, &idx_V)); 5166 PetscCall(ISRestoreIndices(is_C, &idx_C)); 5167 PetscCall(ISDestroy(&is_V)); 5168 PetscCall(ISDestroy(&is_C)); 5169 PetscCall(PetscFree(idx_V_B)); 5170 PetscCall(MatDestroy(&S_CV)); 5171 PetscCall(MatDestroy(&S_VC)); 5172 PetscCall(MatDestroy(&S_CC)); 5173 if (n_vertices) PetscCall(MatDestroy(&A_VR)); 5174 if (n_constraints) PetscCall(MatDestroy(&C_CR)); 5175 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 5176 5177 /* Checking coarse_sub_mat and coarse basis functions */ 5178 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5179 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5180 if (pcbddc->dbg_flag) { 5181 Mat AUXMAT, TM1, TM2, TM3, TM4; 5182 Mat coarse_phi_D, coarse_phi_B; 5183 Mat coarse_psi_D, coarse_psi_B; 5184 Mat A_II, A_BB, A_IB, A_BI; 5185 Mat C_B, CPHI; 5186 IS is_dummy; 5187 Vec mones; 5188 MatType checkmattype = MATSEQAIJ; 5189 PetscReal real_value; 5190 5191 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5192 Mat A; 5193 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 5194 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 5195 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 5196 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 5197 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 5198 PetscCall(MatDestroy(&A)); 5199 } else { 5200 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 5201 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 5202 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 5203 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 5204 } 5205 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 5206 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 5207 if (!pcbddc->symmetric_primal) { 5208 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 5209 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 5210 } 5211 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5212 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 5213 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5214 if (!pcbddc->symmetric_primal) { 5215 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5216 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5217 PetscCall(MatDestroy(&AUXMAT)); 5218 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5219 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5220 PetscCall(MatDestroy(&AUXMAT)); 5221 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5222 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5223 PetscCall(MatDestroy(&AUXMAT)); 5224 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5225 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5226 PetscCall(MatDestroy(&AUXMAT)); 5227 } else { 5228 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5229 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5230 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5231 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5232 PetscCall(MatDestroy(&AUXMAT)); 5233 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5234 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5235 PetscCall(MatDestroy(&AUXMAT)); 5236 } 5237 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 5238 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 5239 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 5240 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 5241 if (pcbddc->benign_n) { 5242 Mat B0_B, B0_BPHI; 5243 const PetscScalar *data2; 5244 PetscScalar *data; 5245 PetscInt j; 5246 5247 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5248 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5249 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5250 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5251 PetscCall(MatDenseGetArray(TM1, &data)); 5252 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 5253 for (j = 0; j < pcbddc->benign_n; j++) { 5254 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5255 for (i = 0; i < pcbddc->local_primal_size; i++) { 5256 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 5257 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 5258 } 5259 } 5260 PetscCall(MatDenseRestoreArray(TM1, &data)); 5261 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 5262 PetscCall(MatDestroy(&B0_B)); 5263 PetscCall(ISDestroy(&is_dummy)); 5264 PetscCall(MatDestroy(&B0_BPHI)); 5265 } 5266 PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN)); 5267 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 5268 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5269 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5270 5271 /* check constraints */ 5272 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 5273 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 5274 if (!pcbddc->benign_n) { /* TODO: add benign case */ 5275 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5276 } else { 5277 PetscScalar *data; 5278 Mat tmat; 5279 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 5280 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 5281 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 5282 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5283 PetscCall(MatDestroy(&tmat)); 5284 } 5285 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 5286 PetscCall(VecSet(mones, -1.0)); 5287 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5288 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5289 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5290 if (!pcbddc->symmetric_primal) { 5291 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 5292 PetscCall(VecSet(mones, -1.0)); 5293 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5294 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5295 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5296 } 5297 PetscCall(MatDestroy(&C_B)); 5298 PetscCall(MatDestroy(&CPHI)); 5299 PetscCall(ISDestroy(&is_dummy)); 5300 PetscCall(VecDestroy(&mones)); 5301 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5302 PetscCall(MatDestroy(&A_II)); 5303 PetscCall(MatDestroy(&A_BB)); 5304 PetscCall(MatDestroy(&A_IB)); 5305 PetscCall(MatDestroy(&A_BI)); 5306 PetscCall(MatDestroy(&TM1)); 5307 PetscCall(MatDestroy(&TM2)); 5308 PetscCall(MatDestroy(&TM3)); 5309 PetscCall(MatDestroy(&TM4)); 5310 PetscCall(MatDestroy(&coarse_phi_D)); 5311 PetscCall(MatDestroy(&coarse_phi_B)); 5312 if (!pcbddc->symmetric_primal) { 5313 PetscCall(MatDestroy(&coarse_psi_D)); 5314 PetscCall(MatDestroy(&coarse_psi_B)); 5315 } 5316 } 5317 5318 #if 0 5319 { 5320 PetscViewer viewer; 5321 char filename[256]; 5322 5323 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level)); 5324 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 5325 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 5326 PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat")); 5327 PetscCall(MatView(*coarse_submat,viewer)); 5328 if (pcbddc->coarse_phi_B) { 5329 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 5330 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 5331 } 5332 if (pcbddc->coarse_phi_D) { 5333 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 5334 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 5335 } 5336 if (pcbddc->coarse_psi_B) { 5337 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 5338 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 5339 } 5340 if (pcbddc->coarse_psi_D) { 5341 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 5342 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 5343 } 5344 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 5345 PetscCall(MatView(pcbddc->local_mat,viewer)); 5346 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 5347 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 5348 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 5349 PetscCall(ISView(pcis->is_I_local,viewer)); 5350 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 5351 PetscCall(ISView(pcis->is_B_local,viewer)); 5352 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 5353 PetscCall(ISView(pcbddc->is_R_local,viewer)); 5354 PetscCall(PetscViewerDestroy(&viewer)); 5355 } 5356 #endif 5357 5358 /* device support */ 5359 { 5360 PetscBool iscuda, iship, iskokkos; 5361 MatType mtype = NULL; 5362 5363 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, "")); 5364 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, "")); 5365 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, "")); 5366 if (iskokkos) { 5367 if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE; 5368 else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE; 5369 } 5370 if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP); 5371 else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP; 5372 else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA; 5373 if (mtype) { 5374 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); 5375 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); 5376 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); 5377 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); 5378 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); 5379 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); 5380 } 5381 } 5382 PetscFunctionReturn(PETSC_SUCCESS); 5383 } 5384 5385 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) 5386 { 5387 Mat *work_mat; 5388 IS isrow_s, iscol_s; 5389 PetscBool rsorted, csorted; 5390 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 5391 5392 PetscFunctionBegin; 5393 PetscCall(ISSorted(isrow, &rsorted)); 5394 PetscCall(ISSorted(iscol, &csorted)); 5395 PetscCall(ISGetLocalSize(isrow, &rsize)); 5396 PetscCall(ISGetLocalSize(iscol, &csize)); 5397 5398 if (!rsorted) { 5399 const PetscInt *idxs; 5400 PetscInt *idxs_sorted, i; 5401 5402 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 5403 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 5404 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i; 5405 PetscCall(ISGetIndices(isrow, &idxs)); 5406 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 5407 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]]; 5408 PetscCall(ISRestoreIndices(isrow, &idxs)); 5409 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 5410 } else { 5411 PetscCall(PetscObjectReference((PetscObject)isrow)); 5412 isrow_s = isrow; 5413 } 5414 5415 if (!csorted) { 5416 if (isrow == iscol) { 5417 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 5418 iscol_s = isrow_s; 5419 } else { 5420 const PetscInt *idxs; 5421 PetscInt *idxs_sorted, i; 5422 5423 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 5424 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 5425 for (i = 0; i < csize; i++) idxs_perm_c[i] = i; 5426 PetscCall(ISGetIndices(iscol, &idxs)); 5427 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 5428 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]]; 5429 PetscCall(ISRestoreIndices(iscol, &idxs)); 5430 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 5431 } 5432 } else { 5433 PetscCall(PetscObjectReference((PetscObject)iscol)); 5434 iscol_s = iscol; 5435 } 5436 5437 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 5438 5439 if (!rsorted || !csorted) { 5440 Mat new_mat; 5441 IS is_perm_r, is_perm_c; 5442 5443 if (!rsorted) { 5444 PetscInt *idxs_r, i; 5445 PetscCall(PetscMalloc1(rsize, &idxs_r)); 5446 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i; 5447 PetscCall(PetscFree(idxs_perm_r)); 5448 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 5449 } else { 5450 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 5451 } 5452 PetscCall(ISSetPermutation(is_perm_r)); 5453 5454 if (!csorted) { 5455 if (isrow_s == iscol_s) { 5456 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5457 is_perm_c = is_perm_r; 5458 } else { 5459 PetscInt *idxs_c, i; 5460 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 5461 PetscCall(PetscMalloc1(csize, &idxs_c)); 5462 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i; 5463 PetscCall(PetscFree(idxs_perm_c)); 5464 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 5465 } 5466 } else { 5467 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 5468 } 5469 PetscCall(ISSetPermutation(is_perm_c)); 5470 5471 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 5472 PetscCall(MatDestroy(&work_mat[0])); 5473 work_mat[0] = new_mat; 5474 PetscCall(ISDestroy(&is_perm_r)); 5475 PetscCall(ISDestroy(&is_perm_c)); 5476 } 5477 5478 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5479 *B = work_mat[0]; 5480 PetscCall(MatDestroyMatrices(1, &work_mat)); 5481 PetscCall(ISDestroy(&isrow_s)); 5482 PetscCall(ISDestroy(&iscol_s)); 5483 PetscFunctionReturn(PETSC_SUCCESS); 5484 } 5485 5486 static PetscErrorCode MatPtAPWithPrefix_Private(Mat A, Mat P, PetscReal fill, const char *prefix, Mat *C) 5487 { 5488 PetscFunctionBegin; 5489 PetscCall(MatProductCreate(A, P, NULL, C)); 5490 PetscCall(MatProductSetType(*C, MATPRODUCT_PtAP)); 5491 PetscCall(MatProductSetAlgorithm(*C, "default")); 5492 PetscCall(MatProductSetFill(*C, fill)); 5493 PetscCall(MatSetOptionsPrefix(*C, prefix)); 5494 PetscCall(MatProductSetFromOptions(*C)); 5495 PetscCall(MatProductSymbolic(*C)); 5496 PetscCall(MatProductNumeric(*C)); 5497 (*C)->symmetric = A->symmetric; 5498 (*C)->spd = A->spd; 5499 PetscFunctionReturn(PETSC_SUCCESS); 5500 } 5501 5502 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5503 { 5504 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5505 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5506 Mat new_mat, lA; 5507 IS is_local, is_global; 5508 PetscInt local_size; 5509 PetscBool isseqaij, issym, isset; 5510 char ptapprefix[256]; 5511 5512 PetscFunctionBegin; 5513 PetscCall(MatDestroy(&pcbddc->local_mat)); 5514 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 5515 if (pcbddc->mat_graph->multi_element) { 5516 Mat *mats, *bdiags; 5517 IS *gsubs; 5518 PetscInt nsubs = pcbddc->n_local_subs; 5519 5520 PetscCall(PetscCalloc1(nsubs * nsubs, &mats)); 5521 #if 1 5522 PetscCall(PetscMalloc1(nsubs, &gsubs)); 5523 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i])); 5524 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags)); 5525 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i])); 5526 PetscCall(PetscFree(gsubs)); 5527 #else /* this does not work since MatCreateSubMatrices does not support repeated indices */ 5528 Mat *tmats; 5529 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 5530 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 5531 PetscCall(ISDestroy(&is_local)); 5532 PetscCall(MatSetOption(ChangeOfBasisMatrix, MAT_SUBMAT_SINGLEIS, PETSC_TRUE)); 5533 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, 1, &is_global, &is_global, MAT_INITIAL_MATRIX, &tmats)); 5534 PetscCall(ISDestroy(&is_global)); 5535 PetscCall(MatCreateSubMatrices(tmats[0], nsubs, pcbddc->local_subs, pcbddc->local_subs, MAT_INITIAL_MATRIX, &bdiags)); 5536 PetscCall(MatDestroySubMatrices(1, &tmats)); 5537 #endif 5538 for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i]; 5539 PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat)); 5540 PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat)); 5541 PetscCall(MatDestroySubMatrices(nsubs, &bdiags)); 5542 PetscCall(PetscFree(mats)); 5543 } else { 5544 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 5545 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 5546 PetscCall(ISDestroy(&is_local)); 5547 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 5548 PetscCall(ISDestroy(&is_global)); 5549 } 5550 if (pcbddc->dbg_flag) { 5551 Vec x, x_change; 5552 PetscReal error; 5553 5554 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 5555 PetscCall(VecSetRandom(x, NULL)); 5556 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 5557 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5558 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5559 PetscCall(MatMult(new_mat, matis->x, matis->y)); 5560 if (!pcbddc->change_interior) { 5561 const PetscScalar *x, *y, *v; 5562 PetscReal lerror = 0.; 5563 PetscInt i; 5564 5565 PetscCall(VecGetArrayRead(matis->x, &x)); 5566 PetscCall(VecGetArrayRead(matis->y, &y)); 5567 PetscCall(VecGetArrayRead(matis->counter, &v)); 5568 for (i = 0; i < local_size; i++) 5569 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 5570 PetscCall(VecRestoreArrayRead(matis->x, &x)); 5571 PetscCall(VecRestoreArrayRead(matis->y, &y)); 5572 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 5573 PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 5574 if (error > PETSC_SMALL) { 5575 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5576 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 5577 } else { 5578 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 5579 } 5580 } 5581 } 5582 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5583 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5584 PetscCall(VecAXPY(x, -1.0, x_change)); 5585 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 5586 if (error > PETSC_SMALL) { 5587 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5588 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 5589 } else { 5590 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 5591 } 5592 } 5593 PetscCall(VecDestroy(&x)); 5594 PetscCall(VecDestroy(&x_change)); 5595 } 5596 5597 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5598 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 5599 5600 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5601 if (((PetscObject)pc)->prefix) PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "%spc_bddc_change_", ((PetscObject)pc)->prefix)); 5602 else PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "pc_bddc_change_")); 5603 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 5604 if (isseqaij) { 5605 PetscCall(MatDestroy(&pcbddc->local_mat)); 5606 PetscCall(MatPtAPWithPrefix_Private(matis->A, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat)); 5607 if (lA) { 5608 Mat work; 5609 PetscCall(MatPtAPWithPrefix_Private(lA, new_mat, PETSC_DEFAULT, ptapprefix, &work)); 5610 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5611 PetscCall(MatDestroy(&work)); 5612 } 5613 } else { 5614 Mat work_mat; 5615 5616 PetscCall(MatDestroy(&pcbddc->local_mat)); 5617 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5618 PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat)); 5619 PetscCall(MatDestroy(&work_mat)); 5620 if (lA) { 5621 Mat work; 5622 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5623 PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &work)); 5624 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5625 PetscCall(MatDestroy(&work)); 5626 } 5627 } 5628 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 5629 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 5630 PetscCall(MatDestroy(&new_mat)); 5631 PetscFunctionReturn(PETSC_SUCCESS); 5632 } 5633 5634 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5635 { 5636 PC_IS *pcis = (PC_IS *)pc->data; 5637 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5638 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5639 PetscInt *idx_R_local = NULL; 5640 PetscInt n_vertices, i, j, n_R, n_D, n_B; 5641 PetscInt vbs, bs; 5642 PetscBT bitmask = NULL; 5643 5644 PetscFunctionBegin; 5645 /* 5646 No need to setup local scatters if 5647 - primal space is unchanged 5648 AND 5649 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5650 AND 5651 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5652 */ 5653 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS); 5654 /* destroy old objects */ 5655 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5656 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5657 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5658 /* Set Non-overlapping dimensions */ 5659 n_B = pcis->n_B; 5660 n_D = pcis->n - n_B; 5661 n_vertices = pcbddc->n_vertices; 5662 5663 /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5664 5665 /* create auxiliary bitmask and allocate workspace */ 5666 if (!sub_schurs || !sub_schurs->reuse_solver) { 5667 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 5668 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 5669 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); 5670 5671 for (i = 0, n_R = 0; i < pcis->n; i++) { 5672 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i; 5673 } 5674 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5675 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5676 5677 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5678 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 5679 } 5680 5681 /* Block code */ 5682 vbs = 1; 5683 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 5684 if (bs > 1 && !(n_vertices % bs)) { 5685 PetscBool is_blocked = PETSC_TRUE; 5686 PetscInt *vary; 5687 if (!sub_schurs || !sub_schurs->reuse_solver) { 5688 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 5689 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 5690 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5691 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5692 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5693 for (i = 0; i < pcis->n / bs; i++) { 5694 if (vary[i] != 0 && vary[i] != bs) { 5695 is_blocked = PETSC_FALSE; 5696 break; 5697 } 5698 } 5699 PetscCall(PetscFree(vary)); 5700 } else { 5701 /* Verify directly the R set */ 5702 for (i = 0; i < n_R / bs; i++) { 5703 PetscInt j, node = idx_R_local[bs * i]; 5704 for (j = 1; j < bs; j++) { 5705 if (node != idx_R_local[bs * i + j] - j) { 5706 is_blocked = PETSC_FALSE; 5707 break; 5708 } 5709 } 5710 } 5711 } 5712 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5713 vbs = bs; 5714 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs; 5715 } 5716 } 5717 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5718 if (sub_schurs && sub_schurs->reuse_solver) { 5719 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5720 5721 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5722 PetscCall(ISDestroy(&reuse_solver->is_R)); 5723 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5724 reuse_solver->is_R = pcbddc->is_R_local; 5725 } else { 5726 PetscCall(PetscFree(idx_R_local)); 5727 } 5728 5729 /* print some info if requested */ 5730 if (pcbddc->dbg_flag) { 5731 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5732 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5733 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5734 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5735 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5736 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5737 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5738 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5739 } 5740 5741 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5742 if (!sub_schurs || !sub_schurs->reuse_solver) { 5743 IS is_aux1, is_aux2; 5744 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5745 5746 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5747 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5748 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5749 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5750 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i])); 5751 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5752 for (i = 0, j = 0; i < n_R; i++) { 5753 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5754 } 5755 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5756 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5757 for (i = 0, j = 0; i < n_B; i++) { 5758 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i; 5759 } 5760 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5761 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5762 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5763 PetscCall(ISDestroy(&is_aux1)); 5764 PetscCall(ISDestroy(&is_aux2)); 5765 5766 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5767 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5768 for (i = 0, j = 0; i < n_R; i++) { 5769 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5770 } 5771 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5772 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5773 PetscCall(ISDestroy(&is_aux1)); 5774 } 5775 PetscCall(PetscBTDestroy(&bitmask)); 5776 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5777 } else { 5778 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5779 IS tis; 5780 PetscInt schur_size; 5781 5782 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5783 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5784 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5785 PetscCall(ISDestroy(&tis)); 5786 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5787 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5788 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5789 PetscCall(ISDestroy(&tis)); 5790 } 5791 } 5792 PetscFunctionReturn(PETSC_SUCCESS); 5793 } 5794 5795 PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5796 { 5797 MatNullSpace NullSpace; 5798 Mat dmat; 5799 const Vec *nullvecs; 5800 Vec v, v2, *nullvecs2; 5801 VecScatter sct = NULL; 5802 PetscScalar *ddata; 5803 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5804 PetscBool nnsp_has_cnst; 5805 5806 PetscFunctionBegin; 5807 if (!is && !B) { /* MATIS */ 5808 Mat_IS *matis = (Mat_IS *)A->data; 5809 5810 if (!B) PetscCall(MatISGetLocalMat(A, &B)); 5811 sct = matis->cctx; 5812 PetscCall(PetscObjectReference((PetscObject)sct)); 5813 } else { 5814 PetscCall(MatGetNullSpace(B, &NullSpace)); 5815 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace)); 5816 if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5817 } 5818 PetscCall(MatGetNullSpace(A, &NullSpace)); 5819 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace)); 5820 if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5821 5822 PetscCall(MatCreateVecs(A, &v, NULL)); 5823 PetscCall(MatCreateVecs(B, &v2, NULL)); 5824 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); 5825 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs)); 5826 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5827 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5828 PetscCall(VecGetBlockSize(v2, &bs)); 5829 PetscCall(VecGetSize(v2, &N)); 5830 PetscCall(VecGetLocalSize(v2, &n)); 5831 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5832 for (k = 0; k < nnsp_size; k++) { 5833 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5834 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5835 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5836 } 5837 if (nnsp_has_cnst) { 5838 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5839 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5840 } 5841 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5842 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5843 5844 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5845 PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault)); 5846 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5847 PetscCall(MatDestroy(&dmat)); 5848 5849 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k])); 5850 PetscCall(PetscFree(nullvecs2)); 5851 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5852 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5853 PetscCall(VecDestroy(&v)); 5854 PetscCall(VecDestroy(&v2)); 5855 PetscCall(VecScatterDestroy(&sct)); 5856 PetscFunctionReturn(PETSC_SUCCESS); 5857 } 5858 5859 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5860 { 5861 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5862 PC_IS *pcis = (PC_IS *)pc->data; 5863 PC pc_temp; 5864 Mat A_RR; 5865 MatNullSpace nnsp; 5866 MatReuse reuse; 5867 PetscScalar m_one = -1.0; 5868 PetscReal value; 5869 PetscInt n_D, n_R; 5870 PetscBool issbaij, opts, isset, issym; 5871 PetscBool f = PETSC_FALSE; 5872 char dir_prefix[256], neu_prefix[256], str_level[16]; 5873 size_t len; 5874 5875 PetscFunctionBegin; 5876 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5877 /* approximate solver, propagate NearNullSpace if needed */ 5878 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5879 MatNullSpace gnnsp1, gnnsp2; 5880 PetscBool lhas, ghas; 5881 5882 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5883 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5884 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5885 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5886 PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5887 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); 5888 } 5889 5890 /* compute prefixes */ 5891 PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix))); 5892 PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix))); 5893 if (!pcbddc->current_level) { 5894 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5895 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5896 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5897 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5898 } else { 5899 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level)); 5900 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 5901 len -= 15; /* remove "pc_bddc_coarse_" */ 5902 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 5903 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 5904 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5905 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 5906 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 5907 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5908 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5909 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 5910 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 5911 } 5912 5913 /* DIRICHLET PROBLEM */ 5914 if (dirichlet) { 5915 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5916 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5917 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 5918 if (pcbddc->dbg_flag) { 5919 Mat A_IIn; 5920 5921 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 5922 PetscCall(MatDestroy(&pcis->A_II)); 5923 pcis->A_II = A_IIn; 5924 } 5925 } 5926 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5927 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 5928 5929 /* Matrix for Dirichlet problem is pcis->A_II */ 5930 n_D = pcis->n - pcis->n_B; 5931 opts = PETSC_FALSE; 5932 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5933 opts = PETSC_TRUE; 5934 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 5935 PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel)); 5936 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 5937 /* default */ 5938 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 5939 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 5940 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 5941 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5942 if (issbaij) { 5943 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5944 } else { 5945 PetscCall(PCSetType(pc_temp, PCLU)); 5946 } 5947 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 5948 } 5949 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 5950 PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view")); 5951 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 5952 /* Allow user's customization */ 5953 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5954 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5955 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5956 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 5957 } 5958 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5959 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5960 PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5961 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5962 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5963 const PetscInt *idxs; 5964 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5965 5966 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 5967 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 5968 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5969 for (i = 0; i < nl; i++) { 5970 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5971 } 5972 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 5973 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5974 PetscCall(PetscFree(scoords)); 5975 } 5976 if (sub_schurs && sub_schurs->reuse_solver) { 5977 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5978 5979 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 5980 } 5981 5982 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5983 if (!n_D) { 5984 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5985 PetscCall(PCSetType(pc_temp, PCNONE)); 5986 } 5987 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5988 /* set ksp_D into pcis data */ 5989 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5990 PetscCall(KSPDestroy(&pcis->ksp_D)); 5991 pcis->ksp_D = pcbddc->ksp_D; 5992 } 5993 5994 /* NEUMANN PROBLEM */ 5995 A_RR = NULL; 5996 if (neumann) { 5997 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5998 PetscInt ibs, mbs; 5999 PetscBool issbaij, reuse_neumann_solver, isset, issym; 6000 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6001 6002 reuse_neumann_solver = PETSC_FALSE; 6003 if (sub_schurs && sub_schurs->reuse_solver) { 6004 IS iP; 6005 6006 reuse_neumann_solver = PETSC_TRUE; 6007 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 6008 if (iP) reuse_neumann_solver = PETSC_FALSE; 6009 } 6010 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 6011 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 6012 if (pcbddc->ksp_R) { /* already created ksp */ 6013 PetscInt nn_R; 6014 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 6015 PetscCall(PetscObjectReference((PetscObject)A_RR)); 6016 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 6017 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 6018 PetscCall(KSPReset(pcbddc->ksp_R)); 6019 PetscCall(MatDestroy(&A_RR)); 6020 reuse = MAT_INITIAL_MATRIX; 6021 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 6022 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 6023 PetscCall(MatDestroy(&A_RR)); 6024 reuse = MAT_INITIAL_MATRIX; 6025 } else { /* safe to reuse the matrix */ 6026 reuse = MAT_REUSE_MATRIX; 6027 } 6028 } 6029 /* last check */ 6030 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 6031 PetscCall(MatDestroy(&A_RR)); 6032 reuse = MAT_INITIAL_MATRIX; 6033 } 6034 } else { /* first time, so we need to create the matrix */ 6035 reuse = MAT_INITIAL_MATRIX; 6036 } 6037 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 6038 TODO: Get Rid of these conversions */ 6039 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 6040 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 6041 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 6042 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 6043 if (matis->A == pcbddc->local_mat) { 6044 PetscCall(MatDestroy(&pcbddc->local_mat)); 6045 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 6046 } else { 6047 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 6048 } 6049 } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */ 6050 if (matis->A == pcbddc->local_mat) { 6051 PetscCall(MatDestroy(&pcbddc->local_mat)); 6052 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 6053 } else { 6054 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 6055 } 6056 } 6057 /* extract A_RR */ 6058 if (reuse_neumann_solver) { 6059 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6060 6061 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 6062 PetscCall(MatDestroy(&A_RR)); 6063 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 6064 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 6065 } else { 6066 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 6067 } 6068 } else { 6069 PetscCall(MatDestroy(&A_RR)); 6070 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 6071 PetscCall(PetscObjectReference((PetscObject)A_RR)); 6072 } 6073 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 6074 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 6075 } 6076 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 6077 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 6078 opts = PETSC_FALSE; 6079 if (!pcbddc->ksp_R) { /* create object if not present */ 6080 opts = PETSC_TRUE; 6081 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 6082 PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel)); 6083 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 6084 /* default */ 6085 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 6086 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 6087 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6088 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 6089 if (issbaij) { 6090 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 6091 } else { 6092 PetscCall(PCSetType(pc_temp, PCLU)); 6093 } 6094 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 6095 } 6096 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 6097 PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view")); 6098 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 6099 if (opts) { /* Allow user's customization once */ 6100 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 6101 } 6102 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6103 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 6104 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 6105 } 6106 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6107 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6108 PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 6109 if (f && pcbddc->mat_graph->cloc && !nnsp) { 6110 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 6111 const PetscInt *idxs; 6112 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 6113 6114 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 6115 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 6116 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 6117 for (i = 0; i < nl; i++) { 6118 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 6119 } 6120 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 6121 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 6122 PetscCall(PetscFree(scoords)); 6123 } 6124 6125 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 6126 if (!n_R) { 6127 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6128 PetscCall(PCSetType(pc_temp, PCNONE)); 6129 } 6130 /* Reuse solver if it is present */ 6131 if (reuse_neumann_solver) { 6132 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6133 6134 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 6135 } 6136 PetscCall(KSPSetUp(pcbddc->ksp_R)); 6137 } 6138 6139 if (pcbddc->dbg_flag) { 6140 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6141 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6142 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 6143 } 6144 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 6145 6146 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 6147 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); 6148 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); 6149 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); 6150 /* check Dirichlet and Neumann solvers */ 6151 if (pcbddc->dbg_flag) { 6152 if (dirichlet) { /* Dirichlet */ 6153 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 6154 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 6155 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 6156 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 6157 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 6158 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 6159 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value)); 6160 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6161 } 6162 if (neumann) { /* Neumann */ 6163 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 6164 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 6165 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 6166 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 6167 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 6168 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 6169 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value)); 6170 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6171 } 6172 } 6173 /* free Neumann problem's matrix */ 6174 PetscCall(MatDestroy(&A_RR)); 6175 PetscFunctionReturn(PETSC_SUCCESS); 6176 } 6177 6178 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 6179 { 6180 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6181 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6182 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 6183 6184 PetscFunctionBegin; 6185 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.)); 6186 if (!pcbddc->switch_static) { 6187 if (applytranspose && pcbddc->local_auxmat1) { 6188 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 6189 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6190 } 6191 if (!reuse_solver) { 6192 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6193 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6194 } else { 6195 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6196 6197 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6198 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6199 } 6200 } else { 6201 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6202 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6203 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6204 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6205 if (applytranspose && pcbddc->local_auxmat1) { 6206 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 6207 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6208 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6209 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6210 } 6211 } 6212 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6213 if (!reuse_solver || pcbddc->switch_static) { 6214 if (applytranspose) { 6215 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6216 } else { 6217 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6218 } 6219 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 6220 } else { 6221 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6222 6223 if (applytranspose) { 6224 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6225 } else { 6226 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6227 } 6228 } 6229 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6230 PetscCall(VecSet(inout_B, 0.)); 6231 if (!pcbddc->switch_static) { 6232 if (!reuse_solver) { 6233 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6234 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6235 } else { 6236 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6237 6238 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6239 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6240 } 6241 if (!applytranspose && pcbddc->local_auxmat1) { 6242 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6243 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 6244 } 6245 } else { 6246 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6247 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6248 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6249 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6250 if (!applytranspose && pcbddc->local_auxmat1) { 6251 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6252 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 6253 } 6254 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6255 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6256 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6257 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6258 } 6259 PetscFunctionReturn(PETSC_SUCCESS); 6260 } 6261 6262 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 6263 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 6264 { 6265 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6266 PC_IS *pcis = (PC_IS *)pc->data; 6267 const PetscScalar zero = 0.0; 6268 6269 PetscFunctionBegin; 6270 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 6271 if (!pcbddc->benign_apply_coarse_only) { 6272 if (applytranspose) { 6273 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 6274 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6275 } else { 6276 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 6277 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6278 } 6279 } else { 6280 PetscCall(VecSet(pcbddc->vec1_P, zero)); 6281 } 6282 6283 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 6284 if (pcbddc->benign_n) { 6285 PetscScalar *array; 6286 PetscInt j; 6287 6288 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6289 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 6290 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6291 } 6292 6293 /* start communications from local primal nodes to rhs of coarse solver */ 6294 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 6295 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 6296 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 6297 6298 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 6299 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6300 if (pcbddc->coarse_ksp) { 6301 Mat coarse_mat; 6302 Vec rhs, sol; 6303 MatNullSpace nullsp; 6304 PetscBool isbddc = PETSC_FALSE; 6305 6306 if (pcbddc->benign_have_null) { 6307 PC coarse_pc; 6308 6309 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6310 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 6311 /* we need to propagate to coarser levels the need for a possible benign correction */ 6312 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 6313 PC_BDDC *coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6314 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 6315 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 6316 } 6317 } 6318 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 6319 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 6320 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 6321 if (applytranspose) { 6322 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 6323 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 6324 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6325 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 6326 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6327 } else { 6328 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 6329 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 6330 PC coarse_pc; 6331 6332 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 6333 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6334 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 6335 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 6336 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 6337 } else { 6338 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 6339 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6340 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6341 } 6342 } 6343 /* we don't need the benign correction at coarser levels anymore */ 6344 if (pcbddc->benign_have_null && isbddc) { 6345 PC coarse_pc; 6346 PC_BDDC *coarsepcbddc; 6347 6348 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6349 coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6350 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 6351 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 6352 } 6353 } 6354 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6355 6356 /* Local solution on R nodes */ 6357 if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); 6358 /* communications from coarse sol to local primal nodes */ 6359 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 6360 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 6361 6362 /* Sum contributions from the two levels */ 6363 if (!pcbddc->benign_apply_coarse_only) { 6364 if (applytranspose) { 6365 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6366 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6367 } else { 6368 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6369 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6370 } 6371 /* store p0 */ 6372 if (pcbddc->benign_n) { 6373 PetscScalar *array; 6374 PetscInt j; 6375 6376 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6377 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 6378 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6379 } 6380 } else { /* expand the coarse solution */ 6381 if (applytranspose) { 6382 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 6383 } else { 6384 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 6385 } 6386 } 6387 PetscFunctionReturn(PETSC_SUCCESS); 6388 } 6389 6390 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) 6391 { 6392 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6393 Vec from, to; 6394 const PetscScalar *array; 6395 6396 PetscFunctionBegin; 6397 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6398 from = pcbddc->coarse_vec; 6399 to = pcbddc->vec1_P; 6400 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6401 Vec tvec; 6402 6403 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6404 PetscCall(VecResetArray(tvec)); 6405 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 6406 PetscCall(VecGetArrayRead(tvec, &array)); 6407 PetscCall(VecPlaceArray(from, array)); 6408 PetscCall(VecRestoreArrayRead(tvec, &array)); 6409 } 6410 } else { /* from local to global -> put data in coarse right-hand side */ 6411 from = pcbddc->vec1_P; 6412 to = pcbddc->coarse_vec; 6413 } 6414 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6415 PetscFunctionReturn(PETSC_SUCCESS); 6416 } 6417 6418 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6419 { 6420 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6421 Vec from, to; 6422 const PetscScalar *array; 6423 6424 PetscFunctionBegin; 6425 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6426 from = pcbddc->coarse_vec; 6427 to = pcbddc->vec1_P; 6428 } else { /* from local to global -> put data in coarse right-hand side */ 6429 from = pcbddc->vec1_P; 6430 to = pcbddc->coarse_vec; 6431 } 6432 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6433 if (smode == SCATTER_FORWARD) { 6434 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6435 Vec tvec; 6436 6437 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6438 PetscCall(VecGetArrayRead(to, &array)); 6439 PetscCall(VecPlaceArray(tvec, array)); 6440 PetscCall(VecRestoreArrayRead(to, &array)); 6441 } 6442 } else { 6443 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6444 PetscCall(VecResetArray(from)); 6445 } 6446 } 6447 PetscFunctionReturn(PETSC_SUCCESS); 6448 } 6449 6450 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6451 { 6452 PC_IS *pcis = (PC_IS *)pc->data; 6453 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6454 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6455 /* one and zero */ 6456 PetscScalar one = 1.0, zero = 0.0; 6457 /* space to store constraints and their local indices */ 6458 PetscScalar *constraints_data; 6459 PetscInt *constraints_idxs, *constraints_idxs_B; 6460 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 6461 PetscInt *constraints_n; 6462 /* iterators */ 6463 PetscInt i, j, k, total_counts, total_counts_cc, cum; 6464 /* BLAS integers */ 6465 PetscBLASInt lwork, lierr; 6466 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 6467 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 6468 /* reuse */ 6469 PetscInt olocal_primal_size, olocal_primal_size_cc; 6470 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 6471 /* change of basis */ 6472 PetscBool qr_needed; 6473 PetscBT change_basis, qr_needed_idx; 6474 /* auxiliary stuff */ 6475 PetscInt *nnz, *is_indices; 6476 PetscInt ncc; 6477 /* some quantities */ 6478 PetscInt n_vertices, total_primal_vertices, valid_constraints; 6479 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 6480 PetscReal tol; /* tolerance for retaining eigenmodes */ 6481 6482 PetscFunctionBegin; 6483 tol = PetscSqrtReal(PETSC_SMALL); 6484 /* Destroy Mat objects computed previously */ 6485 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6486 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6487 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6488 /* save info on constraints from previous setup (if any) */ 6489 olocal_primal_size = pcbddc->local_primal_size; 6490 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6491 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 6492 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 6493 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 6494 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 6495 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6496 6497 if (!pcbddc->adaptive_selection) { 6498 IS ISForVertices, *ISForFaces, *ISForEdges; 6499 MatNullSpace nearnullsp; 6500 const Vec *nearnullvecs; 6501 Vec *localnearnullsp; 6502 PetscScalar *array; 6503 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 6504 PetscBool nnsp_has_cnst; 6505 /* LAPACK working arrays for SVD or POD */ 6506 PetscBool skip_lapack, boolforchange; 6507 PetscScalar *work; 6508 PetscReal *singular_vals; 6509 #if defined(PETSC_USE_COMPLEX) 6510 PetscReal *rwork; 6511 #endif 6512 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 6513 PetscBLASInt dummy_int = 1; 6514 PetscScalar dummy_scalar = 1.; 6515 PetscBool use_pod = PETSC_FALSE; 6516 6517 /* MKL SVD with same input gives different results on different processes! */ 6518 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6519 use_pod = PETSC_TRUE; 6520 #endif 6521 /* Get index sets for faces, edges and vertices from graph */ 6522 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 6523 o_nf = n_ISForFaces; 6524 o_ne = n_ISForEdges; 6525 n_vertices = 0; 6526 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 6527 /* print some info */ 6528 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6529 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 6530 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 6531 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6532 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6533 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 6534 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 6535 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 6536 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6537 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6538 } 6539 6540 if (!pcbddc->use_vertices) n_vertices = 0; 6541 if (!pcbddc->use_edges) n_ISForEdges = 0; 6542 if (!pcbddc->use_faces) n_ISForFaces = 0; 6543 6544 /* check if near null space is attached to global mat */ 6545 if (pcbddc->use_nnsp) PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 6546 else nearnullsp = NULL; 6547 6548 if (nearnullsp) { 6549 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 6550 /* remove any stored info */ 6551 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6552 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6553 /* store information for BDDC solver reuse */ 6554 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6555 pcbddc->onearnullspace = nearnullsp; 6556 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 6557 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); 6558 } else { /* if near null space is not provided BDDC uses constants by default */ 6559 nnsp_size = 0; 6560 nnsp_has_cnst = PETSC_TRUE; 6561 } 6562 /* get max number of constraints on a single cc */ 6563 max_constraints = nnsp_size; 6564 if (nnsp_has_cnst) max_constraints++; 6565 6566 /* 6567 Evaluate maximum storage size needed by the procedure 6568 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6569 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6570 There can be multiple constraints per connected component 6571 */ 6572 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 6573 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 6574 6575 total_counts = n_ISForFaces + n_ISForEdges; 6576 total_counts *= max_constraints; 6577 total_counts += n_vertices; 6578 PetscCall(PetscBTCreate(total_counts, &change_basis)); 6579 6580 total_counts = 0; 6581 max_size_of_constraint = 0; 6582 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 6583 IS used_is; 6584 if (i < n_ISForEdges) { 6585 used_is = ISForEdges[i]; 6586 } else { 6587 used_is = ISForFaces[i - n_ISForEdges]; 6588 } 6589 PetscCall(ISGetSize(used_is, &j)); 6590 total_counts += j; 6591 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 6592 } 6593 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 6594 6595 /* get local part of global near null space vectors */ 6596 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 6597 for (k = 0; k < nnsp_size; k++) { 6598 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 6599 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6600 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6601 } 6602 6603 /* whether or not to skip lapack calls */ 6604 skip_lapack = PETSC_TRUE; 6605 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6606 6607 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6608 if (!skip_lapack) { 6609 PetscScalar temp_work; 6610 6611 if (use_pod) { 6612 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6613 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 6614 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 6615 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 6616 #if defined(PETSC_USE_COMPLEX) 6617 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 6618 #endif 6619 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6620 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6621 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 6622 lwork = -1; 6623 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6624 #if !defined(PETSC_USE_COMPLEX) 6625 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 6626 #else 6627 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 6628 #endif 6629 PetscCall(PetscFPTrapPop()); 6630 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr); 6631 } else { 6632 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6633 /* SVD */ 6634 PetscInt max_n, min_n; 6635 max_n = max_size_of_constraint; 6636 min_n = max_constraints; 6637 if (max_size_of_constraint < max_constraints) { 6638 min_n = max_size_of_constraint; 6639 max_n = max_constraints; 6640 } 6641 PetscCall(PetscMalloc1(min_n, &singular_vals)); 6642 #if defined(PETSC_USE_COMPLEX) 6643 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 6644 #endif 6645 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6646 lwork = -1; 6647 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 6648 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 6649 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 6650 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6651 #if !defined(PETSC_USE_COMPLEX) 6652 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 6653 #else 6654 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 6655 #endif 6656 PetscCall(PetscFPTrapPop()); 6657 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 6658 #else 6659 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6660 #endif /* on missing GESVD */ 6661 } 6662 /* Allocate optimal workspace */ 6663 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 6664 PetscCall(PetscMalloc1(lwork, &work)); 6665 } 6666 /* Now we can loop on constraining sets */ 6667 total_counts = 0; 6668 constraints_idxs_ptr[0] = 0; 6669 constraints_data_ptr[0] = 0; 6670 /* vertices */ 6671 if (n_vertices) { 6672 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 6673 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 6674 for (i = 0; i < n_vertices; i++) { 6675 constraints_n[total_counts] = 1; 6676 constraints_data[total_counts] = 1.0; 6677 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 6678 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 6679 total_counts++; 6680 } 6681 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 6682 } 6683 6684 /* edges and faces */ 6685 total_counts_cc = total_counts; 6686 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 6687 IS used_is; 6688 PetscBool idxs_copied = PETSC_FALSE; 6689 6690 if (ncc < n_ISForEdges) { 6691 used_is = ISForEdges[ncc]; 6692 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6693 } else { 6694 used_is = ISForFaces[ncc - n_ISForEdges]; 6695 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6696 } 6697 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6698 6699 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6700 if (!size_of_constraint) continue; 6701 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6702 if (nnsp_has_cnst) { 6703 PetscScalar quad_value; 6704 6705 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6706 idxs_copied = PETSC_TRUE; 6707 6708 if (!pcbddc->use_nnsp_true) { 6709 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6710 } else { 6711 quad_value = 1.0; 6712 } 6713 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; 6714 temp_constraints++; 6715 total_counts++; 6716 } 6717 for (k = 0; k < nnsp_size; k++) { 6718 PetscReal real_value; 6719 PetscScalar *ptr_to_data; 6720 6721 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6722 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6723 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]]; 6724 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6725 /* check if array is null on the connected component */ 6726 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6727 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6728 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6729 temp_constraints++; 6730 total_counts++; 6731 if (!idxs_copied) { 6732 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6733 idxs_copied = PETSC_TRUE; 6734 } 6735 } 6736 } 6737 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6738 valid_constraints = temp_constraints; 6739 if (!pcbddc->use_nnsp_true && temp_constraints) { 6740 if (temp_constraints == 1) { /* just normalize the constraint */ 6741 PetscScalar norm, *ptr_to_data; 6742 6743 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6744 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6745 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6746 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6747 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6748 } else { /* perform SVD */ 6749 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6750 6751 if (use_pod) { 6752 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6753 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6754 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6755 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6756 from that computed using LAPACKgesvd 6757 -> This is due to a different computation of eigenvectors in LAPACKheev 6758 -> The quality of the POD-computed basis will be the same */ 6759 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6760 /* Store upper triangular part of correlation matrix */ 6761 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6762 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6763 for (j = 0; j < temp_constraints; j++) { 6764 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); 6765 } 6766 /* compute eigenvalues and eigenvectors of correlation matrix */ 6767 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6768 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6769 #if !defined(PETSC_USE_COMPLEX) 6770 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6771 #else 6772 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6773 #endif 6774 PetscCall(PetscFPTrapPop()); 6775 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr); 6776 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6777 j = 0; 6778 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6779 total_counts = total_counts - j; 6780 valid_constraints = temp_constraints - j; 6781 /* scale and copy POD basis into used quadrature memory */ 6782 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6783 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6784 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6785 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6786 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6787 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6788 if (j < temp_constraints) { 6789 PetscInt ii; 6790 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6791 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6792 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6793 PetscCall(PetscFPTrapPop()); 6794 for (k = 0; k < temp_constraints - j; k++) { 6795 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; 6796 } 6797 } 6798 } else { 6799 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6800 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6801 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6802 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6803 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6804 #if !defined(PETSC_USE_COMPLEX) 6805 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6806 #else 6807 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6808 #endif 6809 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 6810 PetscCall(PetscFPTrapPop()); 6811 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6812 k = temp_constraints; 6813 if (k > size_of_constraint) k = size_of_constraint; 6814 j = 0; 6815 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6816 valid_constraints = k - j; 6817 total_counts = total_counts - temp_constraints + valid_constraints; 6818 #else 6819 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6820 #endif /* on missing GESVD */ 6821 } 6822 } 6823 } 6824 /* update pointers information */ 6825 if (valid_constraints) { 6826 constraints_n[total_counts_cc] = valid_constraints; 6827 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6828 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6829 /* set change_of_basis flag */ 6830 if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc)); 6831 total_counts_cc++; 6832 } 6833 } 6834 /* free workspace */ 6835 if (!skip_lapack) { 6836 PetscCall(PetscFree(work)); 6837 #if defined(PETSC_USE_COMPLEX) 6838 PetscCall(PetscFree(rwork)); 6839 #endif 6840 PetscCall(PetscFree(singular_vals)); 6841 PetscCall(PetscFree(correlation_mat)); 6842 PetscCall(PetscFree(temp_basis)); 6843 } 6844 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k])); 6845 PetscCall(PetscFree(localnearnullsp)); 6846 /* free index sets of faces, edges and vertices */ 6847 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6848 } else { 6849 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6850 6851 total_counts = 0; 6852 n_vertices = 0; 6853 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 6854 max_constraints = 0; 6855 total_counts_cc = 0; 6856 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6857 total_counts += pcbddc->adaptive_constraints_n[i]; 6858 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6859 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6860 } 6861 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6862 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6863 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6864 constraints_data = pcbddc->adaptive_constraints_data; 6865 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6866 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6867 total_counts_cc = 0; 6868 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6869 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6870 } 6871 6872 max_size_of_constraint = 0; 6873 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6874 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6875 /* Change of basis */ 6876 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6877 if (pcbddc->use_change_of_basis) { 6878 for (i = 0; i < sub_schurs->n_subs; i++) { 6879 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices)); 6880 } 6881 } 6882 } 6883 pcbddc->local_primal_size = total_counts; 6884 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6885 6886 /* map constraints_idxs in boundary numbering */ 6887 if (pcbddc->use_change_of_basis) { 6888 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6889 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6890 } 6891 6892 /* Create constraint matrix */ 6893 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6894 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6895 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6896 6897 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6898 /* determine if a QR strategy is needed for change of basis */ 6899 qr_needed = pcbddc->use_qr_single; 6900 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 6901 total_primal_vertices = 0; 6902 pcbddc->local_primal_size_cc = 0; 6903 for (i = 0; i < total_counts_cc; i++) { 6904 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6905 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6906 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6907 pcbddc->local_primal_size_cc += 1; 6908 } else if (PetscBTLookup(change_basis, i)) { 6909 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6910 pcbddc->local_primal_size_cc += constraints_n[i]; 6911 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6912 PetscCall(PetscBTSet(qr_needed_idx, i)); 6913 qr_needed = PETSC_TRUE; 6914 } 6915 } else { 6916 pcbddc->local_primal_size_cc += 1; 6917 } 6918 } 6919 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6920 pcbddc->n_vertices = total_primal_vertices; 6921 /* permute indices in order to have a sorted set of vertices */ 6922 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 6923 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 6924 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 6925 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 6926 6927 /* nonzero structure of constraint matrix */ 6928 /* and get reference dof for local constraints */ 6929 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 6930 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 6931 6932 j = total_primal_vertices; 6933 total_counts = total_primal_vertices; 6934 cum = total_primal_vertices; 6935 for (i = n_vertices; i < total_counts_cc; i++) { 6936 if (!PetscBTLookup(change_basis, i)) { 6937 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6938 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6939 cum++; 6940 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6941 for (k = 0; k < constraints_n[i]; k++) { 6942 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6943 nnz[j + k] = size_of_constraint; 6944 } 6945 j += constraints_n[i]; 6946 } 6947 } 6948 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 6949 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6950 PetscCall(PetscFree(nnz)); 6951 6952 /* set values in constraint matrix */ 6953 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); 6954 total_counts = total_primal_vertices; 6955 for (i = n_vertices; i < total_counts_cc; i++) { 6956 if (!PetscBTLookup(change_basis, i)) { 6957 PetscInt *cols; 6958 6959 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6960 cols = constraints_idxs + constraints_idxs_ptr[i]; 6961 for (k = 0; k < constraints_n[i]; k++) { 6962 PetscInt row = total_counts + k; 6963 PetscScalar *vals; 6964 6965 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 6966 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 6967 } 6968 total_counts += constraints_n[i]; 6969 } 6970 } 6971 /* assembling */ 6972 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6973 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6974 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 6975 6976 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6977 if (pcbddc->use_change_of_basis) { 6978 /* dual and primal dofs on a single cc */ 6979 PetscInt dual_dofs, primal_dofs; 6980 /* working stuff for GEQRF */ 6981 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 6982 PetscBLASInt lqr_work; 6983 /* working stuff for UNGQR */ 6984 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 6985 PetscBLASInt lgqr_work; 6986 /* working stuff for TRTRS */ 6987 PetscScalar *trs_rhs = NULL; 6988 PetscBLASInt Blas_NRHS; 6989 /* pointers for values insertion into change of basis matrix */ 6990 PetscInt *start_rows, *start_cols; 6991 PetscScalar *start_vals; 6992 /* working stuff for values insertion */ 6993 PetscBT is_primal; 6994 PetscInt *aux_primal_numbering_B; 6995 /* matrix sizes */ 6996 PetscInt global_size, local_size; 6997 /* temporary change of basis */ 6998 Mat localChangeOfBasisMatrix; 6999 /* extra space for debugging */ 7000 PetscScalar *dbg_work = NULL; 7001 7002 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 7003 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 7004 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 7005 /* nonzeros for local mat */ 7006 PetscCall(PetscMalloc1(pcis->n, &nnz)); 7007 if (!pcbddc->benign_change || pcbddc->fake_change) { 7008 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 7009 } else { 7010 const PetscInt *ii; 7011 PetscInt n; 7012 PetscBool flg_row; 7013 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 7014 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 7015 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 7016 } 7017 for (i = n_vertices; i < total_counts_cc; i++) { 7018 if (PetscBTLookup(change_basis, i)) { 7019 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 7020 if (PetscBTLookup(qr_needed_idx, i)) { 7021 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 7022 } else { 7023 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 7024 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 7025 } 7026 } 7027 } 7028 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 7029 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 7030 PetscCall(PetscFree(nnz)); 7031 /* Set interior change in the matrix */ 7032 if (!pcbddc->benign_change || pcbddc->fake_change) { 7033 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); 7034 } else { 7035 const PetscInt *ii, *jj; 7036 PetscScalar *aa; 7037 PetscInt n; 7038 PetscBool flg_row; 7039 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 7040 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 7041 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); 7042 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 7043 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 7044 } 7045 7046 if (pcbddc->dbg_flag) { 7047 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 7048 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 7049 } 7050 7051 /* Now we loop on the constraints which need a change of basis */ 7052 /* 7053 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 7054 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 7055 7056 Basic blocks of change of basis matrix T computed: 7057 7058 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 7059 7060 | 1 0 ... 0 s_1/S | 7061 | 0 1 ... 0 s_2/S | 7062 | ... | 7063 | 0 ... 1 s_{n-1}/S | 7064 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 7065 7066 with S = \sum_{i=1}^n s_i^2 7067 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 7068 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 7069 7070 - QR decomposition of constraints otherwise 7071 */ 7072 if (qr_needed && max_size_of_constraint) { 7073 /* space to store Q */ 7074 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 7075 /* array to store scaling factors for reflectors */ 7076 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 7077 /* first we issue queries for optimal work */ 7078 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7079 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 7080 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7081 lqr_work = -1; 7082 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 7083 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr); 7084 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 7085 PetscCall(PetscMalloc1(lqr_work, &qr_work)); 7086 lgqr_work = -1; 7087 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7088 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 7089 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 7090 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7091 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 7092 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 7093 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr); 7094 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 7095 PetscCall(PetscMalloc1(lgqr_work, &gqr_work)); 7096 /* array to store rhs and solution of triangular solver */ 7097 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 7098 /* allocating workspace for check */ 7099 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); 7100 } 7101 /* array to store whether a node is primal or not */ 7102 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 7103 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 7104 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 7105 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 7106 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); 7107 PetscCall(PetscFree(aux_primal_numbering_B)); 7108 7109 /* loop on constraints and see whether or not they need a change of basis and compute it */ 7110 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 7111 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 7112 if (PetscBTLookup(change_basis, total_counts)) { 7113 /* get constraint info */ 7114 primal_dofs = constraints_n[total_counts]; 7115 dual_dofs = size_of_constraint - primal_dofs; 7116 7117 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); 7118 7119 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 7120 7121 /* copy quadrature constraints for change of basis check */ 7122 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7123 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 7124 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7125 7126 /* compute QR decomposition of constraints */ 7127 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7128 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7129 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7130 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7131 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 7132 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr); 7133 PetscCall(PetscFPTrapPop()); 7134 7135 /* explicitly compute R^-T */ 7136 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 7137 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 7138 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7139 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 7140 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7141 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7142 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7143 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 7144 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr); 7145 PetscCall(PetscFPTrapPop()); 7146 7147 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 7148 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7149 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7150 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7151 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7152 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7153 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 7154 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr); 7155 PetscCall(PetscFPTrapPop()); 7156 7157 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 7158 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 7159 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 7160 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7161 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7162 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7163 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7164 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7165 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 7166 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7167 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 7168 PetscCall(PetscFPTrapPop()); 7169 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7170 7171 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 7172 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 7173 /* insert cols for primal dofs */ 7174 for (j = 0; j < primal_dofs; j++) { 7175 start_vals = &qr_basis[j * size_of_constraint]; 7176 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7177 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7178 } 7179 /* insert cols for dual dofs */ 7180 for (j = 0, k = 0; j < dual_dofs; k++) { 7181 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 7182 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 7183 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7184 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7185 j++; 7186 } 7187 } 7188 7189 /* check change of basis */ 7190 if (pcbddc->dbg_flag) { 7191 PetscInt ii, jj; 7192 PetscBool valid_qr = PETSC_TRUE; 7193 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 7194 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7195 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 7196 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7197 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 7198 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 7199 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7200 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 7201 PetscCall(PetscFPTrapPop()); 7202 for (jj = 0; jj < size_of_constraint; jj++) { 7203 for (ii = 0; ii < primal_dofs; ii++) { 7204 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 7205 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 7206 } 7207 } 7208 if (!valid_qr) { 7209 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 7210 for (jj = 0; jj < size_of_constraint; jj++) { 7211 for (ii = 0; ii < primal_dofs; ii++) { 7212 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 7213 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7214 } 7215 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 7216 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7217 } 7218 } 7219 } 7220 } else { 7221 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 7222 } 7223 } 7224 } else { /* simple transformation block */ 7225 PetscInt row, col; 7226 PetscScalar val, norm; 7227 7228 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7229 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 7230 for (j = 0; j < size_of_constraint; j++) { 7231 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 7232 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7233 if (!PetscBTLookup(is_primal, row_B)) { 7234 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 7235 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 7236 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 7237 } else { 7238 for (k = 0; k < size_of_constraint; k++) { 7239 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7240 if (row != col) { 7241 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 7242 } else { 7243 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 7244 } 7245 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 7246 } 7247 } 7248 } 7249 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); 7250 } 7251 } else { 7252 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); 7253 } 7254 } 7255 7256 /* free workspace */ 7257 if (qr_needed) { 7258 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 7259 PetscCall(PetscFree(trs_rhs)); 7260 PetscCall(PetscFree(qr_tau)); 7261 PetscCall(PetscFree(qr_work)); 7262 PetscCall(PetscFree(gqr_work)); 7263 PetscCall(PetscFree(qr_basis)); 7264 } 7265 PetscCall(PetscBTDestroy(&is_primal)); 7266 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7267 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7268 7269 /* assembling of global change of variable */ 7270 if (!pcbddc->fake_change) { 7271 Mat tmat; 7272 7273 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 7274 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 7275 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 7276 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 7277 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 7278 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 7279 PetscCall(MatConvert(tmat, MATAIJ, MAT_INITIAL_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 7280 PetscCall(MatDestroy(&tmat)); 7281 PetscCall(VecSet(pcis->vec1_global, 0.0)); 7282 PetscCall(VecSet(pcis->vec1_N, 1.0)); 7283 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7284 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7285 PetscCall(VecReciprocal(pcis->vec1_global)); 7286 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 7287 7288 /* check */ 7289 if (pcbddc->dbg_flag) { 7290 PetscReal error; 7291 Vec x, x_change; 7292 7293 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 7294 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 7295 PetscCall(VecSetRandom(x, NULL)); 7296 PetscCall(VecCopy(x, pcis->vec1_global)); 7297 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7298 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7299 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 7300 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7301 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7302 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 7303 PetscCall(VecAXPY(x, -1.0, x_change)); 7304 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 7305 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 7306 PetscCall(VecDestroy(&x)); 7307 PetscCall(VecDestroy(&x_change)); 7308 } 7309 /* adapt sub_schurs computed (if any) */ 7310 if (pcbddc->use_deluxe_scaling) { 7311 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 7312 7313 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 7314 if (sub_schurs && sub_schurs->S_Ej_all) { 7315 Mat S_new, tmat; 7316 IS is_all_N, is_V_Sall = NULL; 7317 7318 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 7319 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 7320 if (pcbddc->deluxe_zerorows) { 7321 ISLocalToGlobalMapping NtoSall; 7322 IS is_V; 7323 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 7324 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 7325 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 7326 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 7327 PetscCall(ISDestroy(&is_V)); 7328 } 7329 PetscCall(ISDestroy(&is_all_N)); 7330 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7331 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 7332 PetscCall(PetscObjectReference((PetscObject)S_new)); 7333 if (pcbddc->deluxe_zerorows) { 7334 const PetscScalar *array; 7335 const PetscInt *idxs_V, *idxs_all; 7336 PetscInt i, n_V; 7337 7338 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7339 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 7340 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 7341 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 7342 PetscCall(VecGetArrayRead(pcis->D, &array)); 7343 for (i = 0; i < n_V; i++) { 7344 PetscScalar val; 7345 PetscInt idx; 7346 7347 idx = idxs_V[i]; 7348 val = array[idxs_all[idxs_V[i]]]; 7349 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 7350 } 7351 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 7352 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 7353 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 7354 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 7355 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 7356 } 7357 sub_schurs->S_Ej_all = S_new; 7358 PetscCall(MatDestroy(&S_new)); 7359 if (sub_schurs->sum_S_Ej_all) { 7360 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7361 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 7362 PetscCall(PetscObjectReference((PetscObject)S_new)); 7363 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7364 sub_schurs->sum_S_Ej_all = S_new; 7365 PetscCall(MatDestroy(&S_new)); 7366 } 7367 PetscCall(ISDestroy(&is_V_Sall)); 7368 PetscCall(MatDestroy(&tmat)); 7369 } 7370 /* destroy any change of basis context in sub_schurs */ 7371 if (sub_schurs && sub_schurs->change) { 7372 PetscInt i; 7373 7374 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i])); 7375 PetscCall(PetscFree(sub_schurs->change)); 7376 } 7377 } 7378 if (pcbddc->switch_static) { /* need to save the local change */ 7379 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7380 } else { 7381 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 7382 } 7383 /* determine if any process has changed the pressures locally */ 7384 pcbddc->change_interior = pcbddc->benign_have_null; 7385 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7386 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7387 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7388 pcbddc->use_qr_single = qr_needed; 7389 } 7390 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7391 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7392 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7393 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7394 } else { 7395 Mat benign_global = NULL; 7396 if (pcbddc->benign_have_null) { 7397 Mat M; 7398 7399 pcbddc->change_interior = PETSC_TRUE; 7400 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 7401 PetscCall(VecReciprocal(pcis->vec1_N)); 7402 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 7403 if (pcbddc->benign_change) { 7404 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 7405 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 7406 } else { 7407 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 7408 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 7409 } 7410 PetscCall(MatISSetLocalMat(benign_global, M)); 7411 PetscCall(MatDestroy(&M)); 7412 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 7413 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 7414 } 7415 if (pcbddc->user_ChangeOfBasisMatrix) { 7416 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix)); 7417 PetscCall(MatDestroy(&benign_global)); 7418 } else if (pcbddc->benign_have_null) { 7419 pcbddc->ChangeOfBasisMatrix = benign_global; 7420 } 7421 } 7422 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7423 IS is_global; 7424 const PetscInt *gidxs; 7425 7426 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 7427 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 7428 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 7429 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 7430 PetscCall(ISDestroy(&is_global)); 7431 } 7432 } 7433 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); 7434 7435 if (!pcbddc->fake_change) { 7436 /* add pressure dofs to set of primal nodes for numbering purposes */ 7437 for (i = 0; i < pcbddc->benign_n; i++) { 7438 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7439 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7440 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7441 pcbddc->local_primal_size_cc++; 7442 pcbddc->local_primal_size++; 7443 } 7444 7445 /* check if a new primal space has been introduced (also take into account benign trick) */ 7446 pcbddc->new_primal_space_local = PETSC_TRUE; 7447 if (olocal_primal_size == pcbddc->local_primal_size) { 7448 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7449 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7450 if (!pcbddc->new_primal_space_local) { 7451 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7452 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7453 } 7454 } 7455 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7456 PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 7457 } 7458 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 7459 7460 /* flush dbg viewer */ 7461 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7462 7463 /* free workspace */ 7464 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7465 PetscCall(PetscBTDestroy(&change_basis)); 7466 if (!pcbddc->adaptive_selection) { 7467 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 7468 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 7469 } else { 7470 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 7471 PetscCall(PetscFree(constraints_n)); 7472 PetscCall(PetscFree(constraints_idxs_B)); 7473 } 7474 PetscFunctionReturn(PETSC_SUCCESS); 7475 } 7476 7477 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7478 { 7479 ISLocalToGlobalMapping map; 7480 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7481 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 7482 PetscInt i, N; 7483 PetscBool rcsr = PETSC_FALSE; 7484 7485 PetscFunctionBegin; 7486 if (pcbddc->recompute_topography) { 7487 pcbddc->graphanalyzed = PETSC_FALSE; 7488 /* Reset previously computed graph */ 7489 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7490 /* Init local Graph struct */ 7491 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 7492 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 7493 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 7494 7495 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); 7496 /* Check validity of the csr graph passed in by the user */ 7497 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 7498 pcbddc->mat_graph->nvtxs); 7499 7500 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7501 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7502 PetscInt *xadj, *adjncy; 7503 PetscInt nvtxs; 7504 PetscBool flg_row; 7505 Mat A; 7506 7507 PetscCall(PetscObjectReference((PetscObject)matis->A)); 7508 A = matis->A; 7509 for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) { 7510 Mat AtA; 7511 7512 PetscCall(MatProductCreate(A, A, NULL, &AtA)); 7513 PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_")); 7514 PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB)); 7515 PetscCall(MatProductSetFromOptions(AtA)); 7516 PetscCall(MatProductSymbolic(AtA)); 7517 PetscCall(MatProductClear(AtA)); 7518 /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */ 7519 AtA->assembled = PETSC_TRUE; 7520 PetscCall(MatDestroy(&A)); 7521 A = AtA; 7522 } 7523 PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7524 if (flg_row) { 7525 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 7526 pcbddc->computed_rowadj = PETSC_TRUE; 7527 PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7528 rcsr = PETSC_TRUE; 7529 } 7530 PetscCall(MatDestroy(&A)); 7531 } 7532 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7533 7534 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7535 PetscReal *lcoords; 7536 PetscInt n; 7537 MPI_Datatype dimrealtype; 7538 PetscMPIInt cdimi; 7539 7540 /* TODO: support for blocked */ 7541 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 7542 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7543 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 7544 PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi)); 7545 PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype)); 7546 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7547 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7548 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7549 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7550 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7551 7552 pcbddc->mat_graph->coords = lcoords; 7553 pcbddc->mat_graph->cloc = PETSC_TRUE; 7554 pcbddc->mat_graph->cnloc = n; 7555 } 7556 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 7557 pcbddc->mat_graph->nvtxs); 7558 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7559 7560 /* attach info on disconnected subdomains if present */ 7561 if (pcbddc->n_local_subs) { 7562 PetscInt *local_subs, n, totn; 7563 7564 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7565 PetscCall(PetscMalloc1(n, &local_subs)); 7566 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 7567 for (i = 0; i < pcbddc->n_local_subs; i++) { 7568 const PetscInt *idxs; 7569 PetscInt nl, j; 7570 7571 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 7572 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 7573 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 7574 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 7575 } 7576 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 7577 pcbddc->mat_graph->n_local_subs = totn + 1; 7578 pcbddc->mat_graph->local_subs = local_subs; 7579 } 7580 7581 /* Setup of Graph */ 7582 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 7583 } 7584 7585 if (!pcbddc->graphanalyzed) { 7586 /* Graph's connected components analysis */ 7587 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7588 pcbddc->graphanalyzed = PETSC_TRUE; 7589 pcbddc->corner_selected = pcbddc->corner_selection; 7590 } 7591 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7592 PetscFunctionReturn(PETSC_SUCCESS); 7593 } 7594 7595 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7596 { 7597 PetscInt i, j, n; 7598 PetscScalar *alphas; 7599 PetscReal norm, *onorms; 7600 7601 PetscFunctionBegin; 7602 n = *nio; 7603 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 7604 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 7605 PetscCall(VecNormalize(vecs[0], &norm)); 7606 if (norm < PETSC_SMALL) { 7607 onorms[0] = 0.0; 7608 PetscCall(VecSet(vecs[0], 0.0)); 7609 } else { 7610 onorms[0] = norm; 7611 } 7612 7613 for (i = 1; i < n; i++) { 7614 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 7615 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 7616 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 7617 PetscCall(VecNormalize(vecs[i], &norm)); 7618 if (norm < PETSC_SMALL) { 7619 onorms[i] = 0.0; 7620 PetscCall(VecSet(vecs[i], 0.0)); 7621 } else { 7622 onorms[i] = norm; 7623 } 7624 } 7625 /* push nonzero vectors at the beginning */ 7626 for (i = 0; i < n; i++) { 7627 if (onorms[i] == 0.0) { 7628 for (j = i + 1; j < n; j++) { 7629 if (onorms[j] != 0.0) { 7630 PetscCall(VecCopy(vecs[j], vecs[i])); 7631 onorms[i] = onorms[j]; 7632 onorms[j] = 0.0; 7633 break; 7634 } 7635 } 7636 } 7637 } 7638 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7639 PetscCall(PetscFree2(alphas, onorms)); 7640 PetscFunctionReturn(PETSC_SUCCESS); 7641 } 7642 7643 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) 7644 { 7645 ISLocalToGlobalMapping mapping; 7646 Mat A; 7647 PetscInt n_neighs, *neighs, *n_shared, **shared; 7648 PetscMPIInt size, rank, color; 7649 PetscInt *xadj, *adjncy; 7650 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 7651 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 7652 PetscInt void_procs, *procs_candidates = NULL; 7653 PetscInt xadj_count, *count; 7654 PetscBool ismatis, use_vwgt = PETSC_FALSE; 7655 PetscSubcomm psubcomm; 7656 MPI_Comm subcomm; 7657 7658 PetscFunctionBegin; 7659 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7660 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7661 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7662 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 7663 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 7664 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 7665 7666 if (have_void) *have_void = PETSC_FALSE; 7667 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 7668 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7669 PetscCall(MatISGetLocalMat(mat, &A)); 7670 PetscCall(MatGetLocalSize(A, &n, NULL)); 7671 im_active = !!n; 7672 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 7673 void_procs = size - active_procs; 7674 /* get ranks of non-active processes in mat communicator */ 7675 if (void_procs) { 7676 PetscInt ncand; 7677 7678 if (have_void) *have_void = PETSC_TRUE; 7679 PetscCall(PetscMalloc1(size, &procs_candidates)); 7680 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 7681 for (i = 0, ncand = 0; i < size; i++) { 7682 if (!procs_candidates[i]) procs_candidates[ncand++] = i; 7683 } 7684 /* force n_subdomains to be not greater that the number of non-active processes */ 7685 *n_subdomains = PetscMin(void_procs, *n_subdomains); 7686 } 7687 7688 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7689 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7690 PetscCall(MatGetSize(mat, &N, NULL)); 7691 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7692 PetscInt issize, isidx, dest; 7693 PetscBool default_sub; 7694 7695 if (*n_subdomains == 1) dest = 0; 7696 else dest = rank; 7697 if (im_active) { 7698 issize = 1; 7699 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7700 isidx = procs_candidates[dest]; 7701 } else { 7702 isidx = dest; 7703 } 7704 } else { 7705 issize = 0; 7706 isidx = rank; 7707 } 7708 if (*n_subdomains != 1) *n_subdomains = active_procs; 7709 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 7710 default_sub = (PetscBool)(isidx == rank); 7711 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &default_sub, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 7712 if (default_sub) PetscCall(PetscObjectSetName((PetscObject)*is_sends, "default subassembling")); 7713 PetscCall(PetscFree(procs_candidates)); 7714 PetscFunctionReturn(PETSC_SUCCESS); 7715 } 7716 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL)); 7717 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL)); 7718 threshold = PetscMax(threshold, 2); 7719 7720 /* Get info on mapping */ 7721 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7722 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7723 7724 /* build local CSR graph of subdomains' connectivity */ 7725 PetscCall(PetscMalloc1(2, &xadj)); 7726 xadj[0] = 0; 7727 xadj[1] = PetscMax(n_neighs - 1, 0); 7728 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7729 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7730 PetscCall(PetscCalloc1(n, &count)); 7731 for (i = 1; i < n_neighs; i++) 7732 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7733 7734 xadj_count = 0; 7735 for (i = 1; i < n_neighs; i++) { 7736 for (j = 0; j < n_shared[i]; j++) { 7737 if (count[shared[i][j]] < threshold) { 7738 adjncy[xadj_count] = neighs[i]; 7739 adjncy_wgt[xadj_count] = n_shared[i]; 7740 xadj_count++; 7741 break; 7742 } 7743 } 7744 } 7745 xadj[1] = xadj_count; 7746 PetscCall(PetscFree(count)); 7747 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7748 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7749 7750 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7751 7752 /* Restrict work on active processes only */ 7753 PetscCall(PetscMPIIntCast(im_active, &color)); 7754 if (void_procs) { 7755 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7756 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7757 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7758 subcomm = PetscSubcommChild(psubcomm); 7759 } else { 7760 psubcomm = NULL; 7761 subcomm = PetscObjectComm((PetscObject)mat); 7762 } 7763 7764 v_wgt = NULL; 7765 if (!color) { 7766 PetscCall(PetscFree(xadj)); 7767 PetscCall(PetscFree(adjncy)); 7768 PetscCall(PetscFree(adjncy_wgt)); 7769 } else { 7770 Mat subdomain_adj; 7771 IS new_ranks, new_ranks_contig; 7772 MatPartitioning partitioner; 7773 PetscInt rstart, rend; 7774 PetscMPIInt irstart = 0, irend = 0; 7775 PetscInt *is_indices, *oldranks; 7776 PetscMPIInt size; 7777 PetscBool aggregate; 7778 7779 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7780 if (void_procs) { 7781 PetscInt prank = rank; 7782 PetscCall(PetscMalloc1(size, &oldranks)); 7783 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7784 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); 7785 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7786 } else { 7787 oldranks = NULL; 7788 } 7789 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7790 if (aggregate) { /* TODO: all this part could be made more efficient */ 7791 PetscInt lrows, row, ncols, *cols; 7792 PetscMPIInt nrank; 7793 PetscScalar *vals; 7794 7795 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7796 lrows = 0; 7797 if (nrank < redprocs) { 7798 lrows = size / redprocs; 7799 if (nrank < size % redprocs) lrows++; 7800 } 7801 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7802 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7803 PetscCall(PetscMPIIntCast(rstart, &irstart)); 7804 PetscCall(PetscMPIIntCast(rend, &irend)); 7805 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7806 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7807 row = nrank; 7808 ncols = xadj[1] - xadj[0]; 7809 cols = adjncy; 7810 PetscCall(PetscMalloc1(ncols, &vals)); 7811 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7812 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7813 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7814 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7815 PetscCall(PetscFree(xadj)); 7816 PetscCall(PetscFree(adjncy)); 7817 PetscCall(PetscFree(adjncy_wgt)); 7818 PetscCall(PetscFree(vals)); 7819 if (use_vwgt) { 7820 Vec v; 7821 const PetscScalar *array; 7822 PetscInt nl; 7823 7824 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7825 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7826 PetscCall(VecAssemblyBegin(v)); 7827 PetscCall(VecAssemblyEnd(v)); 7828 PetscCall(VecGetLocalSize(v, &nl)); 7829 PetscCall(VecGetArrayRead(v, &array)); 7830 PetscCall(PetscMalloc1(nl, &v_wgt)); 7831 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7832 PetscCall(VecRestoreArrayRead(v, &array)); 7833 PetscCall(VecDestroy(&v)); 7834 } 7835 } else { 7836 PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7837 if (use_vwgt) { 7838 PetscCall(PetscMalloc1(1, &v_wgt)); 7839 v_wgt[0] = n; 7840 } 7841 } 7842 /* PetscCall(MatView(subdomain_adj,0)); */ 7843 7844 /* Partition */ 7845 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7846 #if defined(PETSC_HAVE_PTSCOTCH) 7847 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7848 #elif defined(PETSC_HAVE_PARMETIS) 7849 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7850 #else 7851 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7852 #endif 7853 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7854 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7855 *n_subdomains = PetscMin(size, *n_subdomains); 7856 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7857 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7858 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7859 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7860 7861 /* renumber new_ranks to avoid "holes" in new set of processors */ 7862 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7863 PetscCall(ISDestroy(&new_ranks)); 7864 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7865 if (!aggregate) { 7866 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7867 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7868 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7869 } else if (oldranks) { 7870 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7871 } else { 7872 ranks_send_to_idx[0] = is_indices[0]; 7873 } 7874 } else { 7875 PetscInt idx = 0; 7876 PetscMPIInt tag; 7877 MPI_Request *reqs; 7878 7879 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7880 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7881 for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); 7882 PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7883 PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE)); 7884 PetscCall(PetscFree(reqs)); 7885 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7886 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7887 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7888 } else if (oldranks) { 7889 ranks_send_to_idx[0] = oldranks[idx]; 7890 } else { 7891 ranks_send_to_idx[0] = idx; 7892 } 7893 } 7894 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7895 /* clean up */ 7896 PetscCall(PetscFree(oldranks)); 7897 PetscCall(ISDestroy(&new_ranks_contig)); 7898 PetscCall(MatDestroy(&subdomain_adj)); 7899 PetscCall(MatPartitioningDestroy(&partitioner)); 7900 } 7901 PetscCall(PetscSubcommDestroy(&psubcomm)); 7902 PetscCall(PetscFree(procs_candidates)); 7903 7904 /* assemble parallel IS for sends */ 7905 i = 1; 7906 if (!color) i = 0; 7907 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 7908 PetscFunctionReturn(PETSC_SUCCESS); 7909 } 7910 7911 typedef enum { 7912 MATDENSE_PRIVATE = 0, 7913 MATAIJ_PRIVATE, 7914 MATBAIJ_PRIVATE, 7915 MATSBAIJ_PRIVATE 7916 } MatTypePrivate; 7917 7918 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7919 { 7920 Mat local_mat; 7921 IS is_sends_internal; 7922 PetscInt rows, cols, new_local_rows; 7923 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 7924 PetscBool ismatis, isdense, newisdense, destroy_mat; 7925 ISLocalToGlobalMapping l2gmap; 7926 PetscInt *l2gmap_indices; 7927 const PetscInt *is_indices; 7928 MatType new_local_type; 7929 /* buffers */ 7930 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 7931 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 7932 PetscInt *recv_buffer_idxs_local; 7933 PetscScalar *ptr_vals, *recv_buffer_vals; 7934 const PetscScalar *send_buffer_vals; 7935 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 7936 /* MPI */ 7937 MPI_Comm comm, comm_n; 7938 PetscSubcomm subcomm; 7939 PetscMPIInt n_sends, n_recvs, size; 7940 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 7941 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 7942 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 7943 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 7944 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 7945 7946 PetscFunctionBegin; 7947 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7948 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7949 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7950 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 7951 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 7952 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 7953 PetscValidLogicalCollectiveBool(mat, reuse, 6); 7954 PetscValidLogicalCollectiveInt(mat, nis, 8); 7955 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 7956 if (nvecs) { 7957 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 7958 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 7959 } 7960 /* further checks */ 7961 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7962 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7963 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7964 7965 PetscCall(MatGetSize(local_mat, &rows, &cols)); 7966 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 7967 if (reuse && *mat_n) { 7968 PetscInt mrows, mcols, mnrows, mncols; 7969 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 7970 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 7971 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 7972 PetscCall(MatGetSize(mat, &mrows, &mcols)); 7973 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 7974 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 7975 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 7976 } 7977 PetscCall(MatGetBlockSize(local_mat, &bs)); 7978 PetscValidLogicalCollectiveInt(mat, bs, 1); 7979 7980 /* prepare IS for sending if not provided */ 7981 if (!is_sends) { 7982 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 7983 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 7984 } else { 7985 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7986 is_sends_internal = is_sends; 7987 } 7988 7989 /* get comm */ 7990 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 7991 7992 /* compute number of sends */ 7993 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 7994 PetscCall(PetscMPIIntCast(i, &n_sends)); 7995 7996 /* compute number of receives */ 7997 PetscCallMPI(MPI_Comm_size(comm, &size)); 7998 PetscCall(PetscMalloc1(size, &iflags)); 7999 PetscCall(PetscArrayzero(iflags, size)); 8000 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 8001 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 8002 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 8003 PetscCall(PetscFree(iflags)); 8004 8005 /* restrict comm if requested */ 8006 subcomm = NULL; 8007 destroy_mat = PETSC_FALSE; 8008 if (restrict_comm) { 8009 PetscMPIInt color, subcommsize; 8010 8011 color = 0; 8012 if (restrict_full) { 8013 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 8014 } else { 8015 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 8016 } 8017 PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 8018 subcommsize = size - subcommsize; 8019 /* check if reuse has been requested */ 8020 if (reuse) { 8021 if (*mat_n) { 8022 PetscMPIInt subcommsize2; 8023 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 8024 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 8025 comm_n = PetscObjectComm((PetscObject)*mat_n); 8026 } else { 8027 comm_n = PETSC_COMM_SELF; 8028 } 8029 } else { /* MAT_INITIAL_MATRIX */ 8030 PetscMPIInt rank; 8031 8032 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 8033 PetscCall(PetscSubcommCreate(comm, &subcomm)); 8034 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 8035 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 8036 comm_n = PetscSubcommChild(subcomm); 8037 } 8038 /* flag to destroy *mat_n if not significative */ 8039 if (color) destroy_mat = PETSC_TRUE; 8040 } else { 8041 comm_n = comm; 8042 } 8043 8044 /* prepare send/receive buffers */ 8045 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 8046 PetscCall(PetscArrayzero(ilengths_idxs, size)); 8047 PetscCall(PetscMalloc1(size, &ilengths_vals)); 8048 PetscCall(PetscArrayzero(ilengths_vals, size)); 8049 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); 8050 8051 /* Get data from local matrices */ 8052 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 8053 /* TODO: See below some guidelines on how to prepare the local buffers */ 8054 /* 8055 send_buffer_vals should contain the raw values of the local matrix 8056 send_buffer_idxs should contain: 8057 - MatType_PRIVATE type 8058 - PetscInt size_of_l2gmap 8059 - PetscInt global_row_indices[size_of_l2gmap] 8060 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 8061 */ 8062 { 8063 ISLocalToGlobalMapping mapping; 8064 8065 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 8066 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 8067 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 8068 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 8069 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 8070 send_buffer_idxs[1] = i; 8071 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 8072 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 8073 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 8074 PetscCall(PetscMPIIntCast(i, &len)); 8075 for (i = 0; i < n_sends; i++) { 8076 ilengths_vals[is_indices[i]] = len * len; 8077 ilengths_idxs[is_indices[i]] = len + 2; 8078 } 8079 } 8080 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 8081 /* additional is (if any) */ 8082 if (nis) { 8083 PetscMPIInt psum; 8084 PetscInt j; 8085 for (j = 0, psum = 0; j < nis; j++) { 8086 PetscInt plen; 8087 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8088 PetscCall(PetscMPIIntCast(plen, &len)); 8089 psum += len + 1; /* indices + length */ 8090 } 8091 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 8092 for (j = 0, psum = 0; j < nis; j++) { 8093 PetscInt plen; 8094 const PetscInt *is_array_idxs; 8095 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8096 send_buffer_idxs_is[psum] = plen; 8097 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 8098 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 8099 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 8100 psum += plen + 1; /* indices + length */ 8101 } 8102 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum; 8103 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 8104 } 8105 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8106 8107 buf_size_idxs = 0; 8108 buf_size_vals = 0; 8109 buf_size_idxs_is = 0; 8110 buf_size_vecs = 0; 8111 for (i = 0; i < n_recvs; i++) { 8112 buf_size_idxs += olengths_idxs[i]; 8113 buf_size_vals += olengths_vals[i]; 8114 if (nis) buf_size_idxs_is += olengths_idxs_is[i]; 8115 if (nvecs) buf_size_vecs += olengths_idxs[i]; 8116 } 8117 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 8118 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 8119 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 8120 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 8121 8122 /* get new tags for clean communications */ 8123 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 8124 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 8125 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 8126 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 8127 8128 /* allocate for requests */ 8129 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 8130 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 8131 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 8132 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 8133 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 8134 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 8135 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 8136 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 8137 8138 /* communications */ 8139 ptr_idxs = recv_buffer_idxs; 8140 ptr_vals = recv_buffer_vals; 8141 ptr_idxs_is = recv_buffer_idxs_is; 8142 ptr_vecs = recv_buffer_vecs; 8143 for (i = 0; i < n_recvs; i++) { 8144 PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i])); 8145 PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i])); 8146 ptr_idxs += olengths_idxs[i]; 8147 ptr_vals += olengths_vals[i]; 8148 if (nis) { 8149 PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i])); 8150 ptr_idxs_is += olengths_idxs_is[i]; 8151 } 8152 if (nvecs) { 8153 PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i])); 8154 ptr_vecs += olengths_idxs[i] - 2; 8155 } 8156 } 8157 for (i = 0; i < n_sends; i++) { 8158 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 8159 PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 8160 PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 8161 if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); 8162 if (nvecs) { 8163 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8164 PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 8165 } 8166 } 8167 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 8168 PetscCall(ISDestroy(&is_sends_internal)); 8169 8170 /* assemble new l2g map */ 8171 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 8172 ptr_idxs = recv_buffer_idxs; 8173 new_local_rows = 0; 8174 for (i = 0; i < n_recvs; i++) { 8175 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8176 ptr_idxs += olengths_idxs[i]; 8177 } 8178 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 8179 ptr_idxs = recv_buffer_idxs; 8180 new_local_rows = 0; 8181 for (i = 0; i < n_recvs; i++) { 8182 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 8183 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8184 ptr_idxs += olengths_idxs[i]; 8185 } 8186 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 8187 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 8188 PetscCall(PetscFree(l2gmap_indices)); 8189 8190 /* infer new local matrix type from received local matrices type */ 8191 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 8192 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 8193 if (n_recvs) { 8194 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 8195 ptr_idxs = recv_buffer_idxs; 8196 for (i = 0; i < n_recvs; i++) { 8197 if ((PetscInt)new_local_type_private != *ptr_idxs) { 8198 new_local_type_private = MATAIJ_PRIVATE; 8199 break; 8200 } 8201 ptr_idxs += olengths_idxs[i]; 8202 } 8203 switch (new_local_type_private) { 8204 case MATDENSE_PRIVATE: 8205 new_local_type = MATSEQAIJ; 8206 bs = 1; 8207 break; 8208 case MATAIJ_PRIVATE: 8209 new_local_type = MATSEQAIJ; 8210 bs = 1; 8211 break; 8212 case MATBAIJ_PRIVATE: 8213 new_local_type = MATSEQBAIJ; 8214 break; 8215 case MATSBAIJ_PRIVATE: 8216 new_local_type = MATSEQSBAIJ; 8217 break; 8218 default: 8219 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 8220 } 8221 } else { /* by default, new_local_type is seqaij */ 8222 new_local_type = MATSEQAIJ; 8223 bs = 1; 8224 } 8225 8226 /* create MATIS object if needed */ 8227 if (!reuse) { 8228 PetscCall(MatGetSize(mat, &rows, &cols)); 8229 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8230 } else { 8231 /* it also destroys the local matrices */ 8232 if (*mat_n) { 8233 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 8234 } else { /* this is a fake object */ 8235 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8236 } 8237 } 8238 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 8239 PetscCall(MatSetType(local_mat, new_local_type)); 8240 8241 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 8242 8243 /* Global to local map of received indices */ 8244 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 8245 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 8246 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 8247 8248 /* restore attributes -> type of incoming data and its size */ 8249 buf_size_idxs = 0; 8250 for (i = 0; i < n_recvs; i++) { 8251 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 8252 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 8253 buf_size_idxs += olengths_idxs[i]; 8254 } 8255 PetscCall(PetscFree(recv_buffer_idxs)); 8256 8257 /* set preallocation */ 8258 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 8259 if (!newisdense) { 8260 PetscInt *new_local_nnz = NULL; 8261 8262 ptr_idxs = recv_buffer_idxs_local; 8263 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); 8264 for (i = 0; i < n_recvs; i++) { 8265 PetscInt j; 8266 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 8267 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); 8268 } else { 8269 /* TODO */ 8270 } 8271 ptr_idxs += olengths_idxs[i]; 8272 } 8273 if (new_local_nnz) { 8274 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 8275 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 8276 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 8277 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8278 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 8279 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8280 } else { 8281 PetscCall(MatSetUp(local_mat)); 8282 } 8283 PetscCall(PetscFree(new_local_nnz)); 8284 } else { 8285 PetscCall(MatSetUp(local_mat)); 8286 } 8287 8288 /* set values */ 8289 ptr_vals = recv_buffer_vals; 8290 ptr_idxs = recv_buffer_idxs_local; 8291 for (i = 0; i < n_recvs; i++) { 8292 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 8293 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 8294 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 8295 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 8296 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 8297 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 8298 } else { 8299 /* TODO */ 8300 } 8301 ptr_idxs += olengths_idxs[i]; 8302 ptr_vals += olengths_vals[i]; 8303 } 8304 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 8305 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 8306 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 8307 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 8308 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 8309 PetscCall(PetscFree(recv_buffer_vals)); 8310 8311 #if 0 8312 if (!restrict_comm) { /* check */ 8313 Vec lvec,rvec; 8314 PetscReal infty_error; 8315 8316 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 8317 PetscCall(VecSetRandom(rvec,NULL)); 8318 PetscCall(MatMult(mat,rvec,lvec)); 8319 PetscCall(VecScale(lvec,-1.0)); 8320 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 8321 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 8322 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 8323 PetscCall(VecDestroy(&rvec)); 8324 PetscCall(VecDestroy(&lvec)); 8325 } 8326 #endif 8327 8328 /* assemble new additional is (if any) */ 8329 if (nis) { 8330 PetscInt **temp_idxs, *count_is, j, psum; 8331 8332 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 8333 PetscCall(PetscCalloc1(nis, &count_is)); 8334 ptr_idxs = recv_buffer_idxs_is; 8335 psum = 0; 8336 for (i = 0; i < n_recvs; i++) { 8337 for (j = 0; j < nis; j++) { 8338 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8339 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8340 psum += plen; 8341 ptr_idxs += plen + 1; /* shift pointer to received data */ 8342 } 8343 } 8344 PetscCall(PetscMalloc1(nis, &temp_idxs)); 8345 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 8346 for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]); 8347 PetscCall(PetscArrayzero(count_is, nis)); 8348 ptr_idxs = recv_buffer_idxs_is; 8349 for (i = 0; i < n_recvs; i++) { 8350 for (j = 0; j < nis; j++) { 8351 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8352 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 8353 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8354 ptr_idxs += plen + 1; /* shift pointer to received data */ 8355 } 8356 } 8357 for (i = 0; i < nis; i++) { 8358 PetscCall(ISDestroy(&isarray[i])); 8359 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 8360 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 8361 } 8362 PetscCall(PetscFree(count_is)); 8363 PetscCall(PetscFree(temp_idxs[0])); 8364 PetscCall(PetscFree(temp_idxs)); 8365 } 8366 /* free workspace */ 8367 PetscCall(PetscFree(recv_buffer_idxs_is)); 8368 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 8369 PetscCall(PetscFree(send_buffer_idxs)); 8370 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 8371 if (isdense) { 8372 PetscCall(MatISGetLocalMat(mat, &local_mat)); 8373 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 8374 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8375 } else { 8376 /* PetscCall(PetscFree(send_buffer_vals)); */ 8377 } 8378 if (nis) { 8379 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 8380 PetscCall(PetscFree(send_buffer_idxs_is)); 8381 } 8382 8383 if (nvecs) { 8384 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 8385 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 8386 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8387 PetscCall(VecDestroy(&nnsp_vec[0])); 8388 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 8389 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 8390 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 8391 /* set values */ 8392 ptr_vals = recv_buffer_vecs; 8393 ptr_idxs = recv_buffer_idxs_local; 8394 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8395 for (i = 0; i < n_recvs; i++) { 8396 PetscInt j; 8397 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); 8398 ptr_idxs += olengths_idxs[i]; 8399 ptr_vals += olengths_idxs[i] - 2; 8400 } 8401 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8402 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8403 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8404 } 8405 8406 PetscCall(PetscFree(recv_buffer_vecs)); 8407 PetscCall(PetscFree(recv_buffer_idxs_local)); 8408 PetscCall(PetscFree(recv_req_idxs)); 8409 PetscCall(PetscFree(recv_req_vals)); 8410 PetscCall(PetscFree(recv_req_vecs)); 8411 PetscCall(PetscFree(recv_req_idxs_is)); 8412 PetscCall(PetscFree(send_req_idxs)); 8413 PetscCall(PetscFree(send_req_vals)); 8414 PetscCall(PetscFree(send_req_vecs)); 8415 PetscCall(PetscFree(send_req_idxs_is)); 8416 PetscCall(PetscFree(ilengths_vals)); 8417 PetscCall(PetscFree(ilengths_idxs)); 8418 PetscCall(PetscFree(olengths_vals)); 8419 PetscCall(PetscFree(olengths_idxs)); 8420 PetscCall(PetscFree(onodes)); 8421 if (nis) { 8422 PetscCall(PetscFree(ilengths_idxs_is)); 8423 PetscCall(PetscFree(olengths_idxs_is)); 8424 PetscCall(PetscFree(onodes_is)); 8425 } 8426 PetscCall(PetscSubcommDestroy(&subcomm)); 8427 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 8428 PetscCall(MatDestroy(mat_n)); 8429 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i])); 8430 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8431 PetscCall(VecDestroy(&nnsp_vec[0])); 8432 } 8433 *mat_n = NULL; 8434 } 8435 PetscFunctionReturn(PETSC_SUCCESS); 8436 } 8437 8438 /* temporary hack into ksp private data structure */ 8439 #include <petsc/private/kspimpl.h> 8440 8441 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat) 8442 { 8443 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8444 PC_IS *pcis = (PC_IS *)pc->data; 8445 PCBDDCGraph graph = pcbddc->mat_graph; 8446 Mat coarse_mat, coarse_mat_is; 8447 Mat coarsedivudotp = NULL; 8448 Mat coarseG, t_coarse_mat_is; 8449 MatNullSpace CoarseNullSpace = NULL; 8450 ISLocalToGlobalMapping coarse_islg; 8451 IS coarse_is, *isarray, corners; 8452 PetscInt i, im_active = -1, active_procs = -1; 8453 PetscInt nis, nisdofs, nisneu, nisvert; 8454 PetscInt coarse_eqs_per_proc, coarsening_ratio; 8455 PC pc_temp; 8456 PCType coarse_pc_type; 8457 KSPType coarse_ksp_type; 8458 PetscBool multilevel_requested, multilevel_allowed; 8459 PetscBool coarse_reuse, multi_element = graph->multi_element; 8460 PetscInt ncoarse, nedcfield; 8461 PetscBool compute_vecs = PETSC_FALSE; 8462 PetscScalar *array; 8463 MatReuse coarse_mat_reuse; 8464 PetscBool restr, full_restr, have_void; 8465 PetscMPIInt size; 8466 8467 PetscFunctionBegin; 8468 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8469 /* Assign global numbering to coarse dofs */ 8470 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8471 PetscInt ocoarse_size; 8472 compute_vecs = PETSC_TRUE; 8473 8474 pcbddc->new_primal_space = PETSC_TRUE; 8475 ocoarse_size = pcbddc->coarse_size; 8476 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8477 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 8478 /* see if we can avoid some work */ 8479 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8480 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8481 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8482 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8483 coarse_reuse = PETSC_FALSE; 8484 } else { /* we can safely reuse already computed coarse matrix */ 8485 coarse_reuse = PETSC_TRUE; 8486 } 8487 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8488 coarse_reuse = PETSC_FALSE; 8489 } 8490 /* reset any subassembling information */ 8491 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8492 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8493 coarse_reuse = PETSC_TRUE; 8494 } 8495 if (coarse_reuse && pcbddc->coarse_ksp) { 8496 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 8497 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8498 coarse_mat_reuse = MAT_REUSE_MATRIX; 8499 } else { 8500 coarse_mat = NULL; 8501 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8502 } 8503 8504 /* creates temporary l2gmap and IS for coarse indexes */ 8505 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 8506 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 8507 8508 /* creates temporary MATIS object for coarse matrix */ 8509 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is)); 8510 PetscCall(MatSetType(t_coarse_mat_is, MATIS)); 8511 PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size)); 8512 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, multi_element)); 8513 PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg)); 8514 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat)); 8515 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8516 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8517 PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view")); 8518 8519 /* count "active" (i.e. with positive local size) and "void" processes */ 8520 im_active = !!pcis->n; 8521 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8522 8523 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8524 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8525 /* full_restr : just use the receivers from the subassembling pattern */ 8526 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 8527 coarse_mat_is = NULL; 8528 multilevel_allowed = PETSC_FALSE; 8529 multilevel_requested = PETSC_FALSE; 8530 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 8531 if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1); 8532 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8533 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8534 coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio; 8535 if (multilevel_requested) { 8536 ncoarse = active_procs / coarsening_ratio; 8537 restr = PETSC_FALSE; 8538 full_restr = PETSC_FALSE; 8539 } else { 8540 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 8541 restr = PETSC_TRUE; 8542 full_restr = PETSC_TRUE; 8543 } 8544 if (!pcbddc->coarse_size || (size == 1 && !multi_element)) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8545 ncoarse = PetscMax(1, ncoarse); 8546 if (!pcbddc->coarse_subassembling) { 8547 if (coarsening_ratio > 1) { 8548 if (multilevel_requested) { 8549 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8550 } else { 8551 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8552 } 8553 } else { 8554 PetscMPIInt rank; 8555 8556 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 8557 have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE; 8558 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 8559 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_subassembling, "default subassembling")); 8560 } 8561 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8562 PetscInt psum; 8563 if (pcbddc->coarse_ksp) psum = 1; 8564 else psum = 0; 8565 PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8566 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8567 } 8568 /* determine if we can go multilevel */ 8569 if (multilevel_requested) { 8570 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8571 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8572 } 8573 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8574 8575 /* dump subassembling pattern */ 8576 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); 8577 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8578 nedcfield = -1; 8579 corners = NULL; 8580 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8581 PetscInt *tidxs, *tidxs2, nout, tsize, i; 8582 const PetscInt *idxs; 8583 ISLocalToGlobalMapping tmap; 8584 8585 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8586 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 8587 /* allocate space for temporary storage */ 8588 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 8589 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 8590 /* allocate for IS array */ 8591 nisdofs = pcbddc->n_ISForDofsLocal; 8592 if (pcbddc->nedclocal) { 8593 if (pcbddc->nedfield > -1) { 8594 nedcfield = pcbddc->nedfield; 8595 } else { 8596 nedcfield = 0; 8597 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 8598 nisdofs = 1; 8599 } 8600 } 8601 nisneu = !!pcbddc->NeumannBoundariesLocal; 8602 nisvert = 0; /* nisvert is not used */ 8603 nis = nisdofs + nisneu + nisvert; 8604 PetscCall(PetscMalloc1(nis, &isarray)); 8605 /* dofs splitting */ 8606 for (i = 0; i < nisdofs; i++) { 8607 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8608 if (nedcfield != i) { 8609 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 8610 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8611 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8612 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8613 } else { 8614 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 8615 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 8616 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8617 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8618 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 8619 } 8620 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8621 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 8622 /* PetscCall(ISView(isarray[i],0)); */ 8623 } 8624 /* neumann boundaries */ 8625 if (pcbddc->NeumannBoundariesLocal) { 8626 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8627 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 8628 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8629 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8630 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8631 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8632 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 8633 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8634 } 8635 /* coordinates */ 8636 if (pcbddc->corner_selected) { 8637 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8638 PetscCall(ISGetLocalSize(corners, &tsize)); 8639 PetscCall(ISGetIndices(corners, &idxs)); 8640 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8641 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8642 PetscCall(ISRestoreIndices(corners, &idxs)); 8643 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8644 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8645 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 8646 } 8647 PetscCall(PetscFree(tidxs)); 8648 PetscCall(PetscFree(tidxs2)); 8649 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8650 } else { 8651 nis = 0; 8652 nisdofs = 0; 8653 nisneu = 0; 8654 nisvert = 0; 8655 isarray = NULL; 8656 } 8657 /* destroy no longer needed map */ 8658 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8659 8660 /* subassemble */ 8661 if (multilevel_allowed) { 8662 Vec vp[1]; 8663 PetscInt nvecs = 0; 8664 PetscBool reuse; 8665 8666 vp[0] = NULL; 8667 /* XXX HDIV also */ 8668 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8669 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 8670 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 8671 PetscCall(VecSetType(vp[0], VECSTANDARD)); 8672 nvecs = 1; 8673 8674 if (pcbddc->divudotp) { 8675 Mat B, loc_divudotp; 8676 Vec v, p; 8677 IS dummy; 8678 PetscInt np; 8679 8680 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 8681 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 8682 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 8683 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 8684 PetscCall(MatCreateVecs(B, &v, &p)); 8685 PetscCall(VecSet(p, 1.)); 8686 PetscCall(MatMultTranspose(B, p, v)); 8687 PetscCall(VecDestroy(&p)); 8688 PetscCall(MatDestroy(&B)); 8689 PetscCall(VecGetArray(vp[0], &array)); 8690 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 8691 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 8692 PetscCall(VecResetArray(pcbddc->vec1_P)); 8693 PetscCall(VecRestoreArray(vp[0], &array)); 8694 PetscCall(ISDestroy(&dummy)); 8695 PetscCall(VecDestroy(&v)); 8696 } 8697 } 8698 if (coarse_mat) reuse = PETSC_TRUE; 8699 else reuse = PETSC_FALSE; 8700 if (multi_element) { 8701 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8702 coarse_mat_is = t_coarse_mat_is; 8703 } else { 8704 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8705 if (reuse) { 8706 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 8707 } else { 8708 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 8709 } 8710 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8711 PetscScalar *arraym; 8712 const PetscScalar *arrayv; 8713 PetscInt nl; 8714 PetscCall(VecGetLocalSize(vp[0], &nl)); 8715 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 8716 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 8717 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 8718 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 8719 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 8720 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 8721 PetscCall(VecDestroy(&vp[0])); 8722 } else { 8723 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 8724 } 8725 } 8726 } else { 8727 PetscBool default_sub; 8728 8729 PetscCall(PetscStrcmp(((PetscObject)pcbddc->coarse_subassembling)->name, "default subassembling", &default_sub)); 8730 if (!default_sub) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 8731 else { 8732 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8733 coarse_mat_is = t_coarse_mat_is; 8734 } 8735 } 8736 if (coarse_mat_is || coarse_mat) { 8737 if (!multilevel_allowed) { 8738 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 8739 } else { 8740 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8741 if (coarse_mat_is) { 8742 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8743 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8744 coarse_mat = coarse_mat_is; 8745 } 8746 } 8747 } 8748 PetscCall(MatDestroy(&t_coarse_mat_is)); 8749 PetscCall(MatDestroy(&coarse_mat_is)); 8750 8751 /* create local to global scatters for coarse problem */ 8752 if (compute_vecs) { 8753 PetscInt lrows; 8754 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8755 if (coarse_mat) { 8756 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8757 } else { 8758 lrows = 0; 8759 } 8760 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8761 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8762 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8763 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8764 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8765 } 8766 PetscCall(ISDestroy(&coarse_is)); 8767 8768 /* set defaults for coarse KSP and PC */ 8769 if (multilevel_allowed) { 8770 coarse_ksp_type = KSPRICHARDSON; 8771 coarse_pc_type = PCBDDC; 8772 } else { 8773 coarse_ksp_type = KSPPREONLY; 8774 coarse_pc_type = PCREDUNDANT; 8775 } 8776 8777 /* print some info if requested */ 8778 if (pcbddc->dbg_flag) { 8779 if (!multilevel_allowed) { 8780 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8781 if (multilevel_requested) { 8782 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio)); 8783 } else if (pcbddc->max_levels) { 8784 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8785 } 8786 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8787 } 8788 } 8789 8790 /* communicate coarse discrete gradient */ 8791 coarseG = NULL; 8792 if (pcbddc->nedcG && multilevel_allowed) { 8793 MPI_Comm ccomm; 8794 if (coarse_mat) { 8795 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8796 } else { 8797 ccomm = MPI_COMM_NULL; 8798 } 8799 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8800 } 8801 8802 /* create the coarse KSP object only once with defaults */ 8803 if (coarse_mat) { 8804 PetscBool isredundant, isbddc, force, valid; 8805 PetscViewer dbg_viewer = NULL; 8806 PetscBool isset, issym, isher, isspd; 8807 8808 if (pcbddc->dbg_flag) { 8809 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8810 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8811 } 8812 if (!pcbddc->coarse_ksp) { 8813 char prefix[256], str_level[16]; 8814 size_t len; 8815 8816 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8817 PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel)); 8818 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8819 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8820 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1)); 8821 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8822 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8823 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8824 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8825 /* TODO is this logic correct? should check for coarse_mat type */ 8826 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8827 /* prefix */ 8828 PetscCall(PetscStrncpy(prefix, "", sizeof(prefix))); 8829 PetscCall(PetscStrncpy(str_level, "", sizeof(str_level))); 8830 if (!pcbddc->current_level) { 8831 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8832 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8833 } else { 8834 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8835 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8836 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8837 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8838 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8839 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level)); 8840 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8841 } 8842 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8843 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8844 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8845 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8846 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8847 /* allow user customization */ 8848 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8849 /* get some info after set from options */ 8850 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8851 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8852 force = PETSC_FALSE; 8853 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8854 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8855 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8856 if (multilevel_allowed && !force && !valid) { 8857 isbddc = PETSC_TRUE; 8858 PetscCall(PCSetType(pc_temp, PCBDDC)); 8859 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8860 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8861 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8862 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8863 PetscObjectOptionsBegin((PetscObject)pc_temp); 8864 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8865 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8866 PetscOptionsEnd(); 8867 pc_temp->setfromoptionscalled++; 8868 } 8869 } 8870 } 8871 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8872 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8873 if (nisdofs) { 8874 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8875 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i])); 8876 } 8877 if (nisneu) { 8878 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8879 PetscCall(ISDestroy(&isarray[nisdofs])); 8880 } 8881 if (nisvert) { 8882 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8883 PetscCall(ISDestroy(&isarray[nis - 1])); 8884 } 8885 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8886 8887 /* get some info after set from options */ 8888 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8889 8890 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8891 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8892 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8893 force = PETSC_FALSE; 8894 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8895 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8896 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC)); 8897 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 8898 if (isredundant) { 8899 KSP inner_ksp; 8900 PC inner_pc; 8901 8902 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 8903 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 8904 } 8905 8906 /* parameters which miss an API */ 8907 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8908 if (isbddc) { 8909 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 8910 8911 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8912 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8913 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8914 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8915 if (pcbddc_coarse->benign_saddle_point) { 8916 Mat coarsedivudotp_is; 8917 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 8918 IS row, col; 8919 const PetscInt *gidxs; 8920 PetscInt n, st, M, N; 8921 8922 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 8923 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 8924 st = st - n; 8925 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 8926 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 8927 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 8928 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 8929 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 8930 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 8931 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 8932 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 8933 PetscCall(ISGetSize(row, &M)); 8934 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 8935 PetscCall(ISDestroy(&row)); 8936 PetscCall(ISDestroy(&col)); 8937 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 8938 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 8939 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 8940 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 8941 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8942 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8943 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 8944 PetscCall(MatDestroy(&coarsedivudotp)); 8945 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 8946 PetscCall(MatDestroy(&coarsedivudotp_is)); 8947 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8948 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8949 } 8950 } 8951 8952 /* propagate symmetry info of coarse matrix */ 8953 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 8954 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 8955 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 8956 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 8957 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 8958 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 8959 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 8960 8961 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); 8962 /* set operators */ 8963 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 8964 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 8965 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8966 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 8967 } 8968 PetscCall(MatDestroy(&coarseG)); 8969 PetscCall(PetscFree(isarray)); 8970 #if 0 8971 { 8972 PetscViewer viewer; 8973 char filename[256]; 8974 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level)); 8975 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8976 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8977 PetscCall(MatView(coarse_mat,viewer)); 8978 PetscCall(PetscViewerPopFormat(viewer)); 8979 PetscCall(PetscViewerDestroy(&viewer)); 8980 } 8981 #endif 8982 8983 if (corners) { 8984 Vec gv; 8985 IS is; 8986 const PetscInt *idxs; 8987 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 8988 PetscScalar *coords; 8989 8990 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 8991 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 8992 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 8993 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 8994 PetscCall(VecSetBlockSize(gv, cdim)); 8995 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 8996 PetscCall(VecSetType(gv, VECSTANDARD)); 8997 PetscCall(VecSetFromOptions(gv)); 8998 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8999 9000 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 9001 PetscCall(ISGetLocalSize(is, &n)); 9002 PetscCall(ISGetIndices(is, &idxs)); 9003 PetscCall(PetscMalloc1(n * cdim, &coords)); 9004 for (i = 0; i < n; i++) { 9005 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; 9006 } 9007 PetscCall(ISRestoreIndices(is, &idxs)); 9008 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 9009 9010 PetscCall(ISGetLocalSize(corners, &n)); 9011 PetscCall(ISGetIndices(corners, &idxs)); 9012 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 9013 PetscCall(ISRestoreIndices(corners, &idxs)); 9014 PetscCall(PetscFree(coords)); 9015 PetscCall(VecAssemblyBegin(gv)); 9016 PetscCall(VecAssemblyEnd(gv)); 9017 PetscCall(VecGetArray(gv, &coords)); 9018 if (pcbddc->coarse_ksp) { 9019 PC coarse_pc; 9020 PetscBool isbddc; 9021 9022 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 9023 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 9024 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 9025 PetscReal *realcoords; 9026 9027 PetscCall(VecGetLocalSize(gv, &n)); 9028 #if defined(PETSC_USE_COMPLEX) 9029 PetscCall(PetscMalloc1(n, &realcoords)); 9030 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 9031 #else 9032 realcoords = coords; 9033 #endif 9034 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 9035 #if defined(PETSC_USE_COMPLEX) 9036 PetscCall(PetscFree(realcoords)); 9037 #endif 9038 } 9039 } 9040 PetscCall(VecRestoreArray(gv, &coords)); 9041 PetscCall(VecDestroy(&gv)); 9042 } 9043 PetscCall(ISDestroy(&corners)); 9044 9045 if (pcbddc->coarse_ksp) { 9046 Vec crhs, csol; 9047 9048 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 9049 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 9050 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL)); 9051 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs)); 9052 } 9053 PetscCall(MatDestroy(&coarsedivudotp)); 9054 9055 /* compute null space for coarse solver if the benign trick has been requested */ 9056 if (pcbddc->benign_null) { 9057 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 9058 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); 9059 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 9060 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 9061 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9062 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9063 if (coarse_mat) { 9064 Vec nullv; 9065 PetscScalar *array, *array2; 9066 PetscInt nl; 9067 9068 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 9069 PetscCall(VecGetLocalSize(nullv, &nl)); 9070 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9071 PetscCall(VecGetArray(nullv, &array2)); 9072 PetscCall(PetscArraycpy(array2, array, nl)); 9073 PetscCall(VecRestoreArray(nullv, &array2)); 9074 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9075 PetscCall(VecNormalize(nullv, NULL)); 9076 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 9077 PetscCall(VecDestroy(&nullv)); 9078 } 9079 } 9080 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 9081 9082 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9083 if (pcbddc->coarse_ksp) { 9084 PetscBool ispreonly; 9085 9086 if (CoarseNullSpace) { 9087 PetscBool isnull; 9088 9089 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 9090 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 9091 /* TODO: add local nullspaces (if any) */ 9092 } 9093 /* setup coarse ksp */ 9094 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 9095 /* Check coarse problem if in debug mode or if solving with an iterative method */ 9096 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 9097 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 9098 KSP check_ksp; 9099 KSPType check_ksp_type; 9100 PC check_pc; 9101 Vec check_vec, coarse_vec; 9102 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 9103 PetscInt its; 9104 PetscBool compute_eigs; 9105 PetscReal *eigs_r, *eigs_c; 9106 PetscInt neigs; 9107 const char *prefix; 9108 9109 /* Create ksp object suitable for estimation of extreme eigenvalues */ 9110 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 9111 PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel)); 9112 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 9113 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 9114 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 9115 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size)); 9116 /* prevent from setup unneeded object */ 9117 PetscCall(KSPGetPC(check_ksp, &check_pc)); 9118 PetscCall(PCSetType(check_pc, PCNONE)); 9119 if (ispreonly) { 9120 check_ksp_type = KSPPREONLY; 9121 compute_eigs = PETSC_FALSE; 9122 } else { 9123 check_ksp_type = KSPGMRES; 9124 compute_eigs = PETSC_TRUE; 9125 } 9126 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 9127 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 9128 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 9129 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 9130 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 9131 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 9132 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 9133 PetscCall(KSPSetFromOptions(check_ksp)); 9134 PetscCall(KSPSetUp(check_ksp)); 9135 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 9136 PetscCall(KSPSetPC(check_ksp, check_pc)); 9137 /* create random vec */ 9138 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 9139 PetscCall(VecSetRandom(check_vec, NULL)); 9140 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9141 /* solve coarse problem */ 9142 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 9143 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 9144 /* set eigenvalue estimation if preonly has not been requested */ 9145 if (compute_eigs) { 9146 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 9147 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 9148 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 9149 if (neigs) { 9150 lambda_max = eigs_r[neigs - 1]; 9151 lambda_min = eigs_r[0]; 9152 if (pcbddc->use_coarse_estimates) { 9153 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 9154 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 9155 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 9156 } 9157 } 9158 } 9159 } 9160 9161 /* check coarse problem residual error */ 9162 if (pcbddc->dbg_flag) { 9163 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 9164 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9165 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 9166 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 9167 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9168 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 9169 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 9170 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer)); 9171 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer)); 9172 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 9173 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 9174 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); 9175 if (compute_eigs) { 9176 PetscReal lambda_max_s, lambda_min_s; 9177 KSPConvergedReason reason; 9178 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 9179 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 9180 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 9181 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 9182 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 9183 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); 9184 } 9185 PetscCall(PetscViewerFlush(dbg_viewer)); 9186 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9187 } 9188 PetscCall(VecDestroy(&check_vec)); 9189 PetscCall(VecDestroy(&coarse_vec)); 9190 PetscCall(KSPDestroy(&check_ksp)); 9191 if (compute_eigs) { 9192 PetscCall(PetscFree(eigs_r)); 9193 PetscCall(PetscFree(eigs_c)); 9194 } 9195 } 9196 } 9197 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 9198 /* print additional info */ 9199 if (pcbddc->dbg_flag) { 9200 /* waits until all processes reaches this point */ 9201 PetscCall(PetscBarrier((PetscObject)pc)); 9202 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 9203 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9204 } 9205 9206 /* free memory */ 9207 PetscCall(MatDestroy(&coarse_mat)); 9208 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9209 PetscFunctionReturn(PETSC_SUCCESS); 9210 } 9211 9212 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) 9213 { 9214 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9215 PC_IS *pcis = (PC_IS *)pc->data; 9216 IS subset, subset_mult, subset_n; 9217 PetscInt local_size, coarse_size = 0; 9218 PetscInt *local_primal_indices = NULL; 9219 const PetscInt *t_local_primal_indices; 9220 9221 PetscFunctionBegin; 9222 /* Compute global number of coarse dofs */ 9223 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 9224 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 9225 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 9226 PetscCall(ISDestroy(&subset_n)); 9227 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 9228 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 9229 PetscCall(ISDestroy(&subset)); 9230 PetscCall(ISDestroy(&subset_mult)); 9231 PetscCall(ISGetLocalSize(subset_n, &local_size)); 9232 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 9233 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 9234 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 9235 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 9236 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 9237 PetscCall(ISDestroy(&subset_n)); 9238 9239 if (pcbddc->dbg_flag) { 9240 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9241 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 9242 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size)); 9243 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9244 } 9245 9246 /* get back data */ 9247 *coarse_size_n = coarse_size; 9248 *local_primal_indices_n = local_primal_indices; 9249 PetscFunctionReturn(PETSC_SUCCESS); 9250 } 9251 9252 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) 9253 { 9254 IS localis_t; 9255 PetscInt i, lsize, *idxs, n; 9256 PetscScalar *vals; 9257 9258 PetscFunctionBegin; 9259 /* get indices in local ordering exploiting local to global map */ 9260 PetscCall(ISGetLocalSize(globalis, &lsize)); 9261 PetscCall(PetscMalloc1(lsize, &vals)); 9262 for (i = 0; i < lsize; i++) vals[i] = 1.0; 9263 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 9264 PetscCall(VecSet(gwork, 0.0)); 9265 PetscCall(VecSet(lwork, 0.0)); 9266 if (idxs) { /* multilevel guard */ 9267 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 9268 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 9269 } 9270 PetscCall(VecAssemblyBegin(gwork)); 9271 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 9272 PetscCall(PetscFree(vals)); 9273 PetscCall(VecAssemblyEnd(gwork)); 9274 /* now compute set in local ordering */ 9275 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9276 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9277 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 9278 PetscCall(VecGetSize(lwork, &n)); 9279 for (i = 0, lsize = 0; i < n; i++) { 9280 if (PetscRealPart(vals[i]) > 0.5) lsize++; 9281 } 9282 PetscCall(PetscMalloc1(lsize, &idxs)); 9283 for (i = 0, lsize = 0; i < n; i++) { 9284 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i; 9285 } 9286 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 9287 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 9288 *localis = localis_t; 9289 PetscFunctionReturn(PETSC_SUCCESS); 9290 } 9291 9292 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 9293 { 9294 PC_IS *pcis = (PC_IS *)pc->data; 9295 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9296 PC_IS *pcisf; 9297 PC_BDDC *pcbddcf; 9298 PC pcf; 9299 9300 PetscFunctionBegin; 9301 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 9302 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 9303 PetscCall(PCSetType(pcf, PCBDDC)); 9304 9305 pcisf = (PC_IS *)pcf->data; 9306 pcbddcf = (PC_BDDC *)pcf->data; 9307 9308 pcisf->is_B_local = pcis->is_B_local; 9309 pcisf->vec1_N = pcis->vec1_N; 9310 pcisf->BtoNmap = pcis->BtoNmap; 9311 pcisf->n = pcis->n; 9312 pcisf->n_B = pcis->n_B; 9313 9314 PetscCall(PetscFree(pcbddcf->mat_graph)); 9315 PetscCall(PetscFree(pcbddcf->sub_schurs)); 9316 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 9317 pcbddcf->sub_schurs = schurs; 9318 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 9319 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 9320 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 9321 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 9322 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 9323 pcbddcf->use_faces = PETSC_TRUE; 9324 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 9325 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 9326 pcbddcf->use_qr_single = (PetscBool)!constraints; 9327 pcbddcf->fake_change = PETSC_TRUE; 9328 pcbddcf->dbg_flag = pcbddc->dbg_flag; 9329 9330 PetscCall(PCBDDCAdaptiveSelection(pcf)); 9331 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9332 9333 *change = pcbddcf->ConstraintMatrix; 9334 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 9335 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 9336 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 9337 9338 if (schurs) pcbddcf->sub_schurs = NULL; 9339 pcbddcf->ConstraintMatrix = NULL; 9340 pcbddcf->mat_graph = NULL; 9341 pcisf->is_B_local = NULL; 9342 pcisf->vec1_N = NULL; 9343 pcisf->BtoNmap = NULL; 9344 PetscCall(PCDestroy(&pcf)); 9345 PetscFunctionReturn(PETSC_SUCCESS); 9346 } 9347 9348 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9349 { 9350 PC_IS *pcis = (PC_IS *)pc->data; 9351 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9352 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 9353 Mat S_j; 9354 PetscInt *used_xadj, *used_adjncy; 9355 PetscBool free_used_adj; 9356 9357 PetscFunctionBegin; 9358 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9359 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9360 free_used_adj = PETSC_FALSE; 9361 if (pcbddc->sub_schurs_layers == -1) { 9362 used_xadj = NULL; 9363 used_adjncy = NULL; 9364 } else { 9365 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9366 used_xadj = pcbddc->mat_graph->xadj; 9367 used_adjncy = pcbddc->mat_graph->adjncy; 9368 } else if (pcbddc->computed_rowadj) { 9369 used_xadj = pcbddc->mat_graph->xadj; 9370 used_adjncy = pcbddc->mat_graph->adjncy; 9371 } else { 9372 PetscBool flg_row = PETSC_FALSE; 9373 const PetscInt *xadj, *adjncy; 9374 PetscInt nvtxs; 9375 9376 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9377 if (flg_row) { 9378 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 9379 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 9380 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 9381 free_used_adj = PETSC_TRUE; 9382 } else { 9383 pcbddc->sub_schurs_layers = -1; 9384 used_xadj = NULL; 9385 used_adjncy = NULL; 9386 } 9387 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9388 } 9389 } 9390 9391 /* setup sub_schurs data */ 9392 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 9393 if (!sub_schurs->schur_explicit) { 9394 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9395 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 9396 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 9397 } else { 9398 Mat change = NULL; 9399 Vec scaling = NULL; 9400 IS change_primal = NULL, iP; 9401 PetscInt benign_n; 9402 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9403 PetscBool need_change = PETSC_FALSE; 9404 PetscBool discrete_harmonic = PETSC_FALSE; 9405 9406 if (!pcbddc->use_vertices && reuse_solvers) { 9407 PetscInt n_vertices; 9408 9409 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 9410 reuse_solvers = (PetscBool)!n_vertices; 9411 } 9412 if (!pcbddc->benign_change_explicit) { 9413 benign_n = pcbddc->benign_n; 9414 } else { 9415 benign_n = 0; 9416 } 9417 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9418 We need a global reduction to avoid possible deadlocks. 9419 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9420 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9421 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9422 PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 9423 need_change = (PetscBool)(!need_change); 9424 } 9425 /* If the user defines additional constraints, we import them here */ 9426 if (need_change) { 9427 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 9428 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 9429 } 9430 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9431 9432 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 9433 if (iP) { 9434 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 9435 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 9436 PetscOptionsEnd(); 9437 } 9438 if (discrete_harmonic) { 9439 Mat A; 9440 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 9441 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 9442 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 9443 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 9444 pcbddc->benign_zerodiag_subs, change, change_primal)); 9445 PetscCall(MatDestroy(&A)); 9446 } else { 9447 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 9448 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 9449 } 9450 PetscCall(MatDestroy(&change)); 9451 PetscCall(ISDestroy(&change_primal)); 9452 } 9453 PetscCall(MatDestroy(&S_j)); 9454 9455 /* free adjacency */ 9456 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 9457 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9458 PetscFunctionReturn(PETSC_SUCCESS); 9459 } 9460 9461 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9462 { 9463 PC_IS *pcis = (PC_IS *)pc->data; 9464 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9465 PCBDDCGraph graph; 9466 9467 PetscFunctionBegin; 9468 /* attach interface graph for determining subsets */ 9469 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9470 IS verticesIS, verticescomm; 9471 PetscInt vsize, *idxs; 9472 9473 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9474 PetscCall(ISGetSize(verticesIS, &vsize)); 9475 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 9476 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 9477 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 9478 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9479 PetscCall(PCBDDCGraphCreate(&graph)); 9480 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 9481 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 9482 PetscCall(ISDestroy(&verticescomm)); 9483 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9484 } else { 9485 graph = pcbddc->mat_graph; 9486 } 9487 /* print some info */ 9488 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9489 IS vertices; 9490 PetscInt nv, nedges, nfaces; 9491 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 9492 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9493 PetscCall(ISGetSize(vertices, &nv)); 9494 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9495 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 9496 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 9497 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 9498 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 9499 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9500 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9501 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9502 } 9503 9504 /* sub_schurs init */ 9505 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9506 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 9507 9508 /* free graph struct */ 9509 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph)); 9510 PetscFunctionReturn(PETSC_SUCCESS); 9511 } 9512 9513 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer) 9514 { 9515 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 9516 PetscInt n = pc->pmat->rmap->n, ln, ni, st; 9517 const PetscInt *idxs; 9518 IS gis; 9519 9520 PetscFunctionBegin; 9521 if (!is) PetscFunctionReturn(PETSC_SUCCESS); 9522 PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL)); 9523 PetscCall(MatGetLocalSize(matis->A, NULL, &ln)); 9524 PetscCall(PetscArrayzero(matis->sf_leafdata, ln)); 9525 PetscCall(PetscArrayzero(matis->sf_rootdata, n)); 9526 PetscCall(ISGetLocalSize(is, &ni)); 9527 PetscCall(ISGetIndices(is, &idxs)); 9528 for (PetscInt i = 0; i < ni; i++) { 9529 if (idxs[i] < 0 || idxs[i] >= ln) continue; 9530 matis->sf_leafdata[idxs[i]] = 1; 9531 } 9532 PetscCall(ISRestoreIndices(is, &idxs)); 9533 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9534 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9535 ln = 0; 9536 for (PetscInt i = 0; i < n; i++) { 9537 if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st; 9538 } 9539 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis)); 9540 PetscCall(ISView(gis, viewer)); 9541 PetscCall(ISDestroy(&gis)); 9542 PetscFunctionReturn(PETSC_SUCCESS); 9543 } 9544 9545 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile) 9546 { 9547 PetscInt header[11]; 9548 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9549 PetscViewer viewer; 9550 MPI_Comm comm = PetscObjectComm((PetscObject)pc); 9551 9552 PetscFunctionBegin; 9553 PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer)); 9554 if (load) { 9555 IS is; 9556 Mat A; 9557 9558 PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT)); 9559 PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9560 PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9561 PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9562 PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9563 PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9564 PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9565 PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9566 PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9567 PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9568 PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9569 if (header[0]) { 9570 PetscCall(ISCreate(comm, &is)); 9571 PetscCall(ISLoad(is, viewer)); 9572 PetscCall(PCBDDCSetDirichletBoundaries(pc, is)); 9573 PetscCall(ISDestroy(&is)); 9574 } 9575 if (header[1]) { 9576 PetscCall(ISCreate(comm, &is)); 9577 PetscCall(ISLoad(is, viewer)); 9578 PetscCall(PCBDDCSetNeumannBoundaries(pc, is)); 9579 PetscCall(ISDestroy(&is)); 9580 } 9581 if (header[2]) { 9582 IS *isarray; 9583 9584 PetscCall(PetscMalloc1(header[2], &isarray)); 9585 for (PetscInt i = 0; i < header[2]; i++) { 9586 PetscCall(ISCreate(comm, &isarray[i])); 9587 PetscCall(ISLoad(isarray[i], viewer)); 9588 } 9589 PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray)); 9590 for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i])); 9591 PetscCall(PetscFree(isarray)); 9592 } 9593 if (header[3]) { 9594 PetscCall(ISCreate(comm, &is)); 9595 PetscCall(ISLoad(is, viewer)); 9596 PetscCall(PCBDDCSetPrimalVerticesIS(pc, is)); 9597 PetscCall(ISDestroy(&is)); 9598 } 9599 if (header[4]) { 9600 PetscCall(MatCreate(comm, &A)); 9601 PetscCall(MatSetType(A, MATAIJ)); 9602 PetscCall(MatLoad(A, viewer)); 9603 PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8])); 9604 PetscCall(MatDestroy(&A)); 9605 } 9606 if (header[9]) { 9607 PetscCall(MatCreate(comm, &A)); 9608 PetscCall(MatSetType(A, MATIS)); 9609 PetscCall(MatLoad(A, viewer)); 9610 PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL)); 9611 PetscCall(MatDestroy(&A)); 9612 } 9613 } else { 9614 header[0] = (PetscInt)!!pcbddc->DirichletBoundariesLocal; 9615 header[1] = (PetscInt)!!pcbddc->NeumannBoundariesLocal; 9616 header[2] = pcbddc->n_ISForDofsLocal; 9617 header[3] = (PetscInt)!!pcbddc->user_primal_vertices_local; 9618 header[4] = (PetscInt)!!pcbddc->discretegradient; 9619 header[5] = pcbddc->nedorder; 9620 header[6] = pcbddc->nedfield; 9621 header[7] = (PetscInt)pcbddc->nedglobal; 9622 header[8] = (PetscInt)pcbddc->conforming; 9623 header[9] = (PetscInt)!!pcbddc->divudotp; 9624 header[10] = (PetscInt)pcbddc->divudotp_trans; 9625 if (header[4]) header[3] = 0; 9626 9627 PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT)); 9628 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer)); 9629 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer)); 9630 for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer)); 9631 if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer)); 9632 if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer)); 9633 if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer)); 9634 } 9635 PetscCall(PetscViewerDestroy(&viewer)); 9636 PetscFunctionReturn(PETSC_SUCCESS); 9637 } 9638 9639 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9640 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9641 { 9642 Mat At; 9643 IS rows; 9644 PetscInt rst, ren; 9645 PetscLayout rmap; 9646 9647 PetscFunctionBegin; 9648 rst = ren = 0; 9649 if (ccomm != MPI_COMM_NULL) { 9650 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 9651 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 9652 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 9653 PetscCall(PetscLayoutSetUp(rmap)); 9654 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 9655 } 9656 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 9657 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 9658 PetscCall(ISDestroy(&rows)); 9659 9660 if (ccomm != MPI_COMM_NULL) { 9661 Mat_MPIAIJ *a, *b; 9662 IS from, to; 9663 Vec gvec; 9664 PetscInt lsize; 9665 9666 PetscCall(MatCreate(ccomm, B)); 9667 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 9668 PetscCall(MatSetType(*B, MATAIJ)); 9669 PetscCall(PetscLayoutDestroy(&(*B)->rmap)); 9670 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9671 a = (Mat_MPIAIJ *)At->data; 9672 b = (Mat_MPIAIJ *)(*B)->data; 9673 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 9674 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 9675 PetscCall(PetscObjectReference((PetscObject)a->A)); 9676 PetscCall(PetscObjectReference((PetscObject)a->B)); 9677 b->A = a->A; 9678 b->B = a->B; 9679 9680 b->donotstash = a->donotstash; 9681 b->roworiented = a->roworiented; 9682 b->rowindices = NULL; 9683 b->rowvalues = NULL; 9684 b->getrowactive = PETSC_FALSE; 9685 9686 (*B)->rmap = rmap; 9687 (*B)->factortype = A->factortype; 9688 (*B)->assembled = PETSC_TRUE; 9689 (*B)->insertmode = NOT_SET_VALUES; 9690 (*B)->preallocated = PETSC_TRUE; 9691 9692 if (a->colmap) { 9693 #if defined(PETSC_USE_CTABLE) 9694 PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap)); 9695 #else 9696 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 9697 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 9698 #endif 9699 } else b->colmap = NULL; 9700 if (a->garray) { 9701 PetscInt len; 9702 len = a->B->cmap->n; 9703 PetscCall(PetscMalloc1(len + 1, &b->garray)); 9704 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 9705 } else b->garray = NULL; 9706 9707 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9708 b->lvec = a->lvec; 9709 9710 /* cannot use VecScatterCopy */ 9711 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 9712 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 9713 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 9714 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 9715 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 9716 PetscCall(ISDestroy(&from)); 9717 PetscCall(ISDestroy(&to)); 9718 PetscCall(VecDestroy(&gvec)); 9719 } 9720 PetscCall(MatDestroy(&At)); 9721 PetscFunctionReturn(PETSC_SUCCESS); 9722 } 9723 9724 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */ 9725 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA) 9726 { 9727 PetscBool isaij; 9728 MPI_Comm comm; 9729 9730 PetscFunctionBegin; 9731 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9732 PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, "")); 9733 PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented"); 9734 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij)); 9735 if (isaij) { /* SeqAIJ supports repeated rows */ 9736 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA)); 9737 } else { 9738 Mat A_loc; 9739 Mat_SeqAIJ *da; 9740 PetscSF sf; 9741 PetscInt ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata; 9742 PetscScalar *daa; 9743 const PetscInt *idxs; 9744 const PetscSFNode *iremotes; 9745 PetscSFNode *remotes; 9746 9747 /* SF for incoming rows */ 9748 PetscCall(PetscSFCreate(comm, &sf)); 9749 PetscCall(ISGetLocalSize(rows, &ni)); 9750 PetscCall(ISGetIndices(rows, &idxs)); 9751 PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs)); 9752 PetscCall(ISRestoreIndices(rows, &idxs)); 9753 9754 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc)); 9755 da = (Mat_SeqAIJ *)A_loc->data; 9756 PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata)); 9757 for (PetscInt i = 0; i < m; i++) { 9758 rdata[2 * i + 0] = da->i[i + 1] - da->i[i]; 9759 rdata[2 * i + 1] = da->i[i]; 9760 } 9761 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9762 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9763 PetscCall(PetscMalloc1(ni + 1, &di)); 9764 di[0] = 0; 9765 for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0]; 9766 PetscCall(PetscMalloc1(di[ni], &dj)); 9767 PetscCall(PetscMalloc1(di[ni], &daa)); 9768 PetscCall(PetscMalloc1(di[ni], &remotes)); 9769 9770 PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes)); 9771 9772 /* SF graph for nonzeros */ 9773 c = 0; 9774 for (PetscInt i = 0; i < ni; i++) { 9775 const PetscInt rank = iremotes[i].rank; 9776 const PetscInt rsize = ldata[2 * i]; 9777 for (PetscInt j = 0; j < rsize; j++) { 9778 remotes[c].rank = rank; 9779 remotes[c].index = ldata[2 * i + 1] + j; 9780 c++; 9781 } 9782 } 9783 PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]); 9784 PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER)); 9785 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9786 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9787 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9788 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9789 9790 PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA)); 9791 PetscCall(MatDestroy(&A_loc)); 9792 PetscCall(PetscSFDestroy(&sf)); 9793 PetscCall(PetscFree(di)); 9794 PetscCall(PetscFree(dj)); 9795 PetscCall(PetscFree(daa)); 9796 PetscCall(PetscFree(remotes)); 9797 PetscCall(PetscFree2(ldata, rdata)); 9798 } 9799 PetscFunctionReturn(PETSC_SUCCESS); 9800 } 9801