1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <petsc/private/kernels/blockinvert.h> 5 #include <../src/mat/impls/dense/seq/dense.h> 6 #include <petscdmplex.h> 7 #include <petscblaslapack.h> 8 #include <petsc/private/sfimpl.h> 9 #include <petsc/private/dmpleximpl.h> 10 #include <petscdmda.h> 11 12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 13 14 /* if range is true, it returns B s.t. span{B} = range(A) 15 if range is false, it returns B s.t. range(B) _|_ range(A) */ 16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 17 { 18 PetscScalar *uwork, *data, *U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM, bN, lwork, lierr, di = 1; 21 PetscInt ulw, i, nr, nc, n; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork2; 24 #endif 25 26 PetscFunctionBegin; 27 PetscCall(MatGetSize(A, &nr, &nc)); 28 if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 33 PetscCall(PetscMalloc1(ulw, &uwork)); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr, nc); 39 if (!rwork) { 40 PetscCall(PetscMalloc1(n, &sing)); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 PetscCall(PetscMalloc1(nr * nr, &U)); 47 PetscCall(PetscBLASIntCast(nr, &bM)); 48 PetscCall(PetscBLASIntCast(nc, &bN)); 49 PetscCall(PetscBLASIntCast(ulw, &lwork)); 50 PetscCall(MatDenseGetArray(A, &data)); 51 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 52 #if !defined(PETSC_USE_COMPLEX) 53 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 54 #else 55 PetscCall(PetscMalloc1(5 * n, &rwork2)); 56 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 57 PetscCall(PetscFree(rwork2)); 58 #endif 59 PetscCall(PetscFPTrapPop()); 60 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 61 PetscCall(MatDenseRestoreArray(A, &data)); 62 for (i = 0; i < n; i++) 63 if (sing[i] < PETSC_SMALL) break; 64 if (!rwork) PetscCall(PetscFree(sing)); 65 if (!work) PetscCall(PetscFree(uwork)); 66 /* create B */ 67 if (!range) { 68 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 69 PetscCall(MatDenseGetArray(*B, &data)); 70 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 71 } else { 72 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 73 PetscCall(MatDenseGetArray(*B, &data)); 74 PetscCall(PetscArraycpy(data, U, i * nr)); 75 } 76 PetscCall(MatDenseRestoreArray(*B, &data)); 77 PetscCall(PetscFree(U)); 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 /* TODO REMOVE */ 82 #if defined(PRINT_GDET) 83 static int inc = 0; 84 static int lev = 0; 85 #endif 86 87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 88 { 89 Mat GE, GEd; 90 PetscInt rsize, csize, esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 PetscCall(ISGetSize(edge, &esize)); 95 if (!esize) PetscFunctionReturn(PETSC_SUCCESS); 96 PetscCall(ISGetSize(extrow, &rsize)); 97 PetscCall(ISGetSize(extcol, &csize)); 98 99 /* gradients */ 100 ptr = work + 5 * esize; 101 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 102 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 103 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 104 PetscCall(MatDestroy(&GE)); 105 106 /* constants */ 107 ptr += rsize * csize; 108 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 109 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 110 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 111 PetscCall(MatDestroy(&GE)); 112 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 113 PetscCall(MatDestroy(&GEd)); 114 115 if (corners) { 116 Mat GEc; 117 const PetscScalar *vals; 118 PetscScalar v; 119 120 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 121 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 122 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 123 /* v = PetscAbsScalar(vals[0]); */ 124 v = 1.; 125 cvals[0] = vals[0] / v; 126 cvals[1] = vals[1] / v; 127 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 128 PetscCall(MatScale(*GKins, 1. / v)); 129 #if defined(PRINT_GDET) 130 { 131 PetscViewer viewer; 132 char filename[256]; 133 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++)); 134 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 135 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 136 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 137 PetscCall(MatView(GEc, viewer)); 138 PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK")); 139 PetscCall(MatView(*GKins, viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 141 PetscCall(MatView(GEd, viewer)); 142 PetscCall(PetscViewerDestroy(&viewer)); 143 } 144 #endif 145 PetscCall(MatDestroy(&GEd)); 146 PetscCall(MatDestroy(&GEc)); 147 } 148 PetscFunctionReturn(PETSC_SUCCESS); 149 } 150 151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *); 152 153 PetscErrorCode PCBDDCNedelecSupport(PC pc) 154 { 155 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 156 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 157 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 158 PetscSF sfv; 159 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 160 MPI_Comm comm; 161 IS lned, primals, allprimals, nedfieldlocal, elements_corners = NULL; 162 IS *eedges, *extrows, *extcols, *alleedges; 163 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 164 PetscScalar *vals, *work; 165 PetscReal *rwork; 166 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 167 PetscInt ne, nv, Lv, order, n, field; 168 PetscInt i, j, extmem, cum, maxsize, nee; 169 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 170 PetscInt *sfvleaves, *sfvroots; 171 PetscInt *corners, *cedges; 172 PetscInt *ecount, **eneighs, *vcount, **vneighs; 173 PetscInt *emarks; 174 PetscBool print, eerr, done, lrc[2], conforming, global, setprimal; 175 176 PetscFunctionBegin; 177 /* If the discrete gradient is defined for a subset of dofs and global is true, 178 it assumes G is given in global ordering for all the dofs. 179 Otherwise, the ordering is global for the Nedelec field */ 180 order = pcbddc->nedorder; 181 conforming = pcbddc->conforming; 182 field = pcbddc->nedfield; 183 global = pcbddc->nedglobal; 184 setprimal = PETSC_FALSE; 185 print = PETSC_FALSE; 186 187 /* Command line customization */ 188 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 189 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 190 /* print debug info and adaptive order TODO: to be removed */ 191 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 192 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 193 PetscOptionsEnd(); 194 195 /* Return if there are no edges in the decomposition */ 196 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 197 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 198 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 199 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 200 lrc[0] = PETSC_FALSE; 201 for (i = 0; i < n; i++) { 202 if (PetscRealPart(vals[i]) > 2.) { 203 lrc[0] = PETSC_TRUE; 204 break; 205 } 206 } 207 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 208 PetscCall(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm)); 209 if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS); 210 211 /* Get Nedelec field */ 212 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 213 if (pcbddc->n_ISForDofsLocal && field >= 0) { 214 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 215 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 216 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 217 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 218 ne = n; 219 nedfieldlocal = NULL; 220 global = PETSC_TRUE; 221 } else if (field == PETSC_DECIDE) { 222 PetscInt rst, ren, *idx; 223 224 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 225 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 226 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 227 for (i = rst; i < ren; i++) { 228 PetscInt nc; 229 230 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 231 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 232 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 233 } 234 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 235 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 236 PetscCall(PetscMalloc1(n, &idx)); 237 for (i = 0, ne = 0; i < n; i++) 238 if (matis->sf_leafdata[i]) idx[ne++] = i; 239 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 240 } else { 241 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 242 } 243 244 /* Sanity checks */ 245 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 246 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 247 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 248 249 /* Just set primal dofs and return */ 250 if (setprimal) { 251 IS enedfieldlocal; 252 PetscInt *eidxs; 253 254 PetscCall(PetscMalloc1(ne, &eidxs)); 255 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 256 if (nedfieldlocal) { 257 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 258 for (i = 0, cum = 0; i < ne; i++) { 259 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i]; 260 } 261 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 262 } else { 263 for (i = 0, cum = 0; i < ne; i++) { 264 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i; 265 } 266 } 267 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 268 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 269 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 270 PetscCall(PetscFree(eidxs)); 271 PetscCall(ISDestroy(&nedfieldlocal)); 272 PetscCall(ISDestroy(&enedfieldlocal)); 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 /* Compute some l2g maps */ 277 if (nedfieldlocal) { 278 IS is; 279 280 /* need to map from the local Nedelec field to local numbering */ 281 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 282 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 283 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 284 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 285 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 286 if (global) { 287 PetscCall(PetscObjectReference((PetscObject)al2g)); 288 el2g = al2g; 289 } else { 290 IS gis; 291 292 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 293 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 294 PetscCall(ISDestroy(&gis)); 295 } 296 PetscCall(ISDestroy(&is)); 297 } else { 298 /* one ref for the destruction of al2g, one for el2g */ 299 PetscCall(PetscObjectReference((PetscObject)al2g)); 300 PetscCall(PetscObjectReference((PetscObject)al2g)); 301 el2g = al2g; 302 fl2g = NULL; 303 } 304 305 /* Start communication to drop connections for interior edges (for cc analysis only) */ 306 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 307 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 308 if (nedfieldlocal) { 309 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 310 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 311 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 312 } else { 313 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 314 } 315 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 316 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 317 318 /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting 319 Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */ 320 if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners)); 321 322 /* drop connections with interior edges to avoid unneeded communications and memory movements */ 323 PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view")); 324 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 325 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 326 if (global) { 327 PetscInt rst; 328 329 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 330 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 331 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst; 332 } 333 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 334 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 335 } else { 336 PetscInt *tbz; 337 338 PetscCall(PetscMalloc1(ne, &tbz)); 339 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 340 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 341 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 342 for (i = 0, cum = 0; i < ne; i++) 343 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 344 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 345 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 346 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 347 PetscCall(PetscFree(tbz)); 348 } 349 350 /* Extract subdomain relevant rows of G */ 351 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 352 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 353 PetscCall(MatAIJExtractRows(G, lned, &lGall)); 354 /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */ 355 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 356 PetscCall(ISDestroy(&lned)); 357 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 358 PetscCall(MatDestroy(&lGall)); 359 PetscCall(MatISGetLocalMat(lGis, &lG)); 360 if (matis->allow_repeated) { /* multi-element support */ 361 Mat *lGn, B; 362 IS *is_rows, *tcols, tmap, nmap; 363 PetscInt subnv; 364 const PetscInt *subvidxs; 365 ISLocalToGlobalMapping mapn; 366 367 PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn)); 368 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows)); 369 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols)); 370 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 371 if (fl2g) { 372 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i])); 373 } else { 374 PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i])); 375 is_rows[i] = pcbddc->local_subs[i]; 376 } 377 PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)])); 378 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn)); 379 PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv)); 380 PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs)); 381 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i])); 382 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs)); 383 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 384 } 385 386 /* Create new MATIS with repeated vertices */ 387 PetscCall(MatCreate(comm, &B)); 388 PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N)); 389 PetscCall(MatSetType(B, MATIS)); 390 PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE)); 391 PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap)); 392 PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap)); 393 PetscCall(ISDestroy(&tmap)); 394 PetscCall(ISGetLocalSize(nmap, &subnv)); 395 PetscCall(ISGetIndices(nmap, &subvidxs)); 396 PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap)); 397 PetscCall(ISRestoreIndices(nmap, &subvidxs)); 398 PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn)); 399 PetscCall(ISDestroy(&tmap)); 400 PetscCall(ISDestroy(&nmap)); 401 PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn)); 402 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 403 PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG)); 404 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 405 PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)])); 406 PetscCall(ISDestroy(&is_rows[i])); 407 PetscCall(ISDestroy(&tcols[i])); 408 } 409 PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG)); 410 PetscCall(PetscFree(lGn)); 411 PetscCall(PetscFree(is_rows)); 412 PetscCall(PetscFree(tcols)); 413 PetscCall(MatISSetLocalMat(B, lG)); 414 PetscCall(MatDestroy(&lG)); 415 416 PetscCall(MatDestroy(&lGis)); 417 lGis = B; 418 419 lGis->assembled = PETSC_TRUE; 420 } 421 PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view")); 422 423 /* SF for nodal dofs communications */ 424 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 425 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 426 PetscCall(PetscObjectReference((PetscObject)vl2g)); 427 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 428 PetscCall(PetscSFCreate(comm, &sfv)); 429 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 430 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 431 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 432 433 if (elements_corners) { 434 IS tmp; 435 Vec global, local; 436 Mat_IS *tGis = (Mat_IS *)lGis->data; 437 438 PetscCall(MatCreateVecs(lGis, &global, NULL)); 439 PetscCall(MatCreateVecs(tGis->A, &local, NULL)); 440 PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp)); 441 PetscCall(VecDestroy(&global)); 442 PetscCall(VecDestroy(&local)); 443 elements_corners = tmp; 444 } 445 446 /* Destroy temporary G */ 447 PetscCall(MatISGetLocalMat(lGis, &lG)); 448 PetscCall(PetscObjectReference((PetscObject)lG)); 449 PetscCall(MatDestroy(&G)); 450 PetscCall(MatDestroy(&lGis)); 451 452 if (print) { 453 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 454 PetscCall(MatView(lG, NULL)); 455 } 456 457 /* Save lG for values insertion in change of basis */ 458 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 459 460 /* Analyze the edge-nodes connections (duplicate lG) */ 461 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 462 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 463 PetscCall(PetscBTCreate(nv, &btv)); 464 PetscCall(PetscBTCreate(ne, &bte)); 465 PetscCall(PetscBTCreate(ne, &btb)); 466 PetscCall(PetscBTCreate(ne, &btbd)); 467 /* need to import the boundary specification to ensure the 468 proper detection of coarse edges' endpoints */ 469 if (pcbddc->DirichletBoundariesLocal) { 470 IS is; 471 472 if (fl2g) { 473 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 474 } else { 475 is = pcbddc->DirichletBoundariesLocal; 476 } 477 PetscCall(ISGetLocalSize(is, &cum)); 478 PetscCall(ISGetIndices(is, &idxs)); 479 for (i = 0; i < cum; i++) { 480 if (idxs[i] >= 0 && idxs[i] < ne) { 481 PetscCall(PetscBTSet(btb, idxs[i])); 482 PetscCall(PetscBTSet(btbd, idxs[i])); 483 } 484 } 485 PetscCall(ISRestoreIndices(is, &idxs)); 486 if (fl2g) PetscCall(ISDestroy(&is)); 487 } 488 if (pcbddc->NeumannBoundariesLocal) { 489 IS is; 490 491 if (fl2g) { 492 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 493 } else { 494 is = pcbddc->NeumannBoundariesLocal; 495 } 496 PetscCall(ISGetLocalSize(is, &cum)); 497 PetscCall(ISGetIndices(is, &idxs)); 498 for (i = 0; i < cum; i++) { 499 if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i])); 500 } 501 PetscCall(ISRestoreIndices(is, &idxs)); 502 if (fl2g) PetscCall(ISDestroy(&is)); 503 } 504 505 /* Count neighs per dof */ 506 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL)); 507 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL)); 508 509 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 510 for proper detection of coarse edges' endpoints */ 511 PetscCall(PetscBTCreate(ne, &btee)); 512 for (i = 0; i < ne; i++) { 513 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i)); 514 } 515 PetscCall(PetscMalloc1(ne, &marks)); 516 if (!conforming) { 517 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 518 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 519 } 520 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 521 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 522 cum = 0; 523 for (i = 0; i < ne; i++) { 524 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 525 if (!PetscBTLookup(btee, i)) { 526 marks[cum++] = i; 527 continue; 528 } 529 /* set badly connected edge dofs as primal */ 530 if (!conforming) { 531 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 532 marks[cum++] = i; 533 PetscCall(PetscBTSet(bte, i)); 534 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 535 } else { 536 /* every edge dofs should be connected through a certain number of nodal dofs 537 to other edge dofs belonging to coarse edges 538 - at most 2 endpoints 539 - order-1 interior nodal dofs 540 - no undefined nodal dofs (nconn < order) 541 */ 542 PetscInt ends = 0, ints = 0, undef = 0; 543 for (j = ii[i]; j < ii[i + 1]; j++) { 544 PetscInt v = jj[j], k; 545 PetscInt nconn = iit[v + 1] - iit[v]; 546 for (k = iit[v]; k < iit[v + 1]; k++) 547 if (!PetscBTLookup(btee, jjt[k])) nconn--; 548 if (nconn > order) ends++; 549 else if (nconn == order) ints++; 550 else undef++; 551 } 552 if (undef || ends > 2 || ints != order - 1) { 553 marks[cum++] = i; 554 PetscCall(PetscBTSet(bte, i)); 555 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 556 } 557 } 558 } 559 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 560 if (!order && ii[i + 1] != ii[i]) { 561 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 562 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 563 } 564 } 565 PetscCall(PetscBTDestroy(&btee)); 566 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 567 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 568 if (!conforming) { 569 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 570 PetscCall(MatDestroy(&lGt)); 571 } 572 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 573 574 /* identify splitpoints and corner candidates */ 575 PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots)); 576 PetscCall(PetscBTCreate(nv, &btvcand)); 577 if (elements_corners) { 578 PetscCall(ISGetLocalSize(elements_corners, &cum)); 579 PetscCall(ISGetIndices(elements_corners, &idxs)); 580 for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i])); 581 PetscCall(ISRestoreIndices(elements_corners, &idxs)); 582 } 583 584 if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */ 585 PetscSF emlsf, vmlsf; 586 PetscInt *eleaves, *vleaves, *meleaves, *mvleaves; 587 PetscInt cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl; 588 589 PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs)); 590 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 591 PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs)); 592 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 593 594 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf)); 595 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf)); 596 597 PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL)); 598 for (i = 0, j = 0; i < ne; i++) j += ecount[i]; 599 PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne); 600 PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j); 601 602 PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL)); 603 for (i = 0, j = 0; i < nv; i++) j += vcount[i]; 604 PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv); 605 PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j); 606 607 PetscCall(PetscMalloc1(ne, &eleaves)); 608 PetscCall(PetscMalloc1(nv, &vleaves)); 609 for (i = 0; i < ne; i++) eleaves[i] = PETSC_MAX_INT; 610 for (i = 0; i < nv; i++) vleaves[i] = PETSC_MAX_INT; 611 PetscCall(PetscMalloc1(emnl, &meleaves)); 612 PetscCall(PetscMalloc1(vmnl, &mvleaves)); 613 614 PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm)); 615 PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 616 for (i = 0; i < n_subs; i++) { 617 const PetscInt *idxs; 618 const PetscInt subid = cum_subs + i; 619 PetscInt ns; 620 621 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns)); 622 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 623 for (j = 0; j < ns; j++) { 624 const PetscInt e = idxs[j]; 625 626 eleaves[e] = subid; 627 for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid; 628 } 629 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 630 } 631 PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 632 PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 633 PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 634 PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 635 PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 636 PetscCall(PetscFree(eleaves)); 637 PetscCall(PetscFree(vleaves)); 638 639 PetscCall(PetscMalloc1(ne + 1, &eneighs)); 640 eneighs[0] = meleaves; 641 for (i = 0; i < ne; i++) { 642 PetscCall(PetscSortInt(ecount[i], eneighs[i])); 643 eneighs[i + 1] = eneighs[i] + ecount[i]; 644 } 645 PetscCall(PetscMalloc1(nv + 1, &vneighs)); 646 vneighs[0] = mvleaves; 647 for (i = 0; i < nv; i++) { 648 PetscCall(PetscSortInt(vcount[i], vneighs[i])); 649 vneighs[i + 1] = vneighs[i] + vcount[i]; 650 } 651 } else { 652 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs)); 653 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs)); 654 } 655 656 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 657 if (print) { 658 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 659 PetscCall(MatView(lGe, NULL)); 660 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 661 PetscCall(MatView(lGt, NULL)); 662 } 663 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 664 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 665 for (i = 0; i < nv; i++) { 666 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 667 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 668 if (!order) { /* variable order */ 669 PetscReal vorder = 0.; 670 671 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 672 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 673 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 674 ord = 1; 675 } 676 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 677 const PetscInt e = jj[j]; 678 679 if (PetscBTLookup(btbd, e)) { 680 bdir = PETSC_TRUE; 681 break; 682 } 683 if (vc != ecount[e]) { 684 sneighs = PETSC_FALSE; 685 } else { 686 const PetscInt *vn = vneighs[i], *en = eneighs[e]; 687 688 for (PetscInt k = 0; k < vc; k++) { 689 if (vn[k] != en[k]) { 690 sneighs = PETSC_FALSE; 691 break; 692 } 693 } 694 } 695 } 696 if (elements_corners) test = 0; 697 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 698 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir])); 699 PetscCall(PetscBTSet(btv, i)); 700 } else if (test == ord) { 701 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 702 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i)); 703 PetscCall(PetscBTSet(btv, i)); 704 } else if (!elements_corners) { 705 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i)); 706 PetscCall(PetscBTSet(btvcand, i)); 707 } 708 } 709 } 710 PetscCall(PetscBTDestroy(&btbd)); 711 712 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 713 if (order != 1) { 714 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n")); 715 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 716 for (i = 0; i < nv; i++) { 717 if (PetscBTLookup(btvcand, i)) { 718 PetscBool found = PETSC_FALSE; 719 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 720 PetscInt k, e = jj[j]; 721 if (PetscBTLookup(bte, e)) continue; 722 for (k = iit[e]; k < iit[e + 1]; k++) { 723 PetscInt v = jjt[k]; 724 if (v != i && PetscBTLookup(btvcand, v)) { 725 found = PETSC_TRUE; 726 break; 727 } 728 } 729 } 730 if (!found) { 731 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i)); 732 PetscCall(PetscBTClear(btvcand, i)); 733 } else { 734 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i)); 735 } 736 } 737 } 738 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 739 } 740 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 741 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 742 PetscCall(MatDestroy(&lGe)); 743 744 /* Get the local G^T explicitly */ 745 PetscCall(MatDestroy(&lGt)); 746 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 747 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 748 749 /* Mark shared nodal dofs */ 750 PetscCall(PetscBTCreate(nv, &btvi)); 751 for (i = 0; i < nv; i++) { 752 if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i)); 753 } 754 755 if (matis->allow_repeated) { 756 PetscCall(PetscFree(eneighs[0])); 757 PetscCall(PetscFree(vneighs[0])); 758 PetscCall(PetscFree(eneighs)); 759 PetscCall(PetscFree(vneighs)); 760 } 761 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 762 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 763 764 /* communicate corners and splitpoints */ 765 PetscCall(PetscMalloc1(nv, &vmarks)); 766 PetscCall(PetscArrayzero(sfvleaves, nv)); 767 PetscCall(PetscArrayzero(sfvroots, Lv)); 768 for (i = 0; i < nv; i++) 769 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 770 771 if (print) { 772 IS tbz; 773 774 cum = 0; 775 for (i = 0; i < nv; i++) 776 if (sfvleaves[i]) vmarks[cum++] = i; 777 778 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 779 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 780 PetscCall(ISView(tbz, NULL)); 781 PetscCall(ISDestroy(&tbz)); 782 } 783 784 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 785 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 786 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 787 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 788 789 /* Zero rows of lGt corresponding to identified corners 790 and interior nodal dofs */ 791 cum = 0; 792 for (i = 0; i < nv; i++) { 793 if (sfvleaves[i]) { 794 vmarks[cum++] = i; 795 PetscCall(PetscBTSet(btv, i)); 796 } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 797 } 798 PetscCall(PetscBTDestroy(&btvi)); 799 if (print) { 800 IS tbz; 801 802 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 803 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 804 PetscCall(ISView(tbz, NULL)); 805 PetscCall(ISDestroy(&tbz)); 806 } 807 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 808 PetscCall(PetscFree(vmarks)); 809 PetscCall(PetscSFDestroy(&sfv)); 810 PetscCall(PetscFree2(sfvleaves, sfvroots)); 811 812 /* Recompute G */ 813 PetscCall(MatDestroy(&lG)); 814 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 815 if (print) { 816 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 817 PetscCall(MatView(lG, NULL)); 818 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 819 PetscCall(MatView(lGt, NULL)); 820 } 821 822 /* Get primal dofs (if any) */ 823 cum = 0; 824 for (i = 0; i < ne; i++) { 825 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 826 } 827 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 828 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 829 if (print) { 830 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 831 PetscCall(ISView(primals, NULL)); 832 } 833 PetscCall(PetscBTDestroy(&bte)); 834 /* TODO: what if the user passed in some of them ? */ 835 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 836 PetscCall(ISDestroy(&primals)); 837 838 /* Compute edge connectivity */ 839 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 840 841 /* Symbolic conn = lG*lGt */ 842 if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */ 843 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 844 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 845 PetscCall(MatProductSetAlgorithm(conn, "default")); 846 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 847 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 848 PetscCall(MatProductSetFromOptions(conn)); 849 PetscCall(MatProductSymbolic(conn)); 850 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 851 if (fl2g) { 852 PetscBT btf; 853 PetscInt *iia, *jja, *iiu, *jju; 854 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 855 856 /* create CSR for all local dofs */ 857 PetscCall(PetscMalloc1(n + 1, &iia)); 858 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 859 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 860 iiu = pcbddc->mat_graph->xadj; 861 jju = pcbddc->mat_graph->adjncy; 862 } else if (pcbddc->use_local_adj) { 863 rest = PETSC_TRUE; 864 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 865 } else { 866 free = PETSC_TRUE; 867 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 868 iiu[0] = 0; 869 for (i = 0; i < n; i++) { 870 iiu[i + 1] = i + 1; 871 jju[i] = -1; 872 } 873 } 874 875 /* import sizes of CSR */ 876 iia[0] = 0; 877 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 878 879 /* overwrite entries corresponding to the Nedelec field */ 880 PetscCall(PetscBTCreate(n, &btf)); 881 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 882 for (i = 0; i < ne; i++) { 883 PetscCall(PetscBTSet(btf, idxs[i])); 884 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 885 } 886 887 /* iia in CSR */ 888 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 889 890 /* jja in CSR */ 891 PetscCall(PetscMalloc1(iia[n], &jja)); 892 for (i = 0; i < n; i++) 893 if (!PetscBTLookup(btf, i)) 894 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 895 896 /* map edge dofs connectivity */ 897 if (jj) { 898 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 899 for (i = 0; i < ne; i++) { 900 PetscInt e = idxs[i]; 901 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 902 } 903 } 904 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 905 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES)); 906 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 907 if (free) PetscCall(PetscFree2(iiu, jju)); 908 PetscCall(PetscBTDestroy(&btf)); 909 } else { 910 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES)); 911 } 912 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 913 PetscCall(MatDestroy(&conn)); 914 } 915 916 /* Analyze interface for edge dofs */ 917 PetscCall(PCBDDCAnalyzeInterface(pc)); 918 pcbddc->mat_graph->twodim = PETSC_FALSE; 919 920 /* Get coarse edges in the edge space */ 921 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 922 923 if (fl2g) { 924 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 925 PetscCall(PetscMalloc1(nee, &eedges)); 926 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 927 } else { 928 eedges = alleedges; 929 primals = allprimals; 930 } 931 932 /* Mark fine edge dofs with their coarse edge id */ 933 PetscCall(PetscArrayzero(marks, ne)); 934 PetscCall(ISGetLocalSize(primals, &cum)); 935 PetscCall(ISGetIndices(primals, &idxs)); 936 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 937 PetscCall(ISRestoreIndices(primals, &idxs)); 938 if (print) { 939 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 940 PetscCall(ISView(primals, NULL)); 941 } 942 943 maxsize = 0; 944 for (i = 0; i < nee; i++) { 945 PetscInt size, mark = i + 1; 946 947 PetscCall(ISGetLocalSize(eedges[i], &size)); 948 PetscCall(ISGetIndices(eedges[i], &idxs)); 949 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 950 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 951 maxsize = PetscMax(maxsize, size); 952 } 953 954 /* Find coarse edge endpoints */ 955 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 956 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 957 for (i = 0; i < nee; i++) { 958 PetscInt mark = i + 1, size; 959 960 PetscCall(ISGetLocalSize(eedges[i], &size)); 961 if (!size && nedfieldlocal) continue; 962 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 963 PetscCall(ISGetIndices(eedges[i], &idxs)); 964 if (print) { 965 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 966 PetscCall(ISView(eedges[i], NULL)); 967 } 968 for (j = 0; j < size; j++) { 969 PetscInt k, ee = idxs[j]; 970 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee)); 971 for (k = ii[ee]; k < ii[ee + 1]; k++) { 972 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k])); 973 if (PetscBTLookup(btv, jj[k])) { 974 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k])); 975 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 976 PetscInt k2; 977 PetscBool corner = PETSC_FALSE; 978 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 979 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]))); 980 /* it's a corner if either is connected with an edge dof belonging to a different cc or 981 if the edge dof lie on the natural part of the boundary */ 982 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 983 corner = PETSC_TRUE; 984 break; 985 } 986 } 987 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 988 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k])); 989 PetscCall(PetscBTSet(btv, jj[k])); 990 } else { 991 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n")); 992 } 993 } 994 } 995 } 996 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 997 } 998 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 999 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1000 PetscCall(PetscBTDestroy(&btb)); 1001 1002 /* Reset marked primal dofs */ 1003 PetscCall(ISGetLocalSize(primals, &cum)); 1004 PetscCall(ISGetIndices(primals, &idxs)); 1005 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 1006 PetscCall(ISRestoreIndices(primals, &idxs)); 1007 1008 /* Now use the initial lG */ 1009 PetscCall(MatDestroy(&lG)); 1010 PetscCall(MatDestroy(&lGt)); 1011 lG = lGinit; 1012 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 1013 1014 /* Compute extended cols indices */ 1015 PetscCall(PetscBTCreate(nv, &btvc)); 1016 PetscCall(PetscBTCreate(nee, &bter)); 1017 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1018 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 1019 i *= maxsize; 1020 PetscCall(PetscCalloc1(nee, &extcols)); 1021 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 1022 eerr = PETSC_FALSE; 1023 for (i = 0; i < nee; i++) { 1024 PetscInt size, found = 0; 1025 1026 cum = 0; 1027 PetscCall(ISGetLocalSize(eedges[i], &size)); 1028 if (!size && nedfieldlocal) continue; 1029 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1030 PetscCall(ISGetIndices(eedges[i], &idxs)); 1031 PetscCall(PetscBTMemzero(nv, btvc)); 1032 for (j = 0; j < size; j++) { 1033 PetscInt k, ee = idxs[j]; 1034 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1035 PetscInt vv = jj[k]; 1036 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 1037 else if (!PetscBTLookupSet(btvc, vv)) found++; 1038 } 1039 } 1040 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1041 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1042 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1043 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1044 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1045 /* it may happen that endpoints are not defined at this point 1046 if it is the case, mark this edge for a second pass */ 1047 if (cum != size - 1 || found != 2) { 1048 PetscCall(PetscBTSet(bter, i)); 1049 if (print) { 1050 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 1051 PetscCall(ISView(eedges[i], NULL)); 1052 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 1053 PetscCall(ISView(extcols[i], NULL)); 1054 } 1055 eerr = PETSC_TRUE; 1056 } 1057 } 1058 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 1059 PetscCall(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm)); 1060 if (done) { 1061 PetscInt *newprimals; 1062 1063 PetscCall(PetscMalloc1(ne, &newprimals)); 1064 PetscCall(ISGetLocalSize(primals, &cum)); 1065 PetscCall(ISGetIndices(primals, &idxs)); 1066 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 1067 PetscCall(ISRestoreIndices(primals, &idxs)); 1068 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1069 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr])); 1070 for (i = 0; i < nee; i++) { 1071 PetscBool has_candidates = PETSC_FALSE; 1072 if (PetscBTLookup(bter, i)) { 1073 PetscInt size, mark = i + 1; 1074 1075 PetscCall(ISGetLocalSize(eedges[i], &size)); 1076 PetscCall(ISGetIndices(eedges[i], &idxs)); 1077 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1078 for (j = 0; j < size; j++) { 1079 PetscInt k, ee = idxs[j]; 1080 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1])); 1081 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1082 /* set all candidates located on the edge as corners */ 1083 if (PetscBTLookup(btvcand, jj[k])) { 1084 PetscInt k2, vv = jj[k]; 1085 has_candidates = PETSC_TRUE; 1086 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv)); 1087 PetscCall(PetscBTSet(btv, vv)); 1088 /* set all edge dofs connected to candidate as primals */ 1089 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 1090 if (marks[jjt[k2]] == mark) { 1091 PetscInt k3, ee2 = jjt[k2]; 1092 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2)); 1093 newprimals[cum++] = ee2; 1094 /* finally set the new corners */ 1095 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 1096 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3])); 1097 PetscCall(PetscBTSet(btv, jj[k3])); 1098 } 1099 } 1100 } 1101 } else { 1102 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k])); 1103 } 1104 } 1105 } 1106 if (!has_candidates) { /* circular edge */ 1107 PetscInt k, ee = idxs[0], *tmarks; 1108 1109 PetscCall(PetscCalloc1(ne, &tmarks)); 1110 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i)); 1111 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1112 PetscInt k2; 1113 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k])); 1114 PetscCall(PetscBTSet(btv, jj[k])); 1115 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 1116 } 1117 for (j = 0; j < size; j++) { 1118 if (tmarks[idxs[j]] > 1) { 1119 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j])); 1120 newprimals[cum++] = idxs[j]; 1121 } 1122 } 1123 PetscCall(PetscFree(tmarks)); 1124 } 1125 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1126 } 1127 PetscCall(ISDestroy(&extcols[i])); 1128 } 1129 PetscCall(PetscFree(extcols)); 1130 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1131 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 1132 if (fl2g) { 1133 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 1134 PetscCall(ISDestroy(&primals)); 1135 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1136 PetscCall(PetscFree(eedges)); 1137 } 1138 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1139 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 1140 PetscCall(PetscFree(newprimals)); 1141 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 1142 PetscCall(ISDestroy(&primals)); 1143 PetscCall(PCBDDCAnalyzeInterface(pc)); 1144 pcbddc->mat_graph->twodim = PETSC_FALSE; 1145 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1146 if (fl2g) { 1147 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 1148 PetscCall(PetscMalloc1(nee, &eedges)); 1149 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 1150 } else { 1151 eedges = alleedges; 1152 primals = allprimals; 1153 } 1154 PetscCall(PetscCalloc1(nee, &extcols)); 1155 1156 /* Mark again */ 1157 PetscCall(PetscArrayzero(marks, ne)); 1158 for (i = 0; i < nee; i++) { 1159 PetscInt size, mark = i + 1; 1160 1161 PetscCall(ISGetLocalSize(eedges[i], &size)); 1162 PetscCall(ISGetIndices(eedges[i], &idxs)); 1163 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1164 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1165 } 1166 if (print) { 1167 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1168 PetscCall(ISView(primals, NULL)); 1169 } 1170 1171 /* Recompute extended cols */ 1172 eerr = PETSC_FALSE; 1173 for (i = 0; i < nee; i++) { 1174 PetscInt size; 1175 1176 cum = 0; 1177 PetscCall(ISGetLocalSize(eedges[i], &size)); 1178 if (!size && nedfieldlocal) continue; 1179 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1180 PetscCall(ISGetIndices(eedges[i], &idxs)); 1181 for (j = 0; j < size; j++) { 1182 PetscInt k, ee = idxs[j]; 1183 for (k = ii[ee]; k < ii[ee + 1]; k++) 1184 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1185 } 1186 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1187 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1188 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1189 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1190 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1191 if (cum != size - 1) { 1192 if (print) { 1193 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1194 PetscCall(ISView(eedges[i], NULL)); 1195 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1196 PetscCall(ISView(extcols[i], NULL)); 1197 } 1198 eerr = PETSC_TRUE; 1199 } 1200 } 1201 } 1202 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1203 PetscCall(PetscFree2(extrow, gidxs)); 1204 PetscCall(PetscBTDestroy(&bter)); 1205 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1206 /* an error should not occur at this point */ 1207 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1208 1209 /* Check the number of endpoints */ 1210 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1211 PetscCall(PetscMalloc1(2 * nee, &corners)); 1212 PetscCall(PetscMalloc1(nee, &cedges)); 1213 for (i = 0; i < nee; i++) { 1214 PetscInt size, found = 0, gc[2]; 1215 1216 /* init with defaults */ 1217 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1218 PetscCall(ISGetLocalSize(eedges[i], &size)); 1219 if (!size && nedfieldlocal) continue; 1220 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1221 PetscCall(ISGetIndices(eedges[i], &idxs)); 1222 PetscCall(PetscBTMemzero(nv, btvc)); 1223 for (j = 0; j < size; j++) { 1224 PetscInt k, ee = idxs[j]; 1225 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1226 PetscInt vv = jj[k]; 1227 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1228 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i); 1229 corners[i * 2 + found++] = vv; 1230 } 1231 } 1232 } 1233 if (found != 2) { 1234 PetscInt e; 1235 if (fl2g) { 1236 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1237 } else { 1238 e = idxs[0]; 1239 } 1240 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1241 } 1242 1243 /* get primal dof index on this coarse edge */ 1244 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1245 if (gc[0] > gc[1]) { 1246 PetscInt swap = corners[2 * i]; 1247 corners[2 * i] = corners[2 * i + 1]; 1248 corners[2 * i + 1] = swap; 1249 } 1250 cedges[i] = idxs[size - 1]; 1251 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1252 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1])); 1253 } 1254 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1255 PetscCall(PetscBTDestroy(&btvc)); 1256 1257 if (PetscDefined(USE_DEBUG)) { 1258 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1259 not interfere with neighbouring coarse edges */ 1260 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1261 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1262 for (i = 0; i < nv; i++) { 1263 PetscInt emax = 0, eemax = 0; 1264 1265 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1266 PetscCall(PetscArrayzero(emarks, nee + 1)); 1267 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1268 for (j = 1; j < nee + 1; j++) { 1269 if (emax < emarks[j]) { 1270 emax = emarks[j]; 1271 eemax = j; 1272 } 1273 } 1274 /* not relevant for edges */ 1275 if (!eemax) continue; 1276 1277 for (j = ii[i]; j < ii[i + 1]; j++) { 1278 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1279 } 1280 } 1281 PetscCall(PetscFree(emarks)); 1282 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1283 } 1284 1285 /* Compute extended rows indices for edge blocks of the change of basis */ 1286 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1287 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1288 extmem *= maxsize; 1289 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1290 PetscCall(PetscMalloc1(nee, &extrows)); 1291 PetscCall(PetscCalloc1(nee, &extrowcum)); 1292 for (i = 0; i < nv; i++) { 1293 PetscInt mark = 0, size, start; 1294 1295 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1296 for (j = ii[i]; j < ii[i + 1]; j++) 1297 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1298 1299 /* not relevant */ 1300 if (!mark) continue; 1301 1302 /* import extended row */ 1303 mark--; 1304 start = mark * extmem + extrowcum[mark]; 1305 size = ii[i + 1] - ii[i]; 1306 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1307 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1308 extrowcum[mark] += size; 1309 } 1310 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1311 PetscCall(MatDestroy(&lGt)); 1312 PetscCall(PetscFree(marks)); 1313 1314 /* Compress extrows */ 1315 cum = 0; 1316 for (i = 0; i < nee; i++) { 1317 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1318 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1319 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1320 cum = PetscMax(cum, size); 1321 } 1322 PetscCall(PetscFree(extrowcum)); 1323 PetscCall(PetscBTDestroy(&btv)); 1324 PetscCall(PetscBTDestroy(&btvcand)); 1325 1326 /* Workspace for lapack inner calls and VecSetValues */ 1327 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1328 1329 /* Create change of basis matrix (preallocation can be improved) */ 1330 PetscCall(MatCreate(comm, &T)); 1331 PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap)); 1332 PetscCall(MatSetType(T, MATAIJ)); 1333 PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL)); 1334 PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL)); 1335 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1336 PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1337 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1338 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1339 1340 /* Defaults to identity */ 1341 for (i = pc->mat->rmap->rstart; i < pc->mat->rmap->rend; i++) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES)); 1342 1343 /* Create discrete gradient for the coarser level if needed */ 1344 PetscCall(MatDestroy(&pcbddc->nedcG)); 1345 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1346 if (pcbddc->current_level < pcbddc->max_levels) { 1347 ISLocalToGlobalMapping cel2g, cvl2g; 1348 IS wis, gwis; 1349 PetscInt cnv, cne; 1350 1351 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1352 if (fl2g) { 1353 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1354 } else { 1355 PetscCall(PetscObjectReference((PetscObject)wis)); 1356 pcbddc->nedclocal = wis; 1357 } 1358 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1359 PetscCall(ISDestroy(&wis)); 1360 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1361 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1362 PetscCall(ISDestroy(&wis)); 1363 PetscCall(ISDestroy(&gwis)); 1364 1365 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1366 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1367 PetscCall(ISDestroy(&wis)); 1368 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1369 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1370 PetscCall(ISDestroy(&wis)); 1371 PetscCall(ISDestroy(&gwis)); 1372 1373 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1374 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1375 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1376 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1377 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1378 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1379 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1380 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1381 } 1382 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1383 1384 #if defined(PRINT_GDET) 1385 inc = 0; 1386 lev = pcbddc->current_level; 1387 #endif 1388 1389 /* Insert values in the change of basis matrix */ 1390 for (i = 0; i < nee; i++) { 1391 Mat Gins = NULL, GKins = NULL; 1392 IS cornersis = NULL; 1393 PetscScalar cvals[2]; 1394 1395 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); 1396 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1397 if (Gins && GKins) { 1398 const PetscScalar *data; 1399 const PetscInt *rows, *cols; 1400 PetscInt nrh, nch, nrc, ncc; 1401 1402 PetscCall(ISGetIndices(eedges[i], &cols)); 1403 /* H1 */ 1404 PetscCall(ISGetIndices(extrows[i], &rows)); 1405 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1406 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1407 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1408 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1409 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1410 /* complement */ 1411 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1412 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1413 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1414 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1415 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1416 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1417 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1418 1419 /* coarse discrete gradient */ 1420 if (pcbddc->nedcG) { 1421 PetscInt cols[2]; 1422 1423 cols[0] = 2 * i; 1424 cols[1] = 2 * i + 1; 1425 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1426 } 1427 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1428 } 1429 PetscCall(ISDestroy(&extrows[i])); 1430 PetscCall(ISDestroy(&extcols[i])); 1431 PetscCall(ISDestroy(&cornersis)); 1432 PetscCall(MatDestroy(&Gins)); 1433 PetscCall(MatDestroy(&GKins)); 1434 } 1435 1436 /* for FDM element-by-element: first dof on the edge only constraint. Why? */ 1437 if (elements_corners && pcbddc->mat_graph->multi_element) { 1438 ISLocalToGlobalMapping map; 1439 MatNullSpace nnsp; 1440 Vec quad_vec; 1441 1442 PetscCall(MatCreateVecs(pc->pmat, &quad_vec, NULL)); 1443 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, 1, &quad_vec, &nnsp)); 1444 PetscCall(VecLockReadPop(quad_vec)); 1445 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 1446 PetscCall(VecSetLocalToGlobalMapping(quad_vec, map)); 1447 for (i = 0; i < nee; i++) { 1448 const PetscInt *idxs; 1449 PetscScalar one = 1.0; 1450 1451 PetscCall(ISGetLocalSize(alleedges[i], &cum)); 1452 if (!cum) continue; 1453 PetscCall(ISGetIndices(alleedges[i], &idxs)); 1454 PetscCall(VecSetValuesLocal(quad_vec, 1, idxs, &one, INSERT_VALUES)); 1455 PetscCall(ISRestoreIndices(alleedges[i], &idxs)); 1456 } 1457 PetscCall(VecLockReadPush(quad_vec)); 1458 PetscCall(VecDestroy(&quad_vec)); 1459 PetscCall(MatSetNearNullSpace(pc->pmat, nnsp)); 1460 PetscCall(MatNullSpaceDestroy(&nnsp)); 1461 } 1462 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1463 1464 /* Start assembling */ 1465 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1466 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1467 1468 /* Free */ 1469 if (fl2g) { 1470 PetscCall(ISDestroy(&primals)); 1471 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1472 PetscCall(PetscFree(eedges)); 1473 } 1474 1475 /* hack mat_graph with primal dofs on the coarse edges */ 1476 { 1477 PCBDDCGraph graph = pcbddc->mat_graph; 1478 PetscInt *oqueue = graph->queue; 1479 PetscInt *ocptr = graph->cptr; 1480 PetscInt ncc, *idxs; 1481 1482 /* find first primal edge */ 1483 if (pcbddc->nedclocal) { 1484 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1485 } else { 1486 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1487 idxs = cedges; 1488 } 1489 cum = 0; 1490 while (cum < nee && cedges[cum] < 0) cum++; 1491 1492 /* adapt connected components */ 1493 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1494 graph->cptr[0] = 0; 1495 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1496 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1497 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1498 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1499 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1500 ncc++; 1501 lc--; 1502 cum++; 1503 while (cum < nee && cedges[cum] < 0) cum++; 1504 } 1505 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1506 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1507 ncc++; 1508 } 1509 graph->ncc = ncc; 1510 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1511 PetscCall(PetscFree2(ocptr, oqueue)); 1512 } 1513 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1514 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1515 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1516 1517 PetscCall(ISDestroy(&nedfieldlocal)); 1518 PetscCall(PetscFree(extrow)); 1519 PetscCall(PetscFree2(work, rwork)); 1520 PetscCall(PetscFree(corners)); 1521 PetscCall(PetscFree(cedges)); 1522 PetscCall(PetscFree(extrows)); 1523 PetscCall(PetscFree(extcols)); 1524 PetscCall(MatDestroy(&lG)); 1525 1526 /* Complete assembling */ 1527 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1528 PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view")); 1529 if (pcbddc->nedcG) { 1530 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1531 PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view")); 1532 } 1533 1534 PetscCall(ISDestroy(&elements_corners)); 1535 1536 /* set change of basis */ 1537 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE)); 1538 PetscCall(MatDestroy(&T)); 1539 PetscFunctionReturn(PETSC_SUCCESS); 1540 } 1541 1542 /* the near-null space of BDDC carries information on quadrature weights, 1543 and these can be collinear -> so cheat with MatNullSpaceCreate 1544 and create a suitable set of basis vectors first */ 1545 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1546 { 1547 PetscInt i; 1548 1549 PetscFunctionBegin; 1550 for (i = 0; i < nvecs; i++) { 1551 PetscInt first, last; 1552 1553 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1554 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1555 if (i >= first && i < last) { 1556 PetscScalar *data; 1557 PetscCall(VecGetArray(quad_vecs[i], &data)); 1558 if (!has_const) { 1559 data[i - first] = 1.; 1560 } else { 1561 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1562 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1563 } 1564 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1565 } 1566 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1567 } 1568 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1569 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1570 PetscInt first, last; 1571 PetscCall(VecLockReadPop(quad_vecs[i])); 1572 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1573 if (i >= first && i < last) { 1574 PetscScalar *data; 1575 PetscCall(VecGetArray(quad_vecs[i], &data)); 1576 if (!has_const) { 1577 data[i - first] = 0.; 1578 } else { 1579 data[2 * i - first] = 0.; 1580 data[2 * i - first + 1] = 0.; 1581 } 1582 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1583 } 1584 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1585 PetscCall(VecLockReadPush(quad_vecs[i])); 1586 } 1587 PetscFunctionReturn(PETSC_SUCCESS); 1588 } 1589 1590 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1591 { 1592 Mat loc_divudotp; 1593 Vec p, v, quad_vec; 1594 ISLocalToGlobalMapping map; 1595 PetscScalar *array; 1596 1597 PetscFunctionBegin; 1598 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1599 if (!transpose) { 1600 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1601 } else { 1602 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1603 } 1604 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp)); 1605 PetscCall(VecLockReadPop(quad_vec)); 1606 PetscCall(VecSetLocalToGlobalMapping(quad_vec, map)); 1607 1608 /* compute local quad vec */ 1609 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1610 if (!transpose) { 1611 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1612 } else { 1613 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1614 } 1615 /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */ 1616 PetscCall(VecSet(p, 1.)); 1617 if (!transpose) { 1618 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1619 } else { 1620 PetscCall(MatMult(loc_divudotp, p, v)); 1621 } 1622 PetscCall(VecDestroy(&p)); 1623 if (vl2l) { 1624 Mat lA; 1625 VecScatter sc; 1626 Vec vins; 1627 1628 PetscCall(MatISGetLocalMat(A, &lA)); 1629 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1630 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1631 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1632 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1633 PetscCall(VecScatterDestroy(&sc)); 1634 PetscCall(VecDestroy(&v)); 1635 v = vins; 1636 } 1637 1638 /* mask summation of interface values */ 1639 PetscInt n, *mmask, *mask, *idxs, nmr, nr; 1640 const PetscInt *degree; 1641 PetscSF msf; 1642 1643 PetscCall(VecGetLocalSize(v, &n)); 1644 PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL)); 1645 PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf)); 1646 PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL)); 1647 PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs)); 1648 PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, °ree)); 1649 PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, °ree)); 1650 for (PetscInt i = 0, c = 0; i < nr; i++) { 1651 mmask[c] = 1; 1652 c += degree[i]; 1653 } 1654 PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1655 PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1656 PetscCall(VecGetArray(v, &array)); 1657 for (PetscInt i = 0; i < n; i++) { 1658 array[i] *= mask[i]; 1659 idxs[i] = i; 1660 } 1661 PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES)); 1662 PetscCall(VecRestoreArray(v, &array)); 1663 PetscCall(PetscFree3(mmask, mask, idxs)); 1664 PetscCall(VecDestroy(&v)); 1665 PetscCall(VecAssemblyBegin(quad_vec)); 1666 PetscCall(VecAssemblyEnd(quad_vec)); 1667 PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view")); 1668 PetscCall(VecLockReadPush(quad_vec)); 1669 PetscCall(VecDestroy(&quad_vec)); 1670 PetscFunctionReturn(PETSC_SUCCESS); 1671 } 1672 1673 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1674 { 1675 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1676 1677 PetscFunctionBegin; 1678 if (primalv) { 1679 if (pcbddc->user_primal_vertices_local) { 1680 IS list[2], newp; 1681 1682 list[0] = primalv; 1683 list[1] = pcbddc->user_primal_vertices_local; 1684 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1685 PetscCall(ISSortRemoveDups(newp)); 1686 PetscCall(ISDestroy(&list[1])); 1687 pcbddc->user_primal_vertices_local = newp; 1688 } else { 1689 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1690 } 1691 } 1692 PetscFunctionReturn(PETSC_SUCCESS); 1693 } 1694 1695 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1696 { 1697 PetscInt f, *comp = (PetscInt *)ctx; 1698 1699 PetscFunctionBegin; 1700 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1701 PetscFunctionReturn(PETSC_SUCCESS); 1702 } 1703 1704 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1705 { 1706 Vec local, global; 1707 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1708 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1709 PetscBool monolithic = PETSC_FALSE; 1710 1711 PetscFunctionBegin; 1712 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1713 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1714 PetscOptionsEnd(); 1715 /* need to convert from global to local topology information and remove references to information in global ordering */ 1716 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1717 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1718 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1719 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1720 if (monolithic) { /* just get block size to properly compute vertices */ 1721 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); 1722 goto boundary; 1723 } 1724 1725 if (pcbddc->user_provided_isfordofs) { 1726 if (pcbddc->n_ISForDofs) { 1727 PetscInt i; 1728 1729 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1730 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1731 PetscInt bs; 1732 1733 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1734 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1735 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1736 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1737 } 1738 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1739 pcbddc->n_ISForDofs = 0; 1740 PetscCall(PetscFree(pcbddc->ISForDofs)); 1741 } 1742 } else { 1743 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1744 DM dm; 1745 1746 PetscCall(MatGetDM(pc->pmat, &dm)); 1747 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1748 if (dm) { 1749 IS *fields; 1750 PetscInt nf, i; 1751 1752 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1753 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1754 for (i = 0; i < nf; i++) { 1755 PetscInt bs; 1756 1757 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1758 PetscCall(ISGetBlockSize(fields[i], &bs)); 1759 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1760 PetscCall(ISDestroy(&fields[i])); 1761 } 1762 PetscCall(PetscFree(fields)); 1763 pcbddc->n_ISForDofsLocal = nf; 1764 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1765 PetscContainer c; 1766 1767 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1768 if (c) { 1769 MatISLocalFields lf; 1770 PetscCall(PetscContainerGetPointer(c, (void **)&lf)); 1771 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1772 } else { /* fallback, create the default fields if bs > 1 */ 1773 PetscInt i, n = matis->A->rmap->n; 1774 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1775 if (i > 1) { 1776 pcbddc->n_ISForDofsLocal = i; 1777 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1778 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); 1779 } 1780 } 1781 } 1782 } else { 1783 PetscInt i; 1784 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); 1785 } 1786 } 1787 1788 boundary: 1789 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1790 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1791 } else if (pcbddc->DirichletBoundariesLocal) { 1792 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1793 } 1794 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1795 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1796 } else if (pcbddc->NeumannBoundariesLocal) { 1797 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1798 } 1799 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); 1800 PetscCall(VecDestroy(&global)); 1801 PetscCall(VecDestroy(&local)); 1802 /* detect local disconnected subdomains if requested or needed */ 1803 if (pcbddc->detect_disconnected || matis->allow_repeated) { 1804 IS primalv = NULL; 1805 PetscInt nel; 1806 PetscBool filter = pcbddc->detect_disconnected_filter; 1807 1808 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1809 PetscCall(PetscFree(pcbddc->local_subs)); 1810 PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL)); 1811 if (matis->allow_repeated && nel) { 1812 const PetscInt *elsizes; 1813 1814 pcbddc->n_local_subs = nel; 1815 PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes)); 1816 PetscCall(PetscMalloc1(nel, &pcbddc->local_subs)); 1817 for (PetscInt i = 0, c = 0; i < nel; i++) { 1818 PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i])); 1819 c += elsizes[i]; 1820 } 1821 } else { 1822 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1823 } 1824 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1825 PetscCall(ISDestroy(&primalv)); 1826 } 1827 /* early stage corner detection */ 1828 { 1829 DM dm; 1830 1831 PetscCall(MatGetDM(pc->pmat, &dm)); 1832 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1833 if (dm) { 1834 PetscBool isda; 1835 1836 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1837 if (isda) { 1838 ISLocalToGlobalMapping l2l; 1839 IS corners; 1840 Mat lA; 1841 PetscBool gl, lo; 1842 1843 { 1844 Vec cvec; 1845 const PetscScalar *coords; 1846 PetscInt dof, n, cdim; 1847 PetscBool memc = PETSC_TRUE; 1848 1849 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1850 PetscCall(DMGetCoordinates(dm, &cvec)); 1851 PetscCall(VecGetLocalSize(cvec, &n)); 1852 PetscCall(VecGetBlockSize(cvec, &cdim)); 1853 n /= cdim; 1854 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1855 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1856 PetscCall(VecGetArrayRead(cvec, &coords)); 1857 #if defined(PETSC_USE_COMPLEX) 1858 memc = PETSC_FALSE; 1859 #endif 1860 if (dof != 1) memc = PETSC_FALSE; 1861 if (memc) { 1862 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1863 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1864 PetscReal *bcoords = pcbddc->mat_graph->coords; 1865 PetscInt i, b, d; 1866 1867 for (i = 0; i < n; i++) { 1868 for (b = 0; b < dof; b++) { 1869 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); 1870 } 1871 } 1872 } 1873 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1874 pcbddc->mat_graph->cdim = cdim; 1875 pcbddc->mat_graph->cnloc = dof * n; 1876 pcbddc->mat_graph->cloc = PETSC_FALSE; 1877 } 1878 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 1879 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 1880 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 1881 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 1882 lo = (PetscBool)(l2l && corners); 1883 PetscCall(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 1884 if (gl) { /* From PETSc's DMDA */ 1885 const PetscInt *idx; 1886 PetscInt dof, bs, *idxout, n; 1887 1888 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1889 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 1890 PetscCall(ISGetLocalSize(corners, &n)); 1891 PetscCall(ISGetIndices(corners, &idx)); 1892 if (bs == dof) { 1893 PetscCall(PetscMalloc1(n, &idxout)); 1894 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 1895 } else { /* the original DMDA local-to-local map have been modified */ 1896 PetscInt i, d; 1897 1898 PetscCall(PetscMalloc1(dof * n, &idxout)); 1899 for (i = 0; i < n; i++) 1900 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 1901 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 1902 1903 bs = 1; 1904 n *= dof; 1905 } 1906 PetscCall(ISRestoreIndices(corners, &idx)); 1907 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1908 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 1909 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 1910 PetscCall(ISDestroy(&corners)); 1911 pcbddc->corner_selected = PETSC_TRUE; 1912 pcbddc->corner_selection = PETSC_TRUE; 1913 } 1914 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1915 } 1916 } 1917 } 1918 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1919 DM dm; 1920 1921 PetscCall(MatGetDM(pc->pmat, &dm)); 1922 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1923 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1924 Vec vcoords; 1925 PetscSection section; 1926 PetscReal *coords; 1927 PetscInt d, cdim, nl, nf, **ctxs; 1928 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1929 /* debug coordinates */ 1930 PetscViewer viewer; 1931 PetscBool flg; 1932 PetscViewerFormat format; 1933 const char *prefix; 1934 1935 PetscCall(DMGetCoordinateDim(dm, &cdim)); 1936 PetscCall(DMGetLocalSection(dm, §ion)); 1937 PetscCall(PetscSectionGetNumFields(section, &nf)); 1938 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 1939 PetscCall(VecGetLocalSize(vcoords, &nl)); 1940 PetscCall(PetscMalloc1(nl * cdim, &coords)); 1941 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 1942 PetscCall(PetscMalloc1(nf, &ctxs[0])); 1943 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 1944 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 1945 1946 /* debug coordinates */ 1947 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1948 PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 1949 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 1950 for (d = 0; d < cdim; d++) { 1951 PetscInt i; 1952 const PetscScalar *v; 1953 char name[16]; 1954 1955 for (i = 0; i < nf; i++) ctxs[i][0] = d; 1956 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d)); 1957 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 1958 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 1959 if (flg) PetscCall(VecView(vcoords, viewer)); 1960 PetscCall(VecGetArrayRead(vcoords, &v)); 1961 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 1962 PetscCall(VecRestoreArrayRead(vcoords, &v)); 1963 } 1964 PetscCall(VecDestroy(&vcoords)); 1965 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 1966 PetscCall(PetscFree(coords)); 1967 PetscCall(PetscFree(ctxs[0])); 1968 PetscCall(PetscFree2(funcs, ctxs)); 1969 if (flg) { 1970 PetscCall(PetscViewerPopFormat(viewer)); 1971 PetscCall(PetscViewerDestroy(&viewer)); 1972 } 1973 } 1974 } 1975 PetscFunctionReturn(PETSC_SUCCESS); 1976 } 1977 1978 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1979 { 1980 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1981 IS nis; 1982 const PetscInt *idxs; 1983 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 1984 1985 PetscFunctionBegin; 1986 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)(pc)), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 1987 if (mop == MPI_LAND) { 1988 /* init rootdata with true */ 1989 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 1990 } else { 1991 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 1992 } 1993 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 1994 PetscCall(ISGetLocalSize(*is, &nd)); 1995 PetscCall(ISGetIndices(*is, &idxs)); 1996 for (i = 0; i < nd; i++) 1997 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 1998 PetscCall(ISRestoreIndices(*is, &idxs)); 1999 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2000 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2001 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2002 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2003 if (mop == MPI_LAND) { 2004 PetscCall(PetscMalloc1(nd, &nidxs)); 2005 } else { 2006 PetscCall(PetscMalloc1(n, &nidxs)); 2007 } 2008 for (i = 0, nnd = 0; i < n; i++) 2009 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 2010 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 2011 PetscCall(ISDestroy(is)); 2012 *is = nis; 2013 PetscFunctionReturn(PETSC_SUCCESS); 2014 } 2015 2016 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) 2017 { 2018 PC_IS *pcis = (PC_IS *)pc->data; 2019 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2020 2021 PetscFunctionBegin; 2022 if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS); 2023 if (pcbddc->ChangeOfBasisMatrix) { 2024 Vec swap; 2025 2026 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 2027 swap = pcbddc->work_change; 2028 pcbddc->work_change = r; 2029 r = swap; 2030 } 2031 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2032 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2033 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2034 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 2035 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2036 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 2037 PetscCall(VecSet(z, 0.)); 2038 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2039 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2040 if (pcbddc->ChangeOfBasisMatrix) { 2041 pcbddc->work_change = r; 2042 PetscCall(VecCopy(z, pcbddc->work_change)); 2043 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 2044 } 2045 PetscFunctionReturn(PETSC_SUCCESS); 2046 } 2047 2048 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 2049 { 2050 PCBDDCBenignMatMult_ctx ctx; 2051 PetscBool apply_right, apply_left, reset_x; 2052 2053 PetscFunctionBegin; 2054 PetscCall(MatShellGetContext(A, &ctx)); 2055 if (transpose) { 2056 apply_right = ctx->apply_left; 2057 apply_left = ctx->apply_right; 2058 } else { 2059 apply_right = ctx->apply_right; 2060 apply_left = ctx->apply_left; 2061 } 2062 reset_x = PETSC_FALSE; 2063 if (apply_right) { 2064 const PetscScalar *ax; 2065 PetscInt nl, i; 2066 2067 PetscCall(VecGetLocalSize(x, &nl)); 2068 PetscCall(VecGetArrayRead(x, &ax)); 2069 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 2070 PetscCall(VecRestoreArrayRead(x, &ax)); 2071 for (i = 0; i < ctx->benign_n; i++) { 2072 PetscScalar sum, val; 2073 const PetscInt *idxs; 2074 PetscInt nz, j; 2075 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2076 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2077 sum = 0.; 2078 if (ctx->apply_p0) { 2079 val = ctx->work[idxs[nz - 1]]; 2080 for (j = 0; j < nz - 1; j++) { 2081 sum += ctx->work[idxs[j]]; 2082 ctx->work[idxs[j]] += val; 2083 } 2084 } else { 2085 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]]; 2086 } 2087 ctx->work[idxs[nz - 1]] -= sum; 2088 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2089 } 2090 PetscCall(VecPlaceArray(x, ctx->work)); 2091 reset_x = PETSC_TRUE; 2092 } 2093 if (transpose) { 2094 PetscCall(MatMultTranspose(ctx->A, x, y)); 2095 } else { 2096 PetscCall(MatMult(ctx->A, x, y)); 2097 } 2098 if (reset_x) PetscCall(VecResetArray(x)); 2099 if (apply_left) { 2100 PetscScalar *ay; 2101 PetscInt i; 2102 2103 PetscCall(VecGetArray(y, &ay)); 2104 for (i = 0; i < ctx->benign_n; i++) { 2105 PetscScalar sum, val; 2106 const PetscInt *idxs; 2107 PetscInt nz, j; 2108 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2109 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2110 val = -ay[idxs[nz - 1]]; 2111 if (ctx->apply_p0) { 2112 sum = 0.; 2113 for (j = 0; j < nz - 1; j++) { 2114 sum += ay[idxs[j]]; 2115 ay[idxs[j]] += val; 2116 } 2117 ay[idxs[nz - 1]] += sum; 2118 } else { 2119 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val; 2120 ay[idxs[nz - 1]] = 0.; 2121 } 2122 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2123 } 2124 PetscCall(VecRestoreArray(y, &ay)); 2125 } 2126 PetscFunctionReturn(PETSC_SUCCESS); 2127 } 2128 2129 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2130 { 2131 PetscFunctionBegin; 2132 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 2133 PetscFunctionReturn(PETSC_SUCCESS); 2134 } 2135 2136 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2137 { 2138 PetscFunctionBegin; 2139 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 2140 PetscFunctionReturn(PETSC_SUCCESS); 2141 } 2142 2143 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2144 { 2145 PC_IS *pcis = (PC_IS *)pc->data; 2146 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2147 PCBDDCBenignMatMult_ctx ctx; 2148 2149 PetscFunctionBegin; 2150 if (!restore) { 2151 Mat A_IB, A_BI; 2152 PetscScalar *work; 2153 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2154 2155 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 2156 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS); 2157 PetscCall(PetscMalloc1(pcis->n, &work)); 2158 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 2159 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 2160 PetscCall(MatSetType(A_IB, MATSHELL)); 2161 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private)); 2162 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 2163 PetscCall(PetscNew(&ctx)); 2164 PetscCall(MatShellSetContext(A_IB, ctx)); 2165 ctx->apply_left = PETSC_TRUE; 2166 ctx->apply_right = PETSC_FALSE; 2167 ctx->apply_p0 = PETSC_FALSE; 2168 ctx->benign_n = pcbddc->benign_n; 2169 if (reuse) { 2170 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2171 ctx->free = PETSC_FALSE; 2172 } else { /* TODO: could be optimized for successive solves */ 2173 ISLocalToGlobalMapping N_to_D; 2174 PetscInt i; 2175 2176 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 2177 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 2178 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); 2179 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2180 ctx->free = PETSC_TRUE; 2181 } 2182 ctx->A = pcis->A_IB; 2183 ctx->work = work; 2184 PetscCall(MatSetUp(A_IB)); 2185 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 2186 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 2187 pcis->A_IB = A_IB; 2188 2189 /* A_BI as A_IB^T */ 2190 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 2191 pcbddc->benign_original_mat = pcis->A_BI; 2192 pcis->A_BI = A_BI; 2193 } else { 2194 if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS); 2195 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2196 PetscCall(MatDestroy(&pcis->A_IB)); 2197 pcis->A_IB = ctx->A; 2198 ctx->A = NULL; 2199 PetscCall(MatDestroy(&pcis->A_BI)); 2200 pcis->A_BI = pcbddc->benign_original_mat; 2201 pcbddc->benign_original_mat = NULL; 2202 if (ctx->free) { 2203 PetscInt i; 2204 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2205 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2206 } 2207 PetscCall(PetscFree(ctx->work)); 2208 PetscCall(PetscFree(ctx)); 2209 } 2210 PetscFunctionReturn(PETSC_SUCCESS); 2211 } 2212 2213 /* used just in bddc debug mode */ 2214 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2215 { 2216 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2217 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2218 Mat An; 2219 2220 PetscFunctionBegin; 2221 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2222 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2223 if (is1) { 2224 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2225 PetscCall(MatDestroy(&An)); 2226 } else { 2227 *B = An; 2228 } 2229 PetscFunctionReturn(PETSC_SUCCESS); 2230 } 2231 2232 /* TODO: add reuse flag */ 2233 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2234 { 2235 Mat Bt; 2236 PetscScalar *a, *bdata; 2237 const PetscInt *ii, *ij; 2238 PetscInt m, n, i, nnz, *bii, *bij; 2239 PetscBool flg_row; 2240 2241 PetscFunctionBegin; 2242 PetscCall(MatGetSize(A, &n, &m)); 2243 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2244 PetscCall(MatSeqAIJGetArray(A, &a)); 2245 nnz = n; 2246 for (i = 0; i < ii[n]; i++) { 2247 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2248 } 2249 PetscCall(PetscMalloc1(n + 1, &bii)); 2250 PetscCall(PetscMalloc1(nnz, &bij)); 2251 PetscCall(PetscMalloc1(nnz, &bdata)); 2252 nnz = 0; 2253 bii[0] = 0; 2254 for (i = 0; i < n; i++) { 2255 PetscInt j; 2256 for (j = ii[i]; j < ii[i + 1]; j++) { 2257 PetscScalar entry = a[j]; 2258 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2259 bij[nnz] = ij[j]; 2260 bdata[nnz] = entry; 2261 nnz++; 2262 } 2263 } 2264 bii[i + 1] = nnz; 2265 } 2266 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2267 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2268 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2269 { 2270 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data; 2271 b->free_a = PETSC_TRUE; 2272 b->free_ij = PETSC_TRUE; 2273 } 2274 if (*B == A) PetscCall(MatDestroy(&A)); 2275 *B = Bt; 2276 PetscFunctionReturn(PETSC_SUCCESS); 2277 } 2278 2279 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) 2280 { 2281 Mat B = NULL; 2282 DM dm; 2283 IS is_dummy, *cc_n; 2284 ISLocalToGlobalMapping l2gmap_dummy; 2285 PCBDDCGraph graph; 2286 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2287 PetscInt i, n; 2288 PetscInt *xadj, *adjncy; 2289 PetscBool isplex = PETSC_FALSE; 2290 2291 PetscFunctionBegin; 2292 if (ncc) *ncc = 0; 2293 if (cc) *cc = NULL; 2294 if (primalv) *primalv = NULL; 2295 PetscCall(PCBDDCGraphCreate(&graph)); 2296 PetscCall(MatGetDM(pc->pmat, &dm)); 2297 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2298 if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, "")); 2299 if (filter) isplex = PETSC_FALSE; 2300 2301 if (isplex) { /* this code has been modified from plexpartition.c */ 2302 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2303 PetscInt *adj = NULL; 2304 IS cellNumbering; 2305 const PetscInt *cellNum; 2306 PetscBool useCone, useClosure; 2307 PetscSection section; 2308 PetscSegBuffer adjBuffer; 2309 PetscSF sfPoint; 2310 2311 PetscCall(DMConvert(dm, DMPLEX, &dm)); 2312 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2313 PetscCall(DMGetPointSF(dm, &sfPoint)); 2314 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2315 /* Build adjacency graph via a section/segbuffer */ 2316 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2317 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2318 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2319 /* Always use FVM adjacency to create partitioner graph */ 2320 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2321 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2322 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2323 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2324 for (n = 0, p = pStart; p < pEnd; p++) { 2325 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2326 if (nroots > 0) { 2327 if (cellNum[p] < 0) continue; 2328 } 2329 adjSize = PETSC_DETERMINE; 2330 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2331 for (a = 0; a < adjSize; ++a) { 2332 const PetscInt point = adj[a]; 2333 if (pStart <= point && point < pEnd) { 2334 PetscInt *PETSC_RESTRICT pBuf; 2335 PetscCall(PetscSectionAddDof(section, p, 1)); 2336 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2337 *pBuf = point; 2338 } 2339 } 2340 n++; 2341 } 2342 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2343 /* Derive CSR graph from section/segbuffer */ 2344 PetscCall(PetscSectionSetUp(section)); 2345 PetscCall(PetscSectionGetStorageSize(section, &size)); 2346 PetscCall(PetscMalloc1(n + 1, &xadj)); 2347 for (idx = 0, p = pStart; p < pEnd; p++) { 2348 if (nroots > 0) { 2349 if (cellNum[p] < 0) continue; 2350 } 2351 PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++])); 2352 } 2353 xadj[n] = size; 2354 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2355 /* Clean up */ 2356 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2357 PetscCall(PetscSectionDestroy(§ion)); 2358 PetscCall(PetscFree(adj)); 2359 graph->xadj = xadj; 2360 graph->adjncy = adjncy; 2361 } else { 2362 Mat A; 2363 PetscBool isseqaij, flg_row; 2364 2365 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2366 if (!A->rmap->N || !A->cmap->N) { 2367 PetscCall(PCBDDCGraphDestroy(&graph)); 2368 PetscFunctionReturn(PETSC_SUCCESS); 2369 } 2370 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2371 if (!isseqaij && filter) { 2372 PetscBool isseqdense; 2373 2374 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2375 if (!isseqdense) { 2376 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2377 } else { /* TODO: rectangular case and LDA */ 2378 PetscScalar *array; 2379 PetscReal chop = 1.e-6; 2380 2381 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2382 PetscCall(MatDenseGetArray(B, &array)); 2383 PetscCall(MatGetSize(B, &n, NULL)); 2384 for (i = 0; i < n; i++) { 2385 PetscInt j; 2386 for (j = i + 1; j < n; j++) { 2387 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2388 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2389 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2390 } 2391 } 2392 PetscCall(MatDenseRestoreArray(B, &array)); 2393 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2394 } 2395 } else { 2396 PetscCall(PetscObjectReference((PetscObject)A)); 2397 B = A; 2398 } 2399 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2400 2401 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2402 if (filter) { 2403 PetscScalar *data; 2404 PetscInt j, cum; 2405 2406 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2407 PetscCall(MatSeqAIJGetArray(B, &data)); 2408 cum = 0; 2409 for (i = 0; i < n; i++) { 2410 PetscInt t; 2411 2412 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2413 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue; 2414 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2415 } 2416 t = xadj_filtered[i]; 2417 xadj_filtered[i] = cum; 2418 cum += t; 2419 } 2420 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2421 graph->xadj = xadj_filtered; 2422 graph->adjncy = adjncy_filtered; 2423 } else { 2424 graph->xadj = xadj; 2425 graph->adjncy = adjncy; 2426 } 2427 } 2428 /* compute local connected components using PCBDDCGraph */ 2429 graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */ 2430 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2431 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2432 PetscCall(ISDestroy(&is_dummy)); 2433 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_MAX_INT)); 2434 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2435 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2436 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2437 2438 /* partial clean up */ 2439 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2440 if (B) { 2441 PetscBool flg_row; 2442 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2443 PetscCall(MatDestroy(&B)); 2444 } 2445 if (isplex) { 2446 PetscCall(PetscFree(xadj)); 2447 PetscCall(PetscFree(adjncy)); 2448 } 2449 2450 /* get back data */ 2451 if (isplex) { 2452 if (ncc) *ncc = graph->ncc; 2453 if (cc || primalv) { 2454 Mat A; 2455 PetscBT btv, btvt, btvc; 2456 PetscSection subSection; 2457 PetscInt *ids, cum, cump, *cids, *pids; 2458 PetscInt dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd; 2459 2460 PetscCall(DMGetDimension(dm, &dim)); 2461 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2462 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 2463 PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd)); 2464 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 2465 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 2466 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2467 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2468 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2469 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2470 PetscCall(PetscBTCreate(pEnd - pStart, &btvc)); 2471 2472 /* First see if we find corners for the subdomains, i.e. a vertex 2473 shared by at least dim subdomain boundary faces. This does not 2474 cover all the possible cases with simplices but it is enough 2475 for tensor cells */ 2476 if (vStart != fStart && dim <= 3) { 2477 for (PetscInt c = cStart; c < cEnd; c++) { 2478 PetscInt nf, cnt = 0, mcnt = dim, *cfaces; 2479 const PetscInt *faces; 2480 2481 PetscCall(DMPlexGetConeSize(dm, c, &nf)); 2482 PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces)); 2483 PetscCall(DMPlexGetCone(dm, c, &faces)); 2484 for (PetscInt f = 0; f < nf; f++) { 2485 PetscInt nc, ff; 2486 2487 PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc)); 2488 PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL)); 2489 if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f]; 2490 } 2491 if (cnt >= mcnt) { 2492 PetscInt size, *closure = NULL; 2493 2494 PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2495 for (PetscInt k = 0; k < 2 * size; k += 2) { 2496 PetscInt v = closure[k]; 2497 if (v >= vStart && v < vEnd) { 2498 PetscInt vsize, *vclosure = NULL; 2499 2500 cnt = 0; 2501 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2502 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) { 2503 PetscInt f = vclosure[vk]; 2504 if (f >= fStart && f < fEnd) { 2505 PetscInt nc, ff; 2506 PetscBool valid = PETSC_FALSE; 2507 2508 for (PetscInt fk = 0; fk < nf; fk++) 2509 if (f == cfaces[fk]) valid = PETSC_TRUE; 2510 if (!valid) continue; 2511 PetscCall(DMPlexGetSupportSize(dm, f, &nc)); 2512 PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL)); 2513 if (nc == 1 && f == ff) cnt++; 2514 } 2515 } 2516 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart)); 2517 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2518 } 2519 } 2520 PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2521 } 2522 PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces)); 2523 } 2524 } 2525 2526 cids[0] = 0; 2527 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2528 PetscInt j; 2529 2530 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2531 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2532 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2533 2534 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2535 for (k = 0; k < 2 * size; k += 2) { 2536 PetscInt s, pp, p = closure[k], off, dof, cdof; 2537 2538 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2539 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2540 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2541 for (s = 0; s < dof - cdof; s++) { 2542 if (PetscBTLookupSet(btvt, off + s)) continue; 2543 if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2544 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2545 else pids[cump++] = off + s; /* cross-vertex */ 2546 } 2547 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2548 if (pp != p) { 2549 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2550 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2551 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2552 for (s = 0; s < dof - cdof; s++) { 2553 if (PetscBTLookupSet(btvt, off + s)) continue; 2554 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2555 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2556 else pids[cump++] = off + s; /* cross-vertex */ 2557 } 2558 } 2559 } 2560 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2561 } 2562 cids[i + 1] = cum; 2563 /* mark dofs as already assigned */ 2564 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j])); 2565 } 2566 if (cc) { 2567 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2568 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); 2569 *cc = cc_n; 2570 } 2571 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2572 PetscCall(PetscFree3(ids, cids, pids)); 2573 PetscCall(PetscBTDestroy(&btv)); 2574 PetscCall(PetscBTDestroy(&btvt)); 2575 PetscCall(PetscBTDestroy(&btvc)); 2576 PetscCall(DMDestroy(&dm)); 2577 } 2578 } else { 2579 if (ncc) *ncc = graph->ncc; 2580 if (cc) { 2581 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2582 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); 2583 *cc = cc_n; 2584 } 2585 } 2586 /* clean up graph */ 2587 graph->xadj = NULL; 2588 graph->adjncy = NULL; 2589 PetscCall(PCBDDCGraphDestroy(&graph)); 2590 PetscFunctionReturn(PETSC_SUCCESS); 2591 } 2592 2593 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2594 { 2595 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2596 PC_IS *pcis = (PC_IS *)pc->data; 2597 IS dirIS = NULL; 2598 PetscInt i; 2599 2600 PetscFunctionBegin; 2601 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2602 if (zerodiag) { 2603 Mat A; 2604 Vec vec3_N; 2605 PetscScalar *vals; 2606 const PetscInt *idxs; 2607 PetscInt nz, *count; 2608 2609 /* p0 */ 2610 PetscCall(VecSet(pcis->vec1_N, 0.)); 2611 PetscCall(PetscMalloc1(pcis->n, &vals)); 2612 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2613 PetscCall(ISGetIndices(zerodiag, &idxs)); 2614 for (i = 0; i < nz; i++) vals[i] = 1.; 2615 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2616 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2617 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2618 /* v_I */ 2619 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2620 for (i = 0; i < nz; i++) vals[i] = 0.; 2621 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2622 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2623 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2624 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2625 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2626 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2627 if (dirIS) { 2628 PetscInt n; 2629 2630 PetscCall(ISGetLocalSize(dirIS, &n)); 2631 PetscCall(ISGetIndices(dirIS, &idxs)); 2632 for (i = 0; i < n; i++) vals[i] = 0.; 2633 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2634 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2635 } 2636 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2637 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2638 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2639 PetscCall(VecSet(vec3_N, 0.)); 2640 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2641 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2642 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2643 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2644 PetscCall(PetscFree(vals)); 2645 PetscCall(VecDestroy(&vec3_N)); 2646 2647 /* there should not be any pressure dofs lying on the interface */ 2648 PetscCall(PetscCalloc1(pcis->n, &count)); 2649 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2650 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2651 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2652 PetscCall(ISGetIndices(zerodiag, &idxs)); 2653 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2654 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2655 PetscCall(PetscFree(count)); 2656 } 2657 PetscCall(ISDestroy(&dirIS)); 2658 2659 /* check PCBDDCBenignGetOrSetP0 */ 2660 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2661 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2662 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2663 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2664 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2665 for (i = 0; i < pcbddc->benign_n; i++) { 2666 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2667 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2668 } 2669 PetscFunctionReturn(PETSC_SUCCESS); 2670 } 2671 2672 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2673 { 2674 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2675 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2676 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2677 PetscInt nz, n, benign_n, bsp = 1; 2678 PetscInt *interior_dofs, n_interior_dofs, nneu; 2679 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2680 2681 PetscFunctionBegin; 2682 if (reuse) goto project_b0; 2683 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2684 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2685 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2686 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2687 has_null_pressures = PETSC_TRUE; 2688 have_null = PETSC_TRUE; 2689 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2690 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2691 Checks if all the pressure dofs in each subdomain have a zero diagonal 2692 If not, a change of basis on pressures is not needed 2693 since the local Schur complements are already SPD 2694 */ 2695 if (pcbddc->n_ISForDofsLocal) { 2696 IS iP = NULL; 2697 PetscInt p, *pp; 2698 PetscBool flg; 2699 2700 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2701 n = pcbddc->n_ISForDofsLocal; 2702 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2703 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2704 PetscOptionsEnd(); 2705 if (!flg) { 2706 n = 1; 2707 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2708 } 2709 2710 bsp = 0; 2711 for (p = 0; p < n; p++) { 2712 PetscInt bs; 2713 2714 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2715 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2716 bsp += bs; 2717 } 2718 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2719 bsp = 0; 2720 for (p = 0; p < n; p++) { 2721 const PetscInt *idxs; 2722 PetscInt b, bs, npl, *bidxs; 2723 2724 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2725 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2726 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2727 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2728 for (b = 0; b < bs; b++) { 2729 PetscInt i; 2730 2731 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2732 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2733 bsp++; 2734 } 2735 PetscCall(PetscFree(bidxs)); 2736 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2737 } 2738 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2739 2740 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2741 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2742 if (iP) { 2743 IS newpressures; 2744 2745 PetscCall(ISDifference(pressures, iP, &newpressures)); 2746 PetscCall(ISDestroy(&pressures)); 2747 pressures = newpressures; 2748 } 2749 PetscCall(ISSorted(pressures, &sorted)); 2750 if (!sorted) PetscCall(ISSort(pressures)); 2751 PetscCall(PetscFree(pp)); 2752 } 2753 2754 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2755 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2756 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2757 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2758 PetscCall(ISSorted(zerodiag, &sorted)); 2759 if (!sorted) PetscCall(ISSort(zerodiag)); 2760 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2761 zerodiag_save = zerodiag; 2762 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2763 if (!nz) { 2764 if (n) have_null = PETSC_FALSE; 2765 has_null_pressures = PETSC_FALSE; 2766 PetscCall(ISDestroy(&zerodiag)); 2767 } 2768 recompute_zerodiag = PETSC_FALSE; 2769 2770 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2771 zerodiag_subs = NULL; 2772 benign_n = 0; 2773 n_interior_dofs = 0; 2774 interior_dofs = NULL; 2775 nneu = 0; 2776 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); 2777 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2778 if (checkb) { /* need to compute interior nodes */ 2779 PetscInt n, i; 2780 PetscInt *count; 2781 ISLocalToGlobalMapping mapping; 2782 2783 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL)); 2784 PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL)); 2785 PetscCall(PetscMalloc1(n, &interior_dofs)); 2786 for (i = 0; i < n; i++) 2787 if (count[i] < 2) interior_dofs[n_interior_dofs++] = i; 2788 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL)); 2789 } 2790 if (has_null_pressures) { 2791 IS *subs; 2792 PetscInt nsubs, i, j, nl; 2793 const PetscInt *idxs; 2794 PetscScalar *array; 2795 Vec *work; 2796 2797 subs = pcbddc->local_subs; 2798 nsubs = pcbddc->n_local_subs; 2799 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2800 if (checkb) { 2801 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2802 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2803 PetscCall(ISGetIndices(zerodiag, &idxs)); 2804 /* work[0] = 1_p */ 2805 PetscCall(VecSet(work[0], 0.)); 2806 PetscCall(VecGetArray(work[0], &array)); 2807 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2808 PetscCall(VecRestoreArray(work[0], &array)); 2809 /* work[0] = 1_v */ 2810 PetscCall(VecSet(work[1], 1.)); 2811 PetscCall(VecGetArray(work[1], &array)); 2812 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2813 PetscCall(VecRestoreArray(work[1], &array)); 2814 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2815 } 2816 2817 if (nsubs > 1 || bsp > 1) { 2818 IS *is; 2819 PetscInt b, totb; 2820 2821 totb = bsp; 2822 is = bsp > 1 ? bzerodiag : &zerodiag; 2823 nsubs = PetscMax(nsubs, 1); 2824 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2825 for (b = 0; b < totb; b++) { 2826 for (i = 0; i < nsubs; i++) { 2827 ISLocalToGlobalMapping l2g; 2828 IS t_zerodiag_subs; 2829 PetscInt nl; 2830 2831 if (subs) { 2832 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2833 } else { 2834 IS tis; 2835 2836 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2837 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2838 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2839 PetscCall(ISDestroy(&tis)); 2840 } 2841 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2842 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2843 if (nl) { 2844 PetscBool valid = PETSC_TRUE; 2845 2846 if (checkb) { 2847 PetscCall(VecSet(matis->x, 0)); 2848 PetscCall(ISGetLocalSize(subs[i], &nl)); 2849 PetscCall(ISGetIndices(subs[i], &idxs)); 2850 PetscCall(VecGetArray(matis->x, &array)); 2851 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2852 PetscCall(VecRestoreArray(matis->x, &array)); 2853 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2854 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2855 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2856 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2857 PetscCall(VecGetArray(matis->y, &array)); 2858 for (j = 0; j < n_interior_dofs; j++) { 2859 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2860 valid = PETSC_FALSE; 2861 break; 2862 } 2863 } 2864 PetscCall(VecRestoreArray(matis->y, &array)); 2865 } 2866 if (valid && nneu) { 2867 const PetscInt *idxs; 2868 PetscInt nzb; 2869 2870 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2871 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2872 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2873 if (nzb) valid = PETSC_FALSE; 2874 } 2875 if (valid && pressures) { 2876 IS t_pressure_subs, tmp; 2877 PetscInt i1, i2; 2878 2879 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 2880 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 2881 PetscCall(ISGetLocalSize(tmp, &i1)); 2882 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 2883 if (i2 != i1) valid = PETSC_FALSE; 2884 PetscCall(ISDestroy(&t_pressure_subs)); 2885 PetscCall(ISDestroy(&tmp)); 2886 } 2887 if (valid) { 2888 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 2889 benign_n++; 2890 } else recompute_zerodiag = PETSC_TRUE; 2891 } 2892 PetscCall(ISDestroy(&t_zerodiag_subs)); 2893 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2894 } 2895 } 2896 } else { /* there's just one subdomain (or zero if they have not been detected */ 2897 PetscBool valid = PETSC_TRUE; 2898 2899 if (nneu) valid = PETSC_FALSE; 2900 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid)); 2901 if (valid && checkb) { 2902 PetscCall(MatMult(matis->A, work[0], matis->x)); 2903 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 2904 PetscCall(VecGetArray(matis->x, &array)); 2905 for (j = 0; j < n_interior_dofs; j++) { 2906 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2907 valid = PETSC_FALSE; 2908 break; 2909 } 2910 } 2911 PetscCall(VecRestoreArray(matis->x, &array)); 2912 } 2913 if (valid) { 2914 benign_n = 1; 2915 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 2916 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2917 zerodiag_subs[0] = zerodiag; 2918 } 2919 } 2920 if (checkb) PetscCall(VecDestroyVecs(2, &work)); 2921 } 2922 PetscCall(PetscFree(interior_dofs)); 2923 2924 if (!benign_n) { 2925 PetscInt n; 2926 2927 PetscCall(ISDestroy(&zerodiag)); 2928 recompute_zerodiag = PETSC_FALSE; 2929 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2930 if (n) have_null = PETSC_FALSE; 2931 } 2932 2933 /* final check for null pressures */ 2934 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null)); 2935 2936 if (recompute_zerodiag) { 2937 PetscCall(ISDestroy(&zerodiag)); 2938 if (benign_n == 1) { 2939 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2940 zerodiag = zerodiag_subs[0]; 2941 } else { 2942 PetscInt i, nzn, *new_idxs; 2943 2944 nzn = 0; 2945 for (i = 0; i < benign_n; i++) { 2946 PetscInt ns; 2947 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2948 nzn += ns; 2949 } 2950 PetscCall(PetscMalloc1(nzn, &new_idxs)); 2951 nzn = 0; 2952 for (i = 0; i < benign_n; i++) { 2953 PetscInt ns, *idxs; 2954 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2955 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2956 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 2957 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2958 nzn += ns; 2959 } 2960 PetscCall(PetscSortInt(nzn, new_idxs)); 2961 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 2962 } 2963 have_null = PETSC_FALSE; 2964 } 2965 2966 /* determines if the coarse solver will be singular or not */ 2967 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2968 2969 /* Prepare matrix to compute no-net-flux */ 2970 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2971 Mat A, loc_divudotp; 2972 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 2973 IS row, col, isused = NULL; 2974 PetscInt M, N, n, st, n_isused; 2975 2976 if (pressures) { 2977 isused = pressures; 2978 } else { 2979 isused = zerodiag_save; 2980 } 2981 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 2982 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2983 PetscCall(MatGetLocalSize(A, &n, NULL)); 2984 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 2985 n_isused = 0; 2986 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused)); 2987 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 2988 st = st - n_isused; 2989 if (n) { 2990 const PetscInt *gidxs; 2991 2992 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 2993 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 2994 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2995 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2996 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 2997 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 2998 } else { 2999 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 3000 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 3001 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 3002 } 3003 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 3004 PetscCall(ISGetSize(row, &M)); 3005 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 3006 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 3007 PetscCall(ISDestroy(&row)); 3008 PetscCall(ISDestroy(&col)); 3009 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 3010 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 3011 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3012 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 3013 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 3014 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 3015 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 3016 PetscCall(MatDestroy(&loc_divudotp)); 3017 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3018 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3019 } 3020 PetscCall(ISDestroy(&zerodiag_save)); 3021 PetscCall(ISDestroy(&pressures)); 3022 if (bzerodiag) { 3023 PetscInt i; 3024 3025 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i])); 3026 PetscCall(PetscFree(bzerodiag)); 3027 } 3028 pcbddc->benign_n = benign_n; 3029 pcbddc->benign_zerodiag_subs = zerodiag_subs; 3030 3031 /* determines if the problem has subdomains with 0 pressure block */ 3032 have_null = (PetscBool)(!!pcbddc->benign_n); 3033 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 3034 3035 project_b0: 3036 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 3037 /* change of basis and p0 dofs */ 3038 if (pcbddc->benign_n) { 3039 PetscInt i, s, *nnz; 3040 3041 /* local change of basis for pressures */ 3042 PetscCall(MatDestroy(&pcbddc->benign_change)); 3043 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 3044 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 3045 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 3046 PetscCall(PetscMalloc1(n, &nnz)); 3047 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 3048 for (i = 0; i < pcbddc->benign_n; i++) { 3049 const PetscInt *idxs; 3050 PetscInt nzs, j; 3051 3052 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 3053 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3054 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 3055 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 3056 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3057 } 3058 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 3059 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3060 PetscCall(PetscFree(nnz)); 3061 /* set identity by default */ 3062 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); 3063 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3064 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 3065 /* set change on pressures */ 3066 for (s = 0; s < pcbddc->benign_n; s++) { 3067 PetscScalar *array; 3068 const PetscInt *idxs; 3069 PetscInt nzs; 3070 3071 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 3072 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3073 for (i = 0; i < nzs - 1; i++) { 3074 PetscScalar vals[2]; 3075 PetscInt cols[2]; 3076 3077 cols[0] = idxs[i]; 3078 cols[1] = idxs[nzs - 1]; 3079 vals[0] = 1.; 3080 vals[1] = 1.; 3081 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 3082 } 3083 PetscCall(PetscMalloc1(nzs, &array)); 3084 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 3085 array[nzs - 1] = 1.; 3086 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 3087 /* store local idxs for p0 */ 3088 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 3089 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3090 PetscCall(PetscFree(array)); 3091 } 3092 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3093 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3094 3095 /* project if needed */ 3096 if (pcbddc->benign_change_explicit) { 3097 Mat M; 3098 3099 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 3100 PetscCall(MatDestroy(&pcbddc->local_mat)); 3101 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 3102 PetscCall(MatDestroy(&M)); 3103 } 3104 /* store global idxs for p0 */ 3105 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 3106 } 3107 *zerodiaglocal = zerodiag; 3108 PetscFunctionReturn(PETSC_SUCCESS); 3109 } 3110 3111 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3112 { 3113 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3114 PetscScalar *array; 3115 3116 PetscFunctionBegin; 3117 if (!pcbddc->benign_sf) { 3118 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 3119 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 3120 } 3121 if (get) { 3122 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 3123 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3124 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3125 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 3126 } else { 3127 PetscCall(VecGetArray(v, &array)); 3128 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3129 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3130 PetscCall(VecRestoreArray(v, &array)); 3131 } 3132 PetscFunctionReturn(PETSC_SUCCESS); 3133 } 3134 3135 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3136 { 3137 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3138 3139 PetscFunctionBegin; 3140 /* TODO: add error checking 3141 - avoid nested pop (or push) calls. 3142 - cannot push before pop. 3143 - cannot call this if pcbddc->local_mat is NULL 3144 */ 3145 if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 3146 if (pop) { 3147 if (pcbddc->benign_change_explicit) { 3148 IS is_p0; 3149 MatReuse reuse; 3150 3151 /* extract B_0 */ 3152 reuse = MAT_INITIAL_MATRIX; 3153 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX; 3154 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 3155 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 3156 /* remove rows and cols from local problem */ 3157 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 3158 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 3159 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 3160 PetscCall(ISDestroy(&is_p0)); 3161 } else { 3162 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 3163 PetscScalar *vals; 3164 PetscInt i, n, *idxs_ins; 3165 3166 PetscCall(VecGetLocalSize(matis->y, &n)); 3167 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 3168 if (!pcbddc->benign_B0) { 3169 PetscInt *nnz; 3170 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 3171 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 3172 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 3173 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 3174 for (i = 0; i < pcbddc->benign_n; i++) { 3175 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 3176 nnz[i] = n - nnz[i]; 3177 } 3178 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 3179 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3180 PetscCall(PetscFree(nnz)); 3181 } 3182 3183 for (i = 0; i < pcbddc->benign_n; i++) { 3184 PetscScalar *array; 3185 PetscInt *idxs, j, nz, cum; 3186 3187 PetscCall(VecSet(matis->x, 0.)); 3188 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 3189 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3190 for (j = 0; j < nz; j++) vals[j] = 1.; 3191 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 3192 PetscCall(VecAssemblyBegin(matis->x)); 3193 PetscCall(VecAssemblyEnd(matis->x)); 3194 PetscCall(VecSet(matis->y, 0.)); 3195 PetscCall(MatMult(matis->A, matis->x, matis->y)); 3196 PetscCall(VecGetArray(matis->y, &array)); 3197 cum = 0; 3198 for (j = 0; j < n; j++) { 3199 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3200 vals[cum] = array[j]; 3201 idxs_ins[cum] = j; 3202 cum++; 3203 } 3204 } 3205 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 3206 PetscCall(VecRestoreArray(matis->y, &array)); 3207 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3208 } 3209 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3210 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3211 PetscCall(PetscFree2(idxs_ins, vals)); 3212 } 3213 } else { /* push */ 3214 3215 PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 3216 for (PetscInt i = 0; i < pcbddc->benign_n; i++) { 3217 PetscScalar *B0_vals; 3218 PetscInt *B0_cols, B0_ncol; 3219 3220 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3221 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 3222 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 3223 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 3224 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3225 } 3226 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3227 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3228 } 3229 PetscFunctionReturn(PETSC_SUCCESS); 3230 } 3231 3232 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3233 { 3234 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3235 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3236 PetscBLASInt B_dummyint, B_neigs, B_ierr, B_lwork; 3237 PetscBLASInt *B_iwork, *B_ifail; 3238 PetscScalar *work, lwork; 3239 PetscScalar *St, *S, *eigv; 3240 PetscScalar *Sarray, *Starray; 3241 PetscReal *eigs, thresh, lthresh, uthresh; 3242 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 3243 PetscBool allocated_S_St, upart; 3244 #if defined(PETSC_USE_COMPLEX) 3245 PetscReal *rwork; 3246 #endif 3247 3248 PetscFunctionBegin; 3249 if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS); 3250 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 3251 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3252 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 3253 sub_schurs->is_posdef); 3254 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3255 3256 if (pcbddc->dbg_flag) { 3257 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3258 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3259 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 3260 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 3261 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3262 } 3263 3264 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); 3265 3266 /* max size of subsets */ 3267 mss = 0; 3268 for (i = 0; i < sub_schurs->n_subs; i++) { 3269 PetscInt subset_size; 3270 3271 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3272 mss = PetscMax(mss, subset_size); 3273 } 3274 3275 /* min/max and threshold */ 3276 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3277 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3278 nmax = PetscMax(nmin, nmax); 3279 allocated_S_St = PETSC_FALSE; 3280 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3281 allocated_S_St = PETSC_TRUE; 3282 } 3283 3284 /* allocate lapack workspace */ 3285 cum = cum2 = 0; 3286 maxneigs = 0; 3287 for (i = 0; i < sub_schurs->n_subs; i++) { 3288 PetscInt n, subset_size; 3289 3290 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3291 n = PetscMin(subset_size, nmax); 3292 cum += subset_size; 3293 cum2 += subset_size * n; 3294 maxneigs = PetscMax(maxneigs, n); 3295 } 3296 lwork = 0; 3297 if (mss) { 3298 PetscScalar sdummy = 0.; 3299 PetscBLASInt B_itype = 1; 3300 PetscBLASInt B_N = mss, idummy = 0; 3301 PetscReal rdummy = 0., zero = 0.0; 3302 PetscReal eps = 0.0; /* dlamch? */ 3303 3304 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3305 B_lwork = -1; 3306 /* some implementations may complain about NULL pointers, even if we are querying */ 3307 S = &sdummy; 3308 St = &sdummy; 3309 eigs = &rdummy; 3310 eigv = &sdummy; 3311 B_iwork = &idummy; 3312 B_ifail = &idummy; 3313 #if defined(PETSC_USE_COMPLEX) 3314 rwork = &rdummy; 3315 #endif 3316 thresh = 1.0; 3317 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3318 #if defined(PETSC_USE_COMPLEX) 3319 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3320 #else 3321 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3322 #endif 3323 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %d", (int)B_ierr); 3324 PetscCall(PetscFPTrapPop()); 3325 } 3326 3327 nv = 0; 3328 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3329 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3330 } 3331 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3332 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); 3333 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3334 #if defined(PETSC_USE_COMPLEX) 3335 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3336 #endif 3337 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3338 &pcbddc->adaptive_constraints_data)); 3339 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3340 3341 maxneigs = 0; 3342 cum = cumarray = 0; 3343 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3344 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3345 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3346 const PetscInt *idxs; 3347 3348 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3349 for (cum = 0; cum < nv; cum++) { 3350 pcbddc->adaptive_constraints_n[cum] = 1; 3351 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3352 pcbddc->adaptive_constraints_data[cum] = 1.0; 3353 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3354 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3355 } 3356 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3357 } 3358 3359 if (mss) { /* multilevel */ 3360 if (sub_schurs->gdsw) { 3361 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3362 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3363 } else { 3364 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3365 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3366 } 3367 } 3368 3369 lthresh = pcbddc->adaptive_threshold[0]; 3370 uthresh = pcbddc->adaptive_threshold[1]; 3371 upart = pcbddc->use_deluxe_scaling; 3372 for (i = 0; i < sub_schurs->n_subs; i++) { 3373 const PetscInt *idxs; 3374 PetscReal upper, lower; 3375 PetscInt j, subset_size, eigs_start = 0; 3376 PetscBLASInt B_N; 3377 PetscBool same_data = PETSC_FALSE; 3378 PetscBool scal = PETSC_FALSE; 3379 3380 if (upart) { 3381 upper = PETSC_MAX_REAL; 3382 lower = uthresh; 3383 } else { 3384 if (sub_schurs->gdsw) { 3385 upper = uthresh; 3386 lower = PETSC_MIN_REAL; 3387 } else { 3388 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3389 upper = 1. / uthresh; 3390 lower = 0.; 3391 } 3392 } 3393 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3394 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3395 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3396 /* this is experimental: we assume the dofs have been properly grouped to have 3397 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3398 if (!sub_schurs->is_posdef) { 3399 Mat T; 3400 3401 for (j = 0; j < subset_size; j++) { 3402 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3403 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3404 PetscCall(MatScale(T, -1.0)); 3405 PetscCall(MatDestroy(&T)); 3406 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3407 PetscCall(MatScale(T, -1.0)); 3408 PetscCall(MatDestroy(&T)); 3409 if (sub_schurs->change_primal_sub) { 3410 PetscInt nz, k; 3411 const PetscInt *idxs; 3412 3413 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3414 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3415 for (k = 0; k < nz; k++) { 3416 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3417 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3418 } 3419 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3420 } 3421 scal = PETSC_TRUE; 3422 break; 3423 } 3424 } 3425 } 3426 3427 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3428 if (sub_schurs->is_symmetric) { 3429 PetscInt j, k; 3430 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3431 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3432 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3433 } 3434 for (j = 0; j < subset_size; j++) { 3435 for (k = j; k < subset_size; k++) { 3436 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3437 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3438 } 3439 } 3440 } else { 3441 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3442 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3443 } 3444 } else { 3445 S = Sarray + cumarray; 3446 St = Starray + cumarray; 3447 } 3448 /* see if we can save some work */ 3449 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); 3450 3451 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3452 B_neigs = 0; 3453 } else { 3454 PetscBLASInt B_itype = 1; 3455 PetscBLASInt B_IL, B_IU; 3456 PetscReal eps = -1.0; /* dlamch? */ 3457 PetscInt nmin_s; 3458 PetscBool compute_range; 3459 3460 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3461 B_neigs = 0; 3462 compute_range = (PetscBool)!same_data; 3463 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3464 3465 if (pcbddc->dbg_flag) { 3466 PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof; 3467 3468 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); 3469 PetscCall( 3470 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc)); 3471 } 3472 3473 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3474 if (compute_range) { 3475 /* ask for eigenvalues larger than thresh */ 3476 if (sub_schurs->is_posdef) { 3477 #if defined(PETSC_USE_COMPLEX) 3478 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3479 #else 3480 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3481 #endif 3482 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3483 } else { /* no theory so far, but it works nicely */ 3484 PetscInt recipe = 0, recipe_m = 1; 3485 PetscReal bb[2]; 3486 3487 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3488 switch (recipe) { 3489 case 0: 3490 if (scal) { 3491 bb[0] = PETSC_MIN_REAL; 3492 bb[1] = lthresh; 3493 } else { 3494 bb[0] = uthresh; 3495 bb[1] = PETSC_MAX_REAL; 3496 } 3497 #if defined(PETSC_USE_COMPLEX) 3498 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3499 #else 3500 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3501 #endif 3502 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3503 break; 3504 case 1: 3505 bb[0] = PETSC_MIN_REAL; 3506 bb[1] = lthresh * lthresh; 3507 #if defined(PETSC_USE_COMPLEX) 3508 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3509 #else 3510 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3511 #endif 3512 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3513 if (!scal) { 3514 PetscBLASInt B_neigs2 = 0; 3515 3516 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3517 bb[1] = PETSC_MAX_REAL; 3518 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3519 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3520 #if defined(PETSC_USE_COMPLEX) 3521 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3522 #else 3523 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3524 #endif 3525 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3526 B_neigs += B_neigs2; 3527 } 3528 break; 3529 case 2: 3530 if (scal) { 3531 bb[0] = PETSC_MIN_REAL; 3532 bb[1] = 0; 3533 #if defined(PETSC_USE_COMPLEX) 3534 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3535 #else 3536 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3537 #endif 3538 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3539 } else { 3540 PetscBLASInt B_neigs2 = 0; 3541 PetscBool do_copy = PETSC_FALSE; 3542 3543 lthresh = PetscMax(lthresh, 0.0); 3544 if (lthresh > 0.0) { 3545 bb[0] = PETSC_MIN_REAL; 3546 bb[1] = lthresh * lthresh; 3547 3548 do_copy = PETSC_TRUE; 3549 #if defined(PETSC_USE_COMPLEX) 3550 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3551 #else 3552 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3553 #endif 3554 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3555 } 3556 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3557 bb[1] = PETSC_MAX_REAL; 3558 if (do_copy) { 3559 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3560 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3561 } 3562 #if defined(PETSC_USE_COMPLEX) 3563 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3564 #else 3565 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3566 #endif 3567 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3568 B_neigs += B_neigs2; 3569 } 3570 break; 3571 case 3: 3572 if (scal) { 3573 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3574 } else { 3575 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3576 } 3577 if (!scal) { 3578 bb[0] = uthresh; 3579 bb[1] = PETSC_MAX_REAL; 3580 #if defined(PETSC_USE_COMPLEX) 3581 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3582 #else 3583 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3584 #endif 3585 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3586 } 3587 if (recipe_m > 0 && B_N - B_neigs > 0) { 3588 PetscBLASInt B_neigs2 = 0; 3589 3590 B_IL = 1; 3591 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3592 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3593 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3594 #if defined(PETSC_USE_COMPLEX) 3595 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3596 #else 3597 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3598 #endif 3599 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3600 B_neigs += B_neigs2; 3601 } 3602 break; 3603 case 4: 3604 bb[0] = PETSC_MIN_REAL; 3605 bb[1] = lthresh; 3606 #if defined(PETSC_USE_COMPLEX) 3607 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3608 #else 3609 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3610 #endif 3611 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3612 { 3613 PetscBLASInt B_neigs2 = 0; 3614 3615 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3616 bb[1] = PETSC_MAX_REAL; 3617 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3618 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3619 #if defined(PETSC_USE_COMPLEX) 3620 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3621 #else 3622 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3623 #endif 3624 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3625 B_neigs += B_neigs2; 3626 } 3627 break; 3628 case 5: /* same as before: first compute all eigenvalues, then filter */ 3629 #if defined(PETSC_USE_COMPLEX) 3630 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3631 #else 3632 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3633 #endif 3634 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3635 { 3636 PetscInt e, k, ne; 3637 for (e = 0, ne = 0; e < B_neigs; e++) { 3638 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3639 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3640 eigs[ne] = eigs[e]; 3641 ne++; 3642 } 3643 } 3644 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3645 B_neigs = ne; 3646 } 3647 break; 3648 default: 3649 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3650 } 3651 } 3652 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3653 B_IU = PetscMax(1, PetscMin(B_N, nmax)); 3654 B_IL = 1; 3655 #if defined(PETSC_USE_COMPLEX) 3656 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3657 #else 3658 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3659 #endif 3660 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3661 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3662 PetscInt k; 3663 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3664 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3665 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3666 nmin = nmax; 3667 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3668 for (k = 0; k < nmax; k++) { 3669 eigs[k] = 1. / PETSC_SMALL; 3670 eigv[k * (subset_size + 1)] = 1.0; 3671 } 3672 } 3673 PetscCall(PetscFPTrapPop()); 3674 if (B_ierr) { 3675 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3676 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3677 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3678 } 3679 3680 if (B_neigs > nmax) { 3681 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); 3682 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3683 B_neigs = nmax; 3684 } 3685 3686 nmin_s = PetscMin(nmin, B_N); 3687 if (B_neigs < nmin_s) { 3688 PetscBLASInt B_neigs2 = 0; 3689 3690 if (upart) { 3691 if (scal) { 3692 B_IU = nmin_s; 3693 B_IL = B_neigs + 1; 3694 } else { 3695 B_IL = B_N - nmin_s + 1; 3696 B_IU = B_N - B_neigs; 3697 } 3698 } else { 3699 B_IL = B_neigs + 1; 3700 B_IU = nmin_s; 3701 } 3702 if (pcbddc->dbg_flag) { 3703 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3704 } 3705 if (sub_schurs->is_symmetric) { 3706 PetscInt j, k; 3707 for (j = 0; j < subset_size; j++) { 3708 for (k = j; k < subset_size; k++) { 3709 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3710 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3711 } 3712 } 3713 } else { 3714 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3715 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3716 } 3717 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3718 #if defined(PETSC_USE_COMPLEX) 3719 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3720 #else 3721 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3722 #endif 3723 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3724 PetscCall(PetscFPTrapPop()); 3725 B_neigs += B_neigs2; 3726 } 3727 if (B_ierr) { 3728 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3729 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3730 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3731 } 3732 if (pcbddc->dbg_flag) { 3733 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3734 for (j = 0; j < B_neigs; j++) { 3735 if (!sub_schurs->gdsw) { 3736 if (eigs[j] == 0.0) { 3737 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3738 } else { 3739 if (upart) { 3740 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3741 } else { 3742 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1. / eigs[j + eigs_start]))); 3743 } 3744 } 3745 } else { 3746 double pg = (double)eigs[j + eigs_start]; 3747 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3748 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3749 } 3750 } 3751 } 3752 } 3753 /* change the basis back to the original one */ 3754 if (sub_schurs->change) { 3755 Mat change, phi, phit; 3756 3757 if (pcbddc->dbg_flag > 2) { 3758 PetscInt ii; 3759 for (ii = 0; ii < B_neigs; ii++) { 3760 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3761 for (j = 0; j < B_N; j++) { 3762 #if defined(PETSC_USE_COMPLEX) 3763 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3764 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3765 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3766 #else 3767 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3768 #endif 3769 } 3770 } 3771 } 3772 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3773 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3774 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi)); 3775 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3776 PetscCall(MatDestroy(&phit)); 3777 PetscCall(MatDestroy(&phi)); 3778 } 3779 maxneigs = PetscMax(B_neigs, maxneigs); 3780 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3781 if (B_neigs) { 3782 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3783 3784 if (pcbddc->dbg_flag > 1) { 3785 PetscInt ii; 3786 for (ii = 0; ii < B_neigs; ii++) { 3787 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3788 for (j = 0; j < B_N; j++) { 3789 #if defined(PETSC_USE_COMPLEX) 3790 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3791 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3792 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3793 #else 3794 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3795 #endif 3796 } 3797 } 3798 } 3799 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3800 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3801 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3802 cum++; 3803 } 3804 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3805 /* shift for next computation */ 3806 cumarray += subset_size * subset_size; 3807 } 3808 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3809 3810 if (mss) { 3811 if (sub_schurs->gdsw) { 3812 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3813 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3814 } else { 3815 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3816 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3817 /* destroy matrices (junk) */ 3818 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3819 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3820 } 3821 } 3822 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3823 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3824 #if defined(PETSC_USE_COMPLEX) 3825 PetscCall(PetscFree(rwork)); 3826 #endif 3827 if (pcbddc->dbg_flag) { 3828 PetscInt maxneigs_r; 3829 PetscCall(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3830 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3831 } 3832 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3833 PetscFunctionReturn(PETSC_SUCCESS); 3834 } 3835 3836 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3837 { 3838 Mat coarse_submat; 3839 3840 PetscFunctionBegin; 3841 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3842 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3843 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3844 3845 /* Setup local neumann solver ksp_R */ 3846 /* PCBDDCSetUpLocalScatters should be called first! */ 3847 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3848 3849 /* 3850 Setup local correction and local part of coarse basis. 3851 Gives back the dense local part of the coarse matrix in column major ordering 3852 */ 3853 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat)); 3854 3855 /* Compute total number of coarse nodes and setup coarse solver */ 3856 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat)); 3857 PetscCall(MatDestroy(&coarse_submat)); 3858 PetscFunctionReturn(PETSC_SUCCESS); 3859 } 3860 3861 PetscErrorCode PCBDDCResetCustomization(PC pc) 3862 { 3863 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3864 3865 PetscFunctionBegin; 3866 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3867 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3868 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3869 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3870 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3871 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3872 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3873 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3874 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3875 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3876 PetscFunctionReturn(PETSC_SUCCESS); 3877 } 3878 3879 PetscErrorCode PCBDDCResetTopography(PC pc) 3880 { 3881 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3882 PetscInt i; 3883 3884 PetscFunctionBegin; 3885 PetscCall(MatDestroy(&pcbddc->nedcG)); 3886 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3887 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3888 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3889 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3890 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3891 PetscCall(VecDestroy(&pcbddc->work_change)); 3892 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3893 PetscCall(MatDestroy(&pcbddc->divudotp)); 3894 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3895 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3896 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3897 pcbddc->n_local_subs = 0; 3898 PetscCall(PetscFree(pcbddc->local_subs)); 3899 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3900 pcbddc->graphanalyzed = PETSC_FALSE; 3901 pcbddc->recompute_topography = PETSC_TRUE; 3902 pcbddc->corner_selected = PETSC_FALSE; 3903 PetscFunctionReturn(PETSC_SUCCESS); 3904 } 3905 3906 PetscErrorCode PCBDDCResetSolvers(PC pc) 3907 { 3908 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3909 3910 PetscFunctionBegin; 3911 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3912 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3913 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3914 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3915 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3916 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3917 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3918 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3919 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3920 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3921 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3922 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3923 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3924 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3925 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3926 PetscCall(KSPReset(pcbddc->ksp_D)); 3927 PetscCall(KSPReset(pcbddc->ksp_R)); 3928 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3929 PetscCall(MatDestroy(&pcbddc->local_mat)); 3930 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3931 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 3932 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3933 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3934 PetscCall(MatDestroy(&pcbddc->benign_change)); 3935 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3936 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 3937 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3938 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3939 if (pcbddc->benign_zerodiag_subs) { 3940 PetscInt i; 3941 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3942 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3943 } 3944 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3945 PetscFunctionReturn(PETSC_SUCCESS); 3946 } 3947 3948 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3949 { 3950 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3951 PC_IS *pcis = (PC_IS *)pc->data; 3952 VecType impVecType; 3953 PetscInt n_constraints, n_R, old_size; 3954 3955 PetscFunctionBegin; 3956 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3957 n_R = pcis->n - pcbddc->n_vertices; 3958 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 3959 /* local work vectors (try to avoid unneeded work)*/ 3960 /* R nodes */ 3961 old_size = -1; 3962 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); 3963 if (n_R != old_size) { 3964 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3965 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3966 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 3967 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 3968 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 3969 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 3970 } 3971 /* local primal dofs */ 3972 old_size = -1; 3973 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); 3974 if (pcbddc->local_primal_size != old_size) { 3975 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3976 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 3977 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 3978 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 3979 } 3980 /* local explicit constraints */ 3981 old_size = -1; 3982 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); 3983 if (n_constraints && n_constraints != old_size) { 3984 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3985 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 3986 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 3987 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 3988 } 3989 PetscFunctionReturn(PETSC_SUCCESS); 3990 } 3991 3992 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode) 3993 { 3994 PetscBool flg; 3995 const PetscScalar *a; 3996 3997 PetscFunctionBegin; 3998 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg)); 3999 if (flg) { 4000 PetscCall(MatDenseGetArrayRead(S, &a)); 4001 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE)); 4002 PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode)); 4003 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE)); 4004 PetscCall(MatDenseRestoreArrayRead(S, &a)); 4005 } else { 4006 const PetscInt *ii, *jj; 4007 PetscInt n; 4008 PetscInt buf[8192], *bufc = NULL; 4009 PetscBool freeb = PETSC_FALSE; 4010 Mat Sm = S; 4011 4012 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg)); 4013 if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm)); 4014 else PetscCall(PetscObjectReference((PetscObject)S)); 4015 PetscCall(MatSeqAIJGetArrayRead(Sm, &a)); 4016 PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4017 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure"); 4018 if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) { 4019 bufc = buf; 4020 } else { 4021 PetscCall(PetscMalloc1(nc, &bufc)); 4022 freeb = PETSC_TRUE; 4023 } 4024 4025 for (PetscInt i = 0; i < n; i++) { 4026 const PetscInt nci = ii[i + 1] - ii[i]; 4027 4028 for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]]; 4029 PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode)); 4030 } 4031 PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4032 PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a)); 4033 PetscCall(MatDestroy(&Sm)); 4034 if (freeb) PetscCall(PetscFree(bufc)); 4035 } 4036 PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY)); 4037 PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY)); 4038 PetscFunctionReturn(PETSC_SUCCESS); 4039 } 4040 4041 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat) 4042 { 4043 Mat_SeqAIJ *aij; 4044 PetscInt *ii, *jj; 4045 PetscScalar *aa; 4046 PetscInt nnz = 0, m, nc; 4047 const PetscScalar *a; 4048 const PetscScalar zero = 0.0; 4049 4050 PetscFunctionBegin; 4051 PetscCall(MatGetLocalSize(D, &m, &nc)); 4052 PetscCall(MatDenseGetArrayRead(D, &a)); 4053 PetscCall(PetscMalloc1(m + 1, &ii)); 4054 PetscCall(PetscMalloc1(m * nc, &jj)); 4055 PetscCall(PetscMalloc1(m * nc, &aa)); 4056 ii[0] = 0; 4057 for (PetscInt k = 0; k < m; k++) { 4058 for (PetscInt s = 0; s < nc; s++) { 4059 const PetscInt c = s + k * nc; 4060 const PetscScalar v = a[k + s * m]; 4061 4062 if (PetscUnlikely(j[c] < 0 || v == zero)) continue; 4063 jj[nnz] = j[c]; 4064 aa[nnz] = a[k + s * m]; 4065 nnz++; 4066 } 4067 ii[k + 1] = nnz; 4068 } 4069 4070 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat)); 4071 PetscCall(MatDenseRestoreArrayRead(D, &a)); 4072 4073 aij = (Mat_SeqAIJ *)(*mat)->data; 4074 aij->free_a = PETSC_TRUE; 4075 aij->free_ij = PETSC_TRUE; 4076 PetscFunctionReturn(PETSC_SUCCESS); 4077 } 4078 4079 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */ 4080 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B) 4081 { 4082 PetscInt n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL; 4083 const PetscBool allowzeropivot = PETSC_FALSE; 4084 PetscBool zeropivotdetected = PETSC_FALSE; 4085 const PetscReal shift = 0.0; 4086 PetscInt ipvt[5], *ii, *jj, *indi, *indj; 4087 PetscScalar work[25], *v_work = NULL, *aa, *diag; 4088 PetscLogDouble flops = 0.0; 4089 4090 PetscFunctionBegin; 4091 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices"); 4092 for (PetscInt i = 0; i < nblocks; i++) { 4093 ncnt += bsizes[i]; 4094 ncnt2 += PetscSqr(bsizes[i]); 4095 } 4096 PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n); 4097 for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]); 4098 if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots)); 4099 4100 PetscCall(PetscMalloc1(n + 1, &ii)); 4101 PetscCall(PetscMalloc1(ncnt2, &jj)); 4102 PetscCall(PetscCalloc1(ncnt2, &aa)); 4103 4104 ncnt = 0; 4105 ii[0] = 0; 4106 indi = ii; 4107 indj = jj; 4108 diag = aa; 4109 for (PetscInt i = 0; i < nblocks; i++) { 4110 const PetscInt bs = bsizes[i]; 4111 4112 for (PetscInt k = 0; k < bs; k++) { 4113 indi[k + 1] = indi[k] + bs; 4114 for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j; 4115 } 4116 PetscCall(MatGetValues(A, bs, indj, bs, indj, diag)); 4117 switch (bs) { 4118 case 1: 4119 *diag = 1.0 / (*diag); 4120 break; 4121 case 2: 4122 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 4123 break; 4124 case 3: 4125 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 4126 break; 4127 case 4: 4128 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 4129 break; 4130 case 5: 4131 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 4132 break; 4133 case 6: 4134 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 4135 break; 4136 case 7: 4137 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 4138 break; 4139 default: 4140 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4141 } 4142 ncnt += bs; 4143 flops += 2.0 * PetscPowInt(bs, 3) / 3.0; 4144 diag += bs * bs; 4145 indj += bs * bs; 4146 indi += bs; 4147 } 4148 PetscCall(PetscLogFlops(flops)); 4149 PetscCall(PetscFree2(v_work, v_pivots)); 4150 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B)); 4151 { 4152 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data; 4153 aij->free_a = PETSC_TRUE; 4154 aij->free_ij = PETSC_TRUE; 4155 } 4156 PetscFunctionReturn(PETSC_SUCCESS); 4157 } 4158 4159 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B) 4160 { 4161 const PetscScalar *rarr; 4162 PetscScalar *larr; 4163 PetscSF vsf; 4164 PetscInt n, rld, lld; 4165 4166 PetscFunctionBegin; 4167 PetscCall(MatGetSize(A, NULL, &n)); 4168 PetscCall(MatDenseGetLDA(A, &rld)); 4169 PetscCall(MatDenseGetLDA(B, &lld)); 4170 PetscCall(MatDenseGetArrayRead(A, &rarr)); 4171 PetscCall(MatDenseGetArrayWrite(B, &larr)); 4172 PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf)); 4173 PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4174 PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4175 PetscCall(MatDenseRestoreArrayRead(A, &rarr)); 4176 PetscCall(MatDenseRestoreArrayWrite(B, &larr)); 4177 PetscCall(PetscSFDestroy(&vsf)); 4178 PetscFunctionReturn(PETSC_SUCCESS); 4179 } 4180 4181 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat) 4182 { 4183 PC_IS *pcis = (PC_IS *)pc->data; 4184 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4185 PCBDDCGraph graph = pcbddc->mat_graph; 4186 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4187 /* submatrices of local problem */ 4188 Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL; 4189 /* submatrices of local coarse problem */ 4190 Mat S_CV = NULL, S_VC = NULL, S_CC = NULL; 4191 /* working matrices */ 4192 Mat C_CR; 4193 4194 /* additional working stuff */ 4195 PC pc_R; 4196 IS is_R, is_V, is_C; 4197 const PetscInt *idx_V, *idx_C; 4198 Mat F, Brhs = NULL; 4199 Vec dummy_vec; 4200 PetscBool isLU, isCHOL, need_benign_correction, sparserhs; 4201 PetscInt *idx_V_B; 4202 PetscInt lda_rhs, n_vertices, n_constraints, *p0_lidx_I; 4203 PetscInt n_eff_vertices, n_eff_constraints; 4204 PetscInt i, n_R, n_D, n_B; 4205 PetscScalar one = 1.0, m_one = -1.0; 4206 4207 /* Multi-element support */ 4208 PetscBool multi_element = graph->multi_element; 4209 PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL; 4210 PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL; 4211 IS is_C_perm = NULL; 4212 PetscInt n_C_bss = 0, *C_bss = NULL; 4213 Mat coarse_phi_multi; 4214 4215 PetscFunctionBegin; 4216 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 4217 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4218 4219 /* Set Non-overlapping dimensions */ 4220 n_vertices = pcbddc->n_vertices; 4221 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 4222 n_B = pcis->n_B; 4223 n_D = pcis->n - n_B; 4224 n_R = pcis->n - n_vertices; 4225 4226 /* vertices in boundary numbering */ 4227 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 4228 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 4229 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 4230 4231 /* these two cases still need to be optimized */ 4232 if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE; 4233 4234 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 4235 if (multi_element) { 4236 PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 4237 4238 PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat)); 4239 PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size)); 4240 PetscCall(MatSetType(*coarse_submat, MATSEQAIJ)); 4241 PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 4242 PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 4243 4244 /* group vertices and constraints by subdomain id */ 4245 const PetscInt *vidxs = pcbddc->primal_indices_local_idxs; 4246 const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices; 4247 PetscInt *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz; 4248 PetscInt n_el = PetscMax(graph->n_local_subs, 1); 4249 4250 PetscCall(PetscCalloc1(2 * n_el, &count_eff)); 4251 PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V)); 4252 PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C)); 4253 for (PetscInt i = 0; i < n_vertices; i++) { 4254 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4255 4256 V_to_eff_V[i] = count_eff[s]; 4257 count_eff[s] += 1; 4258 } 4259 for (PetscInt i = 0; i < n_constraints; i++) { 4260 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1; 4261 4262 C_to_eff_C[i] = count_eff[s]; 4263 count_eff[s] += 1; 4264 } 4265 4266 /* preallocation */ 4267 PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz)); 4268 for (PetscInt i = 0; i < n_vertices; i++) { 4269 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4270 4271 nnz[i] = count_eff[s] + count_eff[s + 1]; 4272 } 4273 for (PetscInt i = 0; i < n_constraints; i++) { 4274 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub; 4275 4276 nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1]; 4277 } 4278 PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz)); 4279 PetscCall(PetscFree(nnz)); 4280 4281 n_eff_vertices = 0; 4282 n_eff_constraints = 0; 4283 for (PetscInt i = 0; i < n_el; i++) { 4284 n_eff_vertices = PetscMax(n_eff_vertices, count_eff[2 * i]); 4285 n_eff_constraints = PetscMax(n_eff_constraints, count_eff[2 * i + 1]); 4286 count_eff[2 * i] = 0; 4287 count_eff[2 * i + 1] = 0; 4288 } 4289 4290 const PetscInt *idx; 4291 PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C)); 4292 4293 for (PetscInt i = 0; i < n_vertices; i++) { 4294 const PetscInt e = graph->nodes[vidxs[i]].local_sub; 4295 const PetscInt s = 2 * e; 4296 4297 V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i; 4298 count_eff[s] += 1; 4299 } 4300 for (PetscInt i = 0; i < n_constraints; i++) { 4301 const PetscInt e = graph->nodes[cidxs[i]].local_sub; 4302 const PetscInt s = 2 * e + 1; 4303 4304 C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i; 4305 count_eff[s] += 1; 4306 } 4307 4308 PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J)); 4309 PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J)); 4310 PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J)); 4311 PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J)); 4312 for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1; 4313 for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1; 4314 for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1; 4315 for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1; 4316 4317 PetscCall(ISGetIndices(pcbddc->is_R_local, &idx)); 4318 for (PetscInt i = 0; i < n_R; i++) { 4319 const PetscInt e = graph->nodes[idx[i]].local_sub; 4320 const PetscInt s = 2 * e; 4321 PetscInt j; 4322 4323 for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4324 for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4325 } 4326 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx)); 4327 PetscCall(ISGetIndices(pcis->is_B_local, &idx)); 4328 for (PetscInt i = 0; i < n_B; i++) { 4329 const PetscInt e = graph->nodes[idx[i]].local_sub; 4330 const PetscInt s = 2 * e; 4331 PetscInt j; 4332 4333 for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4334 for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4335 } 4336 PetscCall(ISRestoreIndices(pcis->is_B_local, &idx)); 4337 4338 /* permutation and blocksizes for block invert of S_CC */ 4339 PetscInt *idxp; 4340 4341 PetscCall(PetscMalloc1(n_constraints, &idxp)); 4342 PetscCall(PetscMalloc1(n_el, &C_bss)); 4343 n_C_bss = 0; 4344 for (PetscInt e = 0, cnt = 0; e < n_el; e++) { 4345 const PetscInt nc = count_eff[2 * e + 1]; 4346 4347 if (nc) C_bss[n_C_bss++] = nc; 4348 for (PetscInt c = 0; c < nc; c++) { idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c]; } 4349 cnt += nc; 4350 } 4351 4352 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm)); 4353 4354 PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C)); 4355 PetscCall(PetscFree(count_eff)); 4356 } else { 4357 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat)); 4358 n_eff_constraints = n_constraints; 4359 n_eff_vertices = n_vertices; 4360 } 4361 4362 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 4363 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 4364 PetscCall(PCSetUp(pc_R)); 4365 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 4366 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 4367 lda_rhs = n_R; 4368 need_benign_correction = PETSC_FALSE; 4369 if (isLU || isCHOL) { 4370 PetscCall(PCFactorGetMatrix(pc_R, &F)); 4371 } else if (sub_schurs && sub_schurs->reuse_solver) { 4372 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4373 MatFactorType type; 4374 4375 F = reuse_solver->F; 4376 PetscCall(MatGetFactorType(F, &type)); 4377 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 4378 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 4379 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 4380 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 4381 } else F = NULL; 4382 4383 /* determine if we can use a sparse right-hand side */ 4384 sparserhs = PETSC_FALSE; 4385 if (F && !multi_element) { 4386 MatSolverType solver; 4387 4388 PetscCall(MatFactorGetSolverType(F, &solver)); 4389 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 4390 } 4391 4392 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 4393 dummy_vec = NULL; 4394 if (need_benign_correction && lda_rhs != n_R && F) { 4395 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 4396 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 4397 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 4398 } 4399 4400 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 4401 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4402 4403 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R)); 4404 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V)); 4405 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C)); 4406 PetscCall(ISGetIndices(is_V, &idx_V)); 4407 PetscCall(ISGetIndices(is_C, &idx_C)); 4408 4409 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4410 if (n_constraints) { 4411 Mat C_B; 4412 4413 /* Extract constraints on R nodes: C_{CR} */ 4414 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 4415 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4416 4417 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4418 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4419 if (!sparserhs) { 4420 PetscScalar *marr; 4421 4422 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs)); 4423 PetscCall(MatDenseGetArrayWrite(Brhs, &marr)); 4424 for (i = 0; i < n_constraints; i++) { 4425 const PetscScalar *row_cmat_values; 4426 const PetscInt *row_cmat_indices; 4427 PetscInt size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i; 4428 4429 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4430 for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j]; 4431 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4432 } 4433 PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr)); 4434 } else { 4435 Mat tC_CR; 4436 4437 PetscCall(MatScale(C_CR, -1.0)); 4438 if (lda_rhs != n_R) { 4439 PetscScalar *aa; 4440 PetscInt r, *ii, *jj; 4441 PetscBool done; 4442 4443 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4444 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4445 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 4446 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 4447 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4448 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4449 } else { 4450 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4451 tC_CR = C_CR; 4452 } 4453 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 4454 PetscCall(MatDestroy(&tC_CR)); 4455 } 4456 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R)); 4457 if (F) { 4458 if (need_benign_correction) { 4459 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4460 4461 /* rhs is already zero on interior dofs, no need to change the rhs */ 4462 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 4463 } 4464 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 4465 if (need_benign_correction) { 4466 PetscScalar *marr; 4467 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4468 4469 /* XXX multi_element? */ 4470 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4471 if (lda_rhs != n_R) { 4472 for (i = 0; i < n_eff_constraints; i++) { 4473 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4474 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4475 PetscCall(VecResetArray(dummy_vec)); 4476 } 4477 } else { 4478 for (i = 0; i < n_eff_constraints; i++) { 4479 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4480 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4481 PetscCall(VecResetArray(pcbddc->vec1_R)); 4482 } 4483 } 4484 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4485 } 4486 } else { 4487 const PetscScalar *barr; 4488 PetscScalar *marr; 4489 4490 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4491 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4492 for (i = 0; i < n_eff_constraints; i++) { 4493 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4494 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4495 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4496 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4497 PetscCall(VecResetArray(pcbddc->vec1_R)); 4498 PetscCall(VecResetArray(pcbddc->vec2_R)); 4499 } 4500 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4501 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4502 } 4503 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 4504 PetscCall(MatDestroy(&Brhs)); 4505 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4506 if (!pcbddc->switch_static) { 4507 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2)); 4508 for (i = 0; i < n_eff_constraints; i++) { 4509 Vec r, b; 4510 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r)); 4511 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b)); 4512 PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4513 PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4514 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b)); 4515 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r)); 4516 } 4517 if (multi_element) { 4518 Mat T; 4519 4520 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4521 PetscCall(MatDestroy(&local_auxmat2_R)); 4522 local_auxmat2_R = T; 4523 PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T)); 4524 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4525 pcbddc->local_auxmat2 = T; 4526 } 4527 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4528 } else { 4529 if (multi_element) { 4530 Mat T; 4531 4532 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4533 PetscCall(MatDestroy(&local_auxmat2_R)); 4534 local_auxmat2_R = T; 4535 } 4536 if (lda_rhs != n_R) { 4537 PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 4538 } else { 4539 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4540 pcbddc->local_auxmat2 = local_auxmat2_R; 4541 } 4542 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4543 } 4544 PetscCall(MatScale(S_CC, m_one)); 4545 if (multi_element) { 4546 Mat T, T2; 4547 IS isp, ispi; 4548 4549 isp = is_C_perm; 4550 4551 PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi)); 4552 PetscCall(MatPermute(S_CC, isp, isp, &T)); 4553 PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2)); 4554 PetscCall(MatDestroy(&T)); 4555 PetscCall(MatDestroy(&S_CC)); 4556 PetscCall(MatPermute(T2, ispi, ispi, &S_CC)); 4557 PetscCall(MatDestroy(&T2)); 4558 PetscCall(ISDestroy(&ispi)); 4559 } else { 4560 if (isCHOL) { 4561 PetscCall(MatCholeskyFactor(S_CC, NULL, NULL)); 4562 } else { 4563 PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL)); 4564 } 4565 PetscCall(MatSeqDenseInvertFactors_Private(S_CC)); 4566 } 4567 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4568 PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1)); 4569 PetscCall(MatDestroy(&C_B)); 4570 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES)); 4571 } 4572 4573 /* Get submatrices from subdomain matrix */ 4574 if (n_vertices) { 4575 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4576 PetscBool oldpin; 4577 #endif 4578 IS is_aux; 4579 4580 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4581 IS tis; 4582 4583 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 4584 PetscCall(ISSort(tis)); 4585 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 4586 PetscCall(ISDestroy(&tis)); 4587 } else { 4588 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 4589 } 4590 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4591 oldpin = pcbddc->local_mat->boundtocpu; 4592 #endif 4593 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 4594 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 4595 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 4596 /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4597 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4598 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 4599 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4600 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 4601 #endif 4602 PetscCall(ISDestroy(&is_aux)); 4603 } 4604 PetscCall(ISDestroy(&is_C_perm)); 4605 PetscCall(PetscFree(C_bss)); 4606 4607 p0_lidx_I = NULL; 4608 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4609 const PetscInt *idxs; 4610 4611 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4612 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4613 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); 4614 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4615 } 4616 4617 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4618 4619 /* Matrices of coarse basis functions (local) */ 4620 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4621 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4622 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4623 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4624 if (!multi_element) { 4625 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B)); 4626 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D)); 4627 coarse_phi_multi = NULL; 4628 } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */ 4629 IS is_rows[2] = {pcbddc->is_R_local, NULL}; 4630 IS is_cols[2] = {is_V, is_C}; 4631 4632 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1])); 4633 PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi)); 4634 PetscCall(ISDestroy(&is_rows[1])); 4635 } 4636 4637 /* vertices */ 4638 if (n_vertices) { 4639 PetscBool restoreavr = PETSC_FALSE; 4640 Mat A_RRmA_RV = NULL; 4641 4642 PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4643 PetscCall(MatDestroy(&A_VV)); 4644 4645 if (n_R) { 4646 Mat A_RV_bcorr = NULL, S_VV; 4647 4648 PetscCall(MatScale(A_RV, m_one)); 4649 if (need_benign_correction) { 4650 ISLocalToGlobalMapping RtoN; 4651 IS is_p0; 4652 PetscInt *idxs_p0, n; 4653 4654 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4655 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4656 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4657 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4658 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4659 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4660 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4661 PetscCall(ISDestroy(&is_p0)); 4662 } 4663 4664 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV)); 4665 if (!sparserhs || need_benign_correction) { 4666 if (lda_rhs == n_R && !multi_element) { 4667 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4668 } else { 4669 Mat T; 4670 PetscScalar *av, *array; 4671 const PetscInt *xadj, *adjncy; 4672 PetscInt n; 4673 PetscBool flg_row; 4674 4675 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T)); 4676 PetscCall(MatDenseGetArrayWrite(T, &array)); 4677 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4678 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4679 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4680 for (i = 0; i < n; i++) { 4681 PetscInt j; 4682 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j]; 4683 } 4684 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4685 PetscCall(MatDenseRestoreArrayWrite(T, &array)); 4686 PetscCall(MatDestroy(&A_RV)); 4687 A_RV = T; 4688 } 4689 if (need_benign_correction) { 4690 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4691 PetscScalar *marr; 4692 4693 /* XXX multi_element */ 4694 PetscCall(MatDenseGetArray(A_RV, &marr)); 4695 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4696 4697 | 0 0 0 | (V) 4698 L = | 0 0 -1 | (P-p0) 4699 | 0 0 -1 | (p0) 4700 4701 */ 4702 for (i = 0; i < reuse_solver->benign_n; i++) { 4703 const PetscScalar *vals; 4704 const PetscInt *idxs, *idxs_zero; 4705 PetscInt n, j, nz; 4706 4707 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4708 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4709 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4710 for (j = 0; j < n; j++) { 4711 PetscScalar val = vals[j]; 4712 PetscInt k, col = idxs[j]; 4713 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4714 } 4715 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4716 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4717 } 4718 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4719 } 4720 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4721 Brhs = A_RV; 4722 } else { 4723 Mat tA_RVT, A_RVT; 4724 4725 if (!pcbddc->symmetric_primal) { 4726 /* A_RV already scaled by -1 */ 4727 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4728 } else { 4729 restoreavr = PETSC_TRUE; 4730 PetscCall(MatScale(A_VR, -1.0)); 4731 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4732 A_RVT = A_VR; 4733 } 4734 if (lda_rhs != n_R) { 4735 PetscScalar *aa; 4736 PetscInt r, *ii, *jj; 4737 PetscBool done; 4738 4739 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4740 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4741 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4742 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4743 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4744 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4745 } else { 4746 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4747 tA_RVT = A_RVT; 4748 } 4749 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4750 PetscCall(MatDestroy(&tA_RVT)); 4751 PetscCall(MatDestroy(&A_RVT)); 4752 } 4753 if (F) { 4754 /* need to correct the rhs */ 4755 if (need_benign_correction) { 4756 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4757 PetscScalar *marr; 4758 4759 PetscCall(MatDenseGetArray(Brhs, &marr)); 4760 if (lda_rhs != n_R) { 4761 for (i = 0; i < n_eff_vertices; i++) { 4762 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4763 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4764 PetscCall(VecResetArray(dummy_vec)); 4765 } 4766 } else { 4767 for (i = 0; i < n_eff_vertices; i++) { 4768 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4769 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4770 PetscCall(VecResetArray(pcbddc->vec1_R)); 4771 } 4772 } 4773 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4774 } 4775 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4776 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4777 /* need to correct the solution */ 4778 if (need_benign_correction) { 4779 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4780 PetscScalar *marr; 4781 4782 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4783 if (lda_rhs != n_R) { 4784 for (i = 0; i < n_eff_vertices; i++) { 4785 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4786 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4787 PetscCall(VecResetArray(dummy_vec)); 4788 } 4789 } else { 4790 for (i = 0; i < n_eff_vertices; i++) { 4791 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4792 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4793 PetscCall(VecResetArray(pcbddc->vec1_R)); 4794 } 4795 } 4796 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4797 } 4798 } else { 4799 const PetscScalar *barr; 4800 PetscScalar *marr; 4801 4802 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4803 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4804 for (i = 0; i < n_eff_vertices; i++) { 4805 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4806 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4807 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4808 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4809 PetscCall(VecResetArray(pcbddc->vec1_R)); 4810 PetscCall(VecResetArray(pcbddc->vec2_R)); 4811 } 4812 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4813 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4814 } 4815 PetscCall(MatDestroy(&A_RV)); 4816 PetscCall(MatDestroy(&Brhs)); 4817 /* S_VV and S_CV */ 4818 if (n_constraints) { 4819 Mat B; 4820 4821 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B)); 4822 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B)); 4823 4824 /* S_CV = pcbddc->local_auxmat1 * B */ 4825 if (multi_element) { 4826 Mat T; 4827 4828 PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T)); 4829 PetscCall(MatDestroy(&B)); 4830 B = T; 4831 } 4832 PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV)); 4833 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4834 PetscCall(MatProductSetFromOptions(S_CV)); 4835 PetscCall(MatProductSymbolic(S_CV)); 4836 PetscCall(MatProductNumeric(S_CV)); 4837 PetscCall(MatProductClear(S_CV)); 4838 PetscCall(MatDestroy(&B)); 4839 4840 /* B = local_auxmat2_R * S_CV */ 4841 PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B)); 4842 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4843 PetscCall(MatProductSetFromOptions(B)); 4844 PetscCall(MatProductSymbolic(B)); 4845 PetscCall(MatProductNumeric(B)); 4846 4847 PetscCall(MatScale(S_CV, m_one)); 4848 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES)); 4849 4850 if (multi_element) { 4851 Mat T; 4852 4853 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4854 PetscCall(MatDestroy(&A_RRmA_RV)); 4855 A_RRmA_RV = T; 4856 } 4857 PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */ 4858 PetscCall(MatDestroy(&B)); 4859 } else if (multi_element) { 4860 Mat T; 4861 4862 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4863 PetscCall(MatDestroy(&A_RRmA_RV)); 4864 A_RRmA_RV = T; 4865 } 4866 4867 if (lda_rhs != n_R) { 4868 Mat T; 4869 4870 PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T)); 4871 PetscCall(MatDestroy(&A_RRmA_RV)); 4872 A_RRmA_RV = T; 4873 } 4874 4875 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4876 if (need_benign_correction) { /* XXX SPARSE */ 4877 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4878 PetscScalar *sums; 4879 const PetscScalar *marr; 4880 4881 PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr)); 4882 PetscCall(PetscMalloc1(n_vertices, &sums)); 4883 for (i = 0; i < reuse_solver->benign_n; i++) { 4884 const PetscScalar *vals; 4885 const PetscInt *idxs, *idxs_zero; 4886 PetscInt n, j, nz; 4887 4888 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4889 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4890 for (j = 0; j < n_vertices; j++) { 4891 sums[j] = 0.; 4892 for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R]; 4893 } 4894 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4895 for (j = 0; j < n; j++) { 4896 PetscScalar val = vals[j]; 4897 for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES)); 4898 } 4899 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4900 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4901 } 4902 PetscCall(PetscFree(sums)); 4903 PetscCall(MatDestroy(&A_RV_bcorr)); 4904 PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr)); 4905 } 4906 4907 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV)); 4908 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4909 PetscCall(MatDestroy(&S_VV)); 4910 } 4911 4912 /* coarse basis functions */ 4913 if (coarse_phi_multi) { 4914 Mat Vid; 4915 4916 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid)); 4917 PetscCall(MatShift_Basic(Vid, 1.0)); 4918 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV)); 4919 PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid)); 4920 PetscCall(MatDestroy(&Vid)); 4921 } else { 4922 if (A_RRmA_RV) { 4923 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B)); 4924 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4925 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D)); 4926 if (pcbddc->benign_n) { 4927 for (i = 0; i < n_vertices; i++) { PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); } 4928 } 4929 } 4930 } 4931 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES)); 4932 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 4933 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 4934 } 4935 PetscCall(MatDestroy(&A_RRmA_RV)); 4936 } 4937 PetscCall(MatDestroy(&A_RV)); 4938 PetscCall(VecDestroy(&dummy_vec)); 4939 4940 if (n_constraints) { 4941 Mat B, B2; 4942 4943 PetscCall(MatScale(S_CC, m_one)); 4944 PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B)); 4945 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4946 PetscCall(MatProductSetFromOptions(B)); 4947 PetscCall(MatProductSymbolic(B)); 4948 PetscCall(MatProductNumeric(B)); 4949 4950 if (n_vertices) { 4951 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4952 PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC)); 4953 } else { 4954 if (lda_rhs != n_R) { 4955 Mat tB; 4956 4957 PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB)); 4958 PetscCall(MatDestroy(&B)); 4959 B = tB; 4960 } 4961 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC)); 4962 } 4963 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES)); 4964 } 4965 4966 /* coarse basis functions */ 4967 if (coarse_phi_multi) { 4968 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B)); 4969 } else { 4970 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 4971 PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2)); 4972 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2)); 4973 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4974 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 4975 PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2)); 4976 if (pcbddc->benign_n) { 4977 for (i = 0; i < n_constraints; i++) { PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); } 4978 } 4979 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2)); 4980 } 4981 } 4982 PetscCall(MatDestroy(&B)); 4983 } 4984 4985 /* assemble sparse coarse basis functions */ 4986 if (coarse_phi_multi) { 4987 Mat T; 4988 4989 PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T)); 4990 PetscCall(MatDestroy(&coarse_phi_multi)); 4991 PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B)); 4992 if (pcbddc->switch_static || pcbddc->dbg_flag) { PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D)); } 4993 PetscCall(MatDestroy(&T)); 4994 } 4995 PetscCall(MatDestroy(&local_auxmat2_R)); 4996 PetscCall(PetscFree(p0_lidx_I)); 4997 4998 /* coarse matrix entries relative to B_0 */ 4999 if (pcbddc->benign_n) { 5000 Mat B0_B, B0_BPHI; 5001 IS is_dummy; 5002 const PetscScalar *data; 5003 PetscInt j; 5004 5005 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5006 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5007 PetscCall(ISDestroy(&is_dummy)); 5008 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5009 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5010 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 5011 for (j = 0; j < pcbddc->benign_n; j++) { 5012 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5013 for (i = 0; i < pcbddc->local_primal_size; i++) { 5014 PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5015 PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5016 } 5017 } 5018 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 5019 PetscCall(MatDestroy(&B0_B)); 5020 PetscCall(MatDestroy(&B0_BPHI)); 5021 } 5022 5023 /* compute other basis functions for non-symmetric problems */ 5024 if (!pcbddc->symmetric_primal) { 5025 Mat B_V = NULL, B_C = NULL; 5026 PetscScalar *marray, *work; 5027 5028 /* TODO multi_element MatDenseScatter */ 5029 if (n_constraints) { 5030 Mat S_CCT, C_CRT; 5031 5032 PetscCall(MatScale(S_CC, m_one)); 5033 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 5034 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 5035 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C)); 5036 PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C)); 5037 PetscCall(MatDestroy(&S_CCT)); 5038 if (n_vertices) { 5039 Mat S_VCT; 5040 5041 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 5042 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V)); 5043 PetscCall(MatDestroy(&S_VCT)); 5044 PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V)); 5045 } 5046 PetscCall(MatDestroy(&C_CRT)); 5047 } else { 5048 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 5049 } 5050 if (n_vertices && n_R) { 5051 PetscScalar *av, *marray; 5052 const PetscInt *xadj, *adjncy; 5053 PetscInt n; 5054 PetscBool flg_row; 5055 5056 /* B_V = B_V - A_VR^T */ 5057 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 5058 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5059 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 5060 PetscCall(MatDenseGetArray(B_V, &marray)); 5061 for (i = 0; i < n; i++) { 5062 PetscInt j; 5063 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 5064 } 5065 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5066 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5067 PetscCall(MatDestroy(&A_VR)); 5068 } 5069 5070 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 5071 PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work)); 5072 if (n_vertices) { 5073 PetscCall(MatDenseGetArray(B_V, &marray)); 5074 for (i = 0; i < n_vertices; i++) { 5075 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 5076 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5077 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5078 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5079 PetscCall(VecResetArray(pcbddc->vec1_R)); 5080 PetscCall(VecResetArray(pcbddc->vec2_R)); 5081 } 5082 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5083 } 5084 if (B_C) { 5085 PetscCall(MatDenseGetArray(B_C, &marray)); 5086 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 5087 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 5088 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5089 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5090 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5091 PetscCall(VecResetArray(pcbddc->vec1_R)); 5092 PetscCall(VecResetArray(pcbddc->vec2_R)); 5093 } 5094 PetscCall(MatDenseRestoreArray(B_C, &marray)); 5095 } 5096 /* coarse basis functions */ 5097 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B)); 5098 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D)); 5099 for (i = 0; i < pcbddc->local_primal_size; i++) { 5100 Vec v; 5101 5102 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 5103 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 5104 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5105 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5106 if (i < n_vertices) { 5107 PetscScalar one = 1.0; 5108 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 5109 PetscCall(VecAssemblyBegin(v)); 5110 PetscCall(VecAssemblyEnd(v)); 5111 } 5112 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 5113 5114 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5115 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 5116 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5117 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5118 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 5119 } 5120 PetscCall(VecResetArray(pcbddc->vec1_R)); 5121 } 5122 PetscCall(MatDestroy(&B_V)); 5123 PetscCall(MatDestroy(&B_C)); 5124 PetscCall(PetscFree(work)); 5125 } else { 5126 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 5127 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 5128 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 5129 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 5130 } 5131 PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5132 PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5133 5134 /* free memory */ 5135 PetscCall(PetscFree(V_to_eff_V)); 5136 PetscCall(PetscFree(C_to_eff_C)); 5137 PetscCall(PetscFree(R_eff_V_J)); 5138 PetscCall(PetscFree(R_eff_C_J)); 5139 PetscCall(PetscFree(B_eff_V_J)); 5140 PetscCall(PetscFree(B_eff_C_J)); 5141 PetscCall(ISDestroy(&is_R)); 5142 PetscCall(ISRestoreIndices(is_V, &idx_V)); 5143 PetscCall(ISRestoreIndices(is_C, &idx_C)); 5144 PetscCall(ISDestroy(&is_V)); 5145 PetscCall(ISDestroy(&is_C)); 5146 PetscCall(PetscFree(idx_V_B)); 5147 PetscCall(MatDestroy(&S_CV)); 5148 PetscCall(MatDestroy(&S_VC)); 5149 PetscCall(MatDestroy(&S_CC)); 5150 if (n_vertices) PetscCall(MatDestroy(&A_VR)); 5151 if (n_constraints) PetscCall(MatDestroy(&C_CR)); 5152 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 5153 5154 /* Checking coarse_sub_mat and coarse basis functions */ 5155 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5156 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5157 if (pcbddc->dbg_flag) { 5158 Mat AUXMAT, TM1, TM2, TM3, TM4; 5159 Mat coarse_phi_D, coarse_phi_B; 5160 Mat coarse_psi_D, coarse_psi_B; 5161 Mat A_II, A_BB, A_IB, A_BI; 5162 Mat C_B, CPHI; 5163 IS is_dummy; 5164 Vec mones; 5165 MatType checkmattype = MATSEQAIJ; 5166 PetscReal real_value; 5167 5168 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5169 Mat A; 5170 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 5171 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 5172 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 5173 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 5174 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 5175 PetscCall(MatDestroy(&A)); 5176 } else { 5177 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 5178 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 5179 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 5180 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 5181 } 5182 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 5183 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 5184 if (!pcbddc->symmetric_primal) { 5185 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 5186 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 5187 } 5188 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5189 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 5190 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5191 if (!pcbddc->symmetric_primal) { 5192 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5193 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5194 PetscCall(MatDestroy(&AUXMAT)); 5195 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5196 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5197 PetscCall(MatDestroy(&AUXMAT)); 5198 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5199 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5200 PetscCall(MatDestroy(&AUXMAT)); 5201 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5202 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5203 PetscCall(MatDestroy(&AUXMAT)); 5204 } else { 5205 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5206 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5207 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5208 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5209 PetscCall(MatDestroy(&AUXMAT)); 5210 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5211 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5212 PetscCall(MatDestroy(&AUXMAT)); 5213 } 5214 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 5215 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 5216 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 5217 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 5218 if (pcbddc->benign_n) { 5219 Mat B0_B, B0_BPHI; 5220 const PetscScalar *data2; 5221 PetscScalar *data; 5222 PetscInt j; 5223 5224 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5225 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5226 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5227 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5228 PetscCall(MatDenseGetArray(TM1, &data)); 5229 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 5230 for (j = 0; j < pcbddc->benign_n; j++) { 5231 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5232 for (i = 0; i < pcbddc->local_primal_size; i++) { 5233 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 5234 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 5235 } 5236 } 5237 PetscCall(MatDenseRestoreArray(TM1, &data)); 5238 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 5239 PetscCall(MatDestroy(&B0_B)); 5240 PetscCall(ISDestroy(&is_dummy)); 5241 PetscCall(MatDestroy(&B0_BPHI)); 5242 } 5243 PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN)); 5244 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 5245 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5246 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5247 5248 /* check constraints */ 5249 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 5250 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 5251 if (!pcbddc->benign_n) { /* TODO: add benign case */ 5252 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5253 } else { 5254 PetscScalar *data; 5255 Mat tmat; 5256 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 5257 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 5258 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 5259 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5260 PetscCall(MatDestroy(&tmat)); 5261 } 5262 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 5263 PetscCall(VecSet(mones, -1.0)); 5264 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5265 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5266 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5267 if (!pcbddc->symmetric_primal) { 5268 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 5269 PetscCall(VecSet(mones, -1.0)); 5270 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5271 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5272 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5273 } 5274 PetscCall(MatDestroy(&C_B)); 5275 PetscCall(MatDestroy(&CPHI)); 5276 PetscCall(ISDestroy(&is_dummy)); 5277 PetscCall(VecDestroy(&mones)); 5278 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5279 PetscCall(MatDestroy(&A_II)); 5280 PetscCall(MatDestroy(&A_BB)); 5281 PetscCall(MatDestroy(&A_IB)); 5282 PetscCall(MatDestroy(&A_BI)); 5283 PetscCall(MatDestroy(&TM1)); 5284 PetscCall(MatDestroy(&TM2)); 5285 PetscCall(MatDestroy(&TM3)); 5286 PetscCall(MatDestroy(&TM4)); 5287 PetscCall(MatDestroy(&coarse_phi_D)); 5288 PetscCall(MatDestroy(&coarse_phi_B)); 5289 if (!pcbddc->symmetric_primal) { 5290 PetscCall(MatDestroy(&coarse_psi_D)); 5291 PetscCall(MatDestroy(&coarse_psi_B)); 5292 } 5293 } 5294 5295 #if 0 5296 { 5297 PetscViewer viewer; 5298 char filename[256]; 5299 5300 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level)); 5301 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 5302 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 5303 PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat")); 5304 PetscCall(MatView(*coarse_submat,viewer)); 5305 if (pcbddc->coarse_phi_B) { 5306 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 5307 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 5308 } 5309 if (pcbddc->coarse_phi_D) { 5310 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 5311 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 5312 } 5313 if (pcbddc->coarse_psi_B) { 5314 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 5315 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 5316 } 5317 if (pcbddc->coarse_psi_D) { 5318 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 5319 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 5320 } 5321 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 5322 PetscCall(MatView(pcbddc->local_mat,viewer)); 5323 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 5324 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 5325 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 5326 PetscCall(ISView(pcis->is_I_local,viewer)); 5327 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 5328 PetscCall(ISView(pcis->is_B_local,viewer)); 5329 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 5330 PetscCall(ISView(pcbddc->is_R_local,viewer)); 5331 PetscCall(PetscViewerDestroy(&viewer)); 5332 } 5333 #endif 5334 5335 /* device support */ 5336 { 5337 PetscBool iscuda, iship, iskokkos; 5338 MatType mtype = NULL; 5339 5340 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, "")); 5341 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, "")); 5342 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, "")); 5343 if (iskokkos) { 5344 if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE; 5345 else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE; 5346 } 5347 if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP); 5348 else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP; 5349 else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA; 5350 if (mtype) { 5351 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); 5352 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); 5353 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); 5354 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); 5355 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); 5356 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); 5357 } 5358 } 5359 PetscFunctionReturn(PETSC_SUCCESS); 5360 } 5361 5362 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) 5363 { 5364 Mat *work_mat; 5365 IS isrow_s, iscol_s; 5366 PetscBool rsorted, csorted; 5367 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 5368 5369 PetscFunctionBegin; 5370 PetscCall(ISSorted(isrow, &rsorted)); 5371 PetscCall(ISSorted(iscol, &csorted)); 5372 PetscCall(ISGetLocalSize(isrow, &rsize)); 5373 PetscCall(ISGetLocalSize(iscol, &csize)); 5374 5375 if (!rsorted) { 5376 const PetscInt *idxs; 5377 PetscInt *idxs_sorted, i; 5378 5379 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 5380 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 5381 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i; 5382 PetscCall(ISGetIndices(isrow, &idxs)); 5383 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 5384 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]]; 5385 PetscCall(ISRestoreIndices(isrow, &idxs)); 5386 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 5387 } else { 5388 PetscCall(PetscObjectReference((PetscObject)isrow)); 5389 isrow_s = isrow; 5390 } 5391 5392 if (!csorted) { 5393 if (isrow == iscol) { 5394 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 5395 iscol_s = isrow_s; 5396 } else { 5397 const PetscInt *idxs; 5398 PetscInt *idxs_sorted, i; 5399 5400 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 5401 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 5402 for (i = 0; i < csize; i++) idxs_perm_c[i] = i; 5403 PetscCall(ISGetIndices(iscol, &idxs)); 5404 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 5405 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]]; 5406 PetscCall(ISRestoreIndices(iscol, &idxs)); 5407 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 5408 } 5409 } else { 5410 PetscCall(PetscObjectReference((PetscObject)iscol)); 5411 iscol_s = iscol; 5412 } 5413 5414 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 5415 5416 if (!rsorted || !csorted) { 5417 Mat new_mat; 5418 IS is_perm_r, is_perm_c; 5419 5420 if (!rsorted) { 5421 PetscInt *idxs_r, i; 5422 PetscCall(PetscMalloc1(rsize, &idxs_r)); 5423 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i; 5424 PetscCall(PetscFree(idxs_perm_r)); 5425 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 5426 } else { 5427 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 5428 } 5429 PetscCall(ISSetPermutation(is_perm_r)); 5430 5431 if (!csorted) { 5432 if (isrow_s == iscol_s) { 5433 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5434 is_perm_c = is_perm_r; 5435 } else { 5436 PetscInt *idxs_c, i; 5437 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 5438 PetscCall(PetscMalloc1(csize, &idxs_c)); 5439 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i; 5440 PetscCall(PetscFree(idxs_perm_c)); 5441 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 5442 } 5443 } else { 5444 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 5445 } 5446 PetscCall(ISSetPermutation(is_perm_c)); 5447 5448 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 5449 PetscCall(MatDestroy(&work_mat[0])); 5450 work_mat[0] = new_mat; 5451 PetscCall(ISDestroy(&is_perm_r)); 5452 PetscCall(ISDestroy(&is_perm_c)); 5453 } 5454 5455 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5456 *B = work_mat[0]; 5457 PetscCall(MatDestroyMatrices(1, &work_mat)); 5458 PetscCall(ISDestroy(&isrow_s)); 5459 PetscCall(ISDestroy(&iscol_s)); 5460 PetscFunctionReturn(PETSC_SUCCESS); 5461 } 5462 5463 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5464 { 5465 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5466 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5467 Mat new_mat, lA; 5468 IS is_local, is_global; 5469 PetscInt local_size; 5470 PetscBool isseqaij, issym, isset; 5471 5472 PetscFunctionBegin; 5473 PetscCall(MatDestroy(&pcbddc->local_mat)); 5474 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 5475 if (pcbddc->mat_graph->multi_element) { 5476 Mat *mats, *bdiags; 5477 IS *gsubs; 5478 PetscInt nsubs = pcbddc->n_local_subs; 5479 5480 PetscCall(PetscCalloc1(nsubs * nsubs, &mats)); 5481 PetscCall(PetscMalloc1(nsubs, &gsubs)); 5482 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i])); 5483 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags)); 5484 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i])); 5485 PetscCall(PetscFree(gsubs)); 5486 5487 for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i]; 5488 PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat)); 5489 PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat)); 5490 PetscCall(MatDestroySubMatrices(nsubs, &bdiags)); 5491 PetscCall(PetscFree(mats)); 5492 } else { 5493 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 5494 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 5495 PetscCall(ISDestroy(&is_local)); 5496 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 5497 PetscCall(ISDestroy(&is_global)); 5498 } 5499 if (pcbddc->dbg_flag) { 5500 Vec x, x_change; 5501 PetscReal error; 5502 5503 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 5504 PetscCall(VecSetRandom(x, NULL)); 5505 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 5506 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5507 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5508 PetscCall(MatMult(new_mat, matis->x, matis->y)); 5509 if (!pcbddc->change_interior) { 5510 const PetscScalar *x, *y, *v; 5511 PetscReal lerror = 0.; 5512 PetscInt i; 5513 5514 PetscCall(VecGetArrayRead(matis->x, &x)); 5515 PetscCall(VecGetArrayRead(matis->y, &y)); 5516 PetscCall(VecGetArrayRead(matis->counter, &v)); 5517 for (i = 0; i < local_size; i++) 5518 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 5519 PetscCall(VecRestoreArrayRead(matis->x, &x)); 5520 PetscCall(VecRestoreArrayRead(matis->y, &y)); 5521 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 5522 PetscCall(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 5523 if (error > PETSC_SMALL) { 5524 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5525 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 5526 } else { 5527 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 5528 } 5529 } 5530 } 5531 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5532 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5533 PetscCall(VecAXPY(x, -1.0, x_change)); 5534 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 5535 if (error > PETSC_SMALL) { 5536 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5537 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 5538 } else { 5539 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 5540 } 5541 } 5542 PetscCall(VecDestroy(&x)); 5543 PetscCall(VecDestroy(&x_change)); 5544 } 5545 5546 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5547 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 5548 5549 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5550 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 5551 if (isseqaij) { 5552 PetscCall(MatDestroy(&pcbddc->local_mat)); 5553 PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 5554 if (lA) { 5555 Mat work; 5556 PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 5557 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5558 PetscCall(MatDestroy(&work)); 5559 } 5560 } else { 5561 Mat work_mat; 5562 5563 PetscCall(MatDestroy(&pcbddc->local_mat)); 5564 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5565 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 5566 PetscCall(MatDestroy(&work_mat)); 5567 if (lA) { 5568 Mat work; 5569 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5570 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 5571 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5572 PetscCall(MatDestroy(&work)); 5573 } 5574 } 5575 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 5576 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 5577 PetscCall(MatDestroy(&new_mat)); 5578 PetscFunctionReturn(PETSC_SUCCESS); 5579 } 5580 5581 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5582 { 5583 PC_IS *pcis = (PC_IS *)pc->data; 5584 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5585 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5586 PetscInt *idx_R_local = NULL; 5587 PetscInt n_vertices, i, j, n_R, n_D, n_B; 5588 PetscInt vbs, bs; 5589 PetscBT bitmask = NULL; 5590 5591 PetscFunctionBegin; 5592 /* 5593 No need to setup local scatters if 5594 - primal space is unchanged 5595 AND 5596 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5597 AND 5598 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5599 */ 5600 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS); 5601 /* destroy old objects */ 5602 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5603 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5604 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5605 /* Set Non-overlapping dimensions */ 5606 n_B = pcis->n_B; 5607 n_D = pcis->n - n_B; 5608 n_vertices = pcbddc->n_vertices; 5609 5610 /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5611 5612 /* create auxiliary bitmask and allocate workspace */ 5613 if (!sub_schurs || !sub_schurs->reuse_solver) { 5614 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 5615 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 5616 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); 5617 5618 for (i = 0, n_R = 0; i < pcis->n; i++) { 5619 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i; 5620 } 5621 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5622 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5623 5624 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5625 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 5626 } 5627 5628 /* Block code */ 5629 vbs = 1; 5630 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 5631 if (bs > 1 && !(n_vertices % bs)) { 5632 PetscBool is_blocked = PETSC_TRUE; 5633 PetscInt *vary; 5634 if (!sub_schurs || !sub_schurs->reuse_solver) { 5635 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 5636 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 5637 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5638 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5639 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5640 for (i = 0; i < pcis->n / bs; i++) { 5641 if (vary[i] != 0 && vary[i] != bs) { 5642 is_blocked = PETSC_FALSE; 5643 break; 5644 } 5645 } 5646 PetscCall(PetscFree(vary)); 5647 } else { 5648 /* Verify directly the R set */ 5649 for (i = 0; i < n_R / bs; i++) { 5650 PetscInt j, node = idx_R_local[bs * i]; 5651 for (j = 1; j < bs; j++) { 5652 if (node != idx_R_local[bs * i + j] - j) { 5653 is_blocked = PETSC_FALSE; 5654 break; 5655 } 5656 } 5657 } 5658 } 5659 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5660 vbs = bs; 5661 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs; 5662 } 5663 } 5664 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5665 if (sub_schurs && sub_schurs->reuse_solver) { 5666 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5667 5668 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5669 PetscCall(ISDestroy(&reuse_solver->is_R)); 5670 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5671 reuse_solver->is_R = pcbddc->is_R_local; 5672 } else { 5673 PetscCall(PetscFree(idx_R_local)); 5674 } 5675 5676 /* print some info if requested */ 5677 if (pcbddc->dbg_flag) { 5678 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5679 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5680 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5681 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5682 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5683 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5684 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5685 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5686 } 5687 5688 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5689 if (!sub_schurs || !sub_schurs->reuse_solver) { 5690 IS is_aux1, is_aux2; 5691 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5692 5693 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5694 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5695 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5696 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5697 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i])); 5698 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5699 for (i = 0, j = 0; i < n_R; i++) { 5700 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5701 } 5702 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5703 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5704 for (i = 0, j = 0; i < n_B; i++) { 5705 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i; 5706 } 5707 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5708 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5709 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5710 PetscCall(ISDestroy(&is_aux1)); 5711 PetscCall(ISDestroy(&is_aux2)); 5712 5713 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5714 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5715 for (i = 0, j = 0; i < n_R; i++) { 5716 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5717 } 5718 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5719 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5720 PetscCall(ISDestroy(&is_aux1)); 5721 } 5722 PetscCall(PetscBTDestroy(&bitmask)); 5723 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5724 } else { 5725 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5726 IS tis; 5727 PetscInt schur_size; 5728 5729 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5730 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5731 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5732 PetscCall(ISDestroy(&tis)); 5733 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5734 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5735 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5736 PetscCall(ISDestroy(&tis)); 5737 } 5738 } 5739 PetscFunctionReturn(PETSC_SUCCESS); 5740 } 5741 5742 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5743 { 5744 MatNullSpace NullSpace; 5745 Mat dmat; 5746 const Vec *nullvecs; 5747 Vec v, v2, *nullvecs2; 5748 VecScatter sct = NULL; 5749 PetscContainer c; 5750 PetscScalar *ddata; 5751 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5752 PetscBool nnsp_has_cnst; 5753 5754 PetscFunctionBegin; 5755 if (!is && !B) { /* MATIS */ 5756 Mat_IS *matis = (Mat_IS *)A->data; 5757 5758 if (!B) PetscCall(MatISGetLocalMat(A, &B)); 5759 sct = matis->cctx; 5760 PetscCall(PetscObjectReference((PetscObject)sct)); 5761 } else { 5762 PetscCall(MatGetNullSpace(B, &NullSpace)); 5763 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace)); 5764 if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5765 } 5766 PetscCall(MatGetNullSpace(A, &NullSpace)); 5767 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace)); 5768 if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5769 5770 PetscCall(MatCreateVecs(A, &v, NULL)); 5771 PetscCall(MatCreateVecs(B, &v2, NULL)); 5772 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); 5773 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs)); 5774 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5775 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5776 PetscCall(VecGetBlockSize(v2, &bs)); 5777 PetscCall(VecGetSize(v2, &N)); 5778 PetscCall(VecGetLocalSize(v2, &n)); 5779 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5780 for (k = 0; k < nnsp_size; k++) { 5781 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5782 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5783 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5784 } 5785 if (nnsp_has_cnst) { 5786 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5787 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5788 } 5789 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5790 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5791 5792 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5793 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B), &c)); 5794 PetscCall(PetscContainerSetPointer(c, ddata)); 5795 PetscCall(PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault)); 5796 PetscCall(PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c)); 5797 PetscCall(PetscContainerDestroy(&c)); 5798 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5799 PetscCall(MatDestroy(&dmat)); 5800 5801 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k])); 5802 PetscCall(PetscFree(nullvecs2)); 5803 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5804 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5805 PetscCall(VecDestroy(&v)); 5806 PetscCall(VecDestroy(&v2)); 5807 PetscCall(VecScatterDestroy(&sct)); 5808 PetscFunctionReturn(PETSC_SUCCESS); 5809 } 5810 5811 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5812 { 5813 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5814 PC_IS *pcis = (PC_IS *)pc->data; 5815 PC pc_temp; 5816 Mat A_RR; 5817 MatNullSpace nnsp; 5818 MatReuse reuse; 5819 PetscScalar m_one = -1.0; 5820 PetscReal value; 5821 PetscInt n_D, n_R; 5822 PetscBool issbaij, opts, isset, issym; 5823 void (*f)(void) = NULL; 5824 char dir_prefix[256], neu_prefix[256], str_level[16]; 5825 size_t len; 5826 5827 PetscFunctionBegin; 5828 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5829 /* approximate solver, propagate NearNullSpace if needed */ 5830 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5831 MatNullSpace gnnsp1, gnnsp2; 5832 PetscBool lhas, ghas; 5833 5834 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5835 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5836 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5837 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5838 PetscCall(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5839 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); 5840 } 5841 5842 /* compute prefixes */ 5843 PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix))); 5844 PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix))); 5845 if (!pcbddc->current_level) { 5846 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5847 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5848 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5849 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5850 } else { 5851 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)pcbddc->current_level)); 5852 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 5853 len -= 15; /* remove "pc_bddc_coarse_" */ 5854 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 5855 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 5856 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5857 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 5858 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 5859 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5860 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5861 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 5862 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 5863 } 5864 5865 /* DIRICHLET PROBLEM */ 5866 if (dirichlet) { 5867 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5868 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5869 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 5870 if (pcbddc->dbg_flag) { 5871 Mat A_IIn; 5872 5873 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 5874 PetscCall(MatDestroy(&pcis->A_II)); 5875 pcis->A_II = A_IIn; 5876 } 5877 } 5878 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5879 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 5880 5881 /* Matrix for Dirichlet problem is pcis->A_II */ 5882 n_D = pcis->n - pcis->n_B; 5883 opts = PETSC_FALSE; 5884 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5885 opts = PETSC_TRUE; 5886 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 5887 PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel)); 5888 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 5889 /* default */ 5890 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 5891 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 5892 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 5893 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5894 if (issbaij) { 5895 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5896 } else { 5897 PetscCall(PCSetType(pc_temp, PCLU)); 5898 } 5899 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 5900 } 5901 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 5902 PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view")); 5903 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 5904 /* Allow user's customization */ 5905 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5906 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5907 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5908 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 5909 } 5910 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5911 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5912 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5913 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5914 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5915 const PetscInt *idxs; 5916 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5917 5918 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 5919 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 5920 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5921 for (i = 0; i < nl; i++) { 5922 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5923 } 5924 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 5925 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5926 PetscCall(PetscFree(scoords)); 5927 } 5928 if (sub_schurs && sub_schurs->reuse_solver) { 5929 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5930 5931 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 5932 } 5933 5934 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5935 if (!n_D) { 5936 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5937 PetscCall(PCSetType(pc_temp, PCNONE)); 5938 } 5939 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5940 /* set ksp_D into pcis data */ 5941 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5942 PetscCall(KSPDestroy(&pcis->ksp_D)); 5943 pcis->ksp_D = pcbddc->ksp_D; 5944 } 5945 5946 /* NEUMANN PROBLEM */ 5947 A_RR = NULL; 5948 if (neumann) { 5949 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5950 PetscInt ibs, mbs; 5951 PetscBool issbaij, reuse_neumann_solver, isset, issym; 5952 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5953 5954 reuse_neumann_solver = PETSC_FALSE; 5955 if (sub_schurs && sub_schurs->reuse_solver) { 5956 IS iP; 5957 5958 reuse_neumann_solver = PETSC_TRUE; 5959 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 5960 if (iP) reuse_neumann_solver = PETSC_FALSE; 5961 } 5962 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5963 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 5964 if (pcbddc->ksp_R) { /* already created ksp */ 5965 PetscInt nn_R; 5966 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 5967 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5968 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 5969 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5970 PetscCall(KSPReset(pcbddc->ksp_R)); 5971 PetscCall(MatDestroy(&A_RR)); 5972 reuse = MAT_INITIAL_MATRIX; 5973 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5974 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5975 PetscCall(MatDestroy(&A_RR)); 5976 reuse = MAT_INITIAL_MATRIX; 5977 } else { /* safe to reuse the matrix */ 5978 reuse = MAT_REUSE_MATRIX; 5979 } 5980 } 5981 /* last check */ 5982 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5983 PetscCall(MatDestroy(&A_RR)); 5984 reuse = MAT_INITIAL_MATRIX; 5985 } 5986 } else { /* first time, so we need to create the matrix */ 5987 reuse = MAT_INITIAL_MATRIX; 5988 } 5989 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5990 TODO: Get Rid of these conversions */ 5991 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 5992 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 5993 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 5994 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5995 if (matis->A == pcbddc->local_mat) { 5996 PetscCall(MatDestroy(&pcbddc->local_mat)); 5997 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5998 } else { 5999 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 6000 } 6001 } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */ 6002 if (matis->A == pcbddc->local_mat) { 6003 PetscCall(MatDestroy(&pcbddc->local_mat)); 6004 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 6005 } else { 6006 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 6007 } 6008 } 6009 /* extract A_RR */ 6010 if (reuse_neumann_solver) { 6011 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6012 6013 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 6014 PetscCall(MatDestroy(&A_RR)); 6015 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 6016 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 6017 } else { 6018 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 6019 } 6020 } else { 6021 PetscCall(MatDestroy(&A_RR)); 6022 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 6023 PetscCall(PetscObjectReference((PetscObject)A_RR)); 6024 } 6025 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 6026 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 6027 } 6028 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 6029 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 6030 opts = PETSC_FALSE; 6031 if (!pcbddc->ksp_R) { /* create object if not present */ 6032 opts = PETSC_TRUE; 6033 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 6034 PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel)); 6035 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 6036 /* default */ 6037 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 6038 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 6039 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6040 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 6041 if (issbaij) { 6042 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 6043 } else { 6044 PetscCall(PCSetType(pc_temp, PCLU)); 6045 } 6046 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 6047 } 6048 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 6049 PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view")); 6050 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 6051 if (opts) { /* Allow user's customization once */ 6052 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 6053 } 6054 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6055 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 6056 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 6057 } 6058 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6059 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6060 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 6061 if (f && pcbddc->mat_graph->cloc && !nnsp) { 6062 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 6063 const PetscInt *idxs; 6064 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 6065 6066 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 6067 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 6068 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 6069 for (i = 0; i < nl; i++) { 6070 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 6071 } 6072 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 6073 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 6074 PetscCall(PetscFree(scoords)); 6075 } 6076 6077 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 6078 if (!n_R) { 6079 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6080 PetscCall(PCSetType(pc_temp, PCNONE)); 6081 } 6082 /* Reuse solver if it is present */ 6083 if (reuse_neumann_solver) { 6084 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6085 6086 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 6087 } 6088 PetscCall(KSPSetUp(pcbddc->ksp_R)); 6089 } 6090 6091 if (pcbddc->dbg_flag) { 6092 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6093 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6094 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 6095 } 6096 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 6097 6098 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 6099 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); 6100 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); 6101 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); 6102 /* check Dirichlet and Neumann solvers */ 6103 if (pcbddc->dbg_flag) { 6104 if (dirichlet) { /* Dirichlet */ 6105 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 6106 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 6107 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 6108 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 6109 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 6110 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 6111 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value)); 6112 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6113 } 6114 if (neumann) { /* Neumann */ 6115 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 6116 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 6117 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 6118 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 6119 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 6120 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 6121 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value)); 6122 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6123 } 6124 } 6125 /* free Neumann problem's matrix */ 6126 PetscCall(MatDestroy(&A_RR)); 6127 PetscFunctionReturn(PETSC_SUCCESS); 6128 } 6129 6130 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 6131 { 6132 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6133 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6134 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 6135 6136 PetscFunctionBegin; 6137 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.)); 6138 if (!pcbddc->switch_static) { 6139 if (applytranspose && pcbddc->local_auxmat1) { 6140 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 6141 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6142 } 6143 if (!reuse_solver) { 6144 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6145 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6146 } else { 6147 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6148 6149 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6150 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6151 } 6152 } else { 6153 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6154 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6155 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6156 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6157 if (applytranspose && pcbddc->local_auxmat1) { 6158 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 6159 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6160 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6161 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6162 } 6163 } 6164 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6165 if (!reuse_solver || pcbddc->switch_static) { 6166 if (applytranspose) { 6167 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6168 } else { 6169 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6170 } 6171 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 6172 } else { 6173 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6174 6175 if (applytranspose) { 6176 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6177 } else { 6178 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6179 } 6180 } 6181 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6182 PetscCall(VecSet(inout_B, 0.)); 6183 if (!pcbddc->switch_static) { 6184 if (!reuse_solver) { 6185 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6186 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6187 } else { 6188 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6189 6190 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6191 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6192 } 6193 if (!applytranspose && pcbddc->local_auxmat1) { 6194 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6195 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 6196 } 6197 } else { 6198 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6199 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6200 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6201 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6202 if (!applytranspose && pcbddc->local_auxmat1) { 6203 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6204 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 6205 } 6206 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6207 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6208 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6209 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6210 } 6211 PetscFunctionReturn(PETSC_SUCCESS); 6212 } 6213 6214 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 6215 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 6216 { 6217 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6218 PC_IS *pcis = (PC_IS *)pc->data; 6219 const PetscScalar zero = 0.0; 6220 6221 PetscFunctionBegin; 6222 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 6223 if (!pcbddc->benign_apply_coarse_only) { 6224 if (applytranspose) { 6225 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 6226 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6227 } else { 6228 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 6229 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6230 } 6231 } else { 6232 PetscCall(VecSet(pcbddc->vec1_P, zero)); 6233 } 6234 6235 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 6236 if (pcbddc->benign_n) { 6237 PetscScalar *array; 6238 PetscInt j; 6239 6240 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6241 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 6242 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6243 } 6244 6245 /* start communications from local primal nodes to rhs of coarse solver */ 6246 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 6247 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 6248 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 6249 6250 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 6251 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6252 if (pcbddc->coarse_ksp) { 6253 Mat coarse_mat; 6254 Vec rhs, sol; 6255 MatNullSpace nullsp; 6256 PetscBool isbddc = PETSC_FALSE; 6257 6258 if (pcbddc->benign_have_null) { 6259 PC coarse_pc; 6260 6261 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6262 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 6263 /* we need to propagate to coarser levels the need for a possible benign correction */ 6264 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 6265 PC_BDDC *coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6266 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 6267 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 6268 } 6269 } 6270 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 6271 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 6272 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 6273 if (applytranspose) { 6274 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 6275 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 6276 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6277 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 6278 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6279 } else { 6280 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 6281 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 6282 PC coarse_pc; 6283 6284 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 6285 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6286 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 6287 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 6288 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 6289 } else { 6290 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 6291 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6292 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6293 } 6294 } 6295 /* we don't need the benign correction at coarser levels anymore */ 6296 if (pcbddc->benign_have_null && isbddc) { 6297 PC coarse_pc; 6298 PC_BDDC *coarsepcbddc; 6299 6300 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6301 coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6302 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 6303 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 6304 } 6305 } 6306 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6307 6308 /* Local solution on R nodes */ 6309 if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); 6310 /* communications from coarse sol to local primal nodes */ 6311 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 6312 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 6313 6314 /* Sum contributions from the two levels */ 6315 if (!pcbddc->benign_apply_coarse_only) { 6316 if (applytranspose) { 6317 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6318 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6319 } else { 6320 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6321 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6322 } 6323 /* store p0 */ 6324 if (pcbddc->benign_n) { 6325 PetscScalar *array; 6326 PetscInt j; 6327 6328 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6329 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 6330 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6331 } 6332 } else { /* expand the coarse solution */ 6333 if (applytranspose) { 6334 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 6335 } else { 6336 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 6337 } 6338 } 6339 PetscFunctionReturn(PETSC_SUCCESS); 6340 } 6341 6342 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) 6343 { 6344 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6345 Vec from, to; 6346 const PetscScalar *array; 6347 6348 PetscFunctionBegin; 6349 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6350 from = pcbddc->coarse_vec; 6351 to = pcbddc->vec1_P; 6352 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6353 Vec tvec; 6354 6355 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6356 PetscCall(VecResetArray(tvec)); 6357 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 6358 PetscCall(VecGetArrayRead(tvec, &array)); 6359 PetscCall(VecPlaceArray(from, array)); 6360 PetscCall(VecRestoreArrayRead(tvec, &array)); 6361 } 6362 } else { /* from local to global -> put data in coarse right-hand side */ 6363 from = pcbddc->vec1_P; 6364 to = pcbddc->coarse_vec; 6365 } 6366 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6367 PetscFunctionReturn(PETSC_SUCCESS); 6368 } 6369 6370 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6371 { 6372 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6373 Vec from, to; 6374 const PetscScalar *array; 6375 6376 PetscFunctionBegin; 6377 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6378 from = pcbddc->coarse_vec; 6379 to = pcbddc->vec1_P; 6380 } else { /* from local to global -> put data in coarse right-hand side */ 6381 from = pcbddc->vec1_P; 6382 to = pcbddc->coarse_vec; 6383 } 6384 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6385 if (smode == SCATTER_FORWARD) { 6386 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6387 Vec tvec; 6388 6389 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6390 PetscCall(VecGetArrayRead(to, &array)); 6391 PetscCall(VecPlaceArray(tvec, array)); 6392 PetscCall(VecRestoreArrayRead(to, &array)); 6393 } 6394 } else { 6395 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6396 PetscCall(VecResetArray(from)); 6397 } 6398 } 6399 PetscFunctionReturn(PETSC_SUCCESS); 6400 } 6401 6402 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6403 { 6404 PC_IS *pcis = (PC_IS *)pc->data; 6405 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6406 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6407 /* one and zero */ 6408 PetscScalar one = 1.0, zero = 0.0; 6409 /* space to store constraints and their local indices */ 6410 PetscScalar *constraints_data; 6411 PetscInt *constraints_idxs, *constraints_idxs_B; 6412 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 6413 PetscInt *constraints_n; 6414 /* iterators */ 6415 PetscInt i, j, k, total_counts, total_counts_cc, cum; 6416 /* BLAS integers */ 6417 PetscBLASInt lwork, lierr; 6418 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 6419 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 6420 /* reuse */ 6421 PetscInt olocal_primal_size, olocal_primal_size_cc; 6422 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 6423 /* change of basis */ 6424 PetscBool qr_needed; 6425 PetscBT change_basis, qr_needed_idx; 6426 /* auxiliary stuff */ 6427 PetscInt *nnz, *is_indices; 6428 PetscInt ncc; 6429 /* some quantities */ 6430 PetscInt n_vertices, total_primal_vertices, valid_constraints; 6431 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 6432 PetscReal tol; /* tolerance for retaining eigenmodes */ 6433 6434 PetscFunctionBegin; 6435 tol = PetscSqrtReal(PETSC_SMALL); 6436 /* Destroy Mat objects computed previously */ 6437 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6438 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6439 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6440 /* save info on constraints from previous setup (if any) */ 6441 olocal_primal_size = pcbddc->local_primal_size; 6442 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6443 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 6444 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 6445 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 6446 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 6447 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6448 6449 if (!pcbddc->adaptive_selection) { 6450 IS ISForVertices, *ISForFaces, *ISForEdges; 6451 MatNullSpace nearnullsp; 6452 const Vec *nearnullvecs; 6453 Vec *localnearnullsp; 6454 PetscScalar *array; 6455 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 6456 PetscBool nnsp_has_cnst; 6457 /* LAPACK working arrays for SVD or POD */ 6458 PetscBool skip_lapack, boolforchange; 6459 PetscScalar *work; 6460 PetscReal *singular_vals; 6461 #if defined(PETSC_USE_COMPLEX) 6462 PetscReal *rwork; 6463 #endif 6464 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 6465 PetscBLASInt dummy_int = 1; 6466 PetscScalar dummy_scalar = 1.; 6467 PetscBool use_pod = PETSC_FALSE; 6468 6469 /* MKL SVD with same input gives different results on different processes! */ 6470 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6471 use_pod = PETSC_TRUE; 6472 #endif 6473 /* Get index sets for faces, edges and vertices from graph */ 6474 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 6475 o_nf = n_ISForFaces; 6476 o_ne = n_ISForEdges; 6477 n_vertices = 0; 6478 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 6479 /* print some info */ 6480 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6481 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 6482 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 6483 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6484 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6485 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 6486 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 6487 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 6488 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6489 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6490 } 6491 6492 if (!pcbddc->use_vertices) n_vertices = 0; 6493 if (!pcbddc->use_edges) n_ISForEdges = 0; 6494 if (!pcbddc->use_faces) n_ISForFaces = 0; 6495 6496 /* check if near null space is attached to global mat */ 6497 if (pcbddc->use_nnsp) { 6498 PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 6499 } else nearnullsp = NULL; 6500 6501 if (nearnullsp) { 6502 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 6503 /* remove any stored info */ 6504 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6505 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6506 /* store information for BDDC solver reuse */ 6507 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6508 pcbddc->onearnullspace = nearnullsp; 6509 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 6510 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); 6511 } else { /* if near null space is not provided BDDC uses constants by default */ 6512 nnsp_size = 0; 6513 nnsp_has_cnst = PETSC_TRUE; 6514 } 6515 /* get max number of constraints on a single cc */ 6516 max_constraints = nnsp_size; 6517 if (nnsp_has_cnst) max_constraints++; 6518 6519 /* 6520 Evaluate maximum storage size needed by the procedure 6521 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6522 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6523 There can be multiple constraints per connected component 6524 */ 6525 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 6526 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 6527 6528 total_counts = n_ISForFaces + n_ISForEdges; 6529 total_counts *= max_constraints; 6530 total_counts += n_vertices; 6531 PetscCall(PetscBTCreate(total_counts, &change_basis)); 6532 6533 total_counts = 0; 6534 max_size_of_constraint = 0; 6535 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 6536 IS used_is; 6537 if (i < n_ISForEdges) { 6538 used_is = ISForEdges[i]; 6539 } else { 6540 used_is = ISForFaces[i - n_ISForEdges]; 6541 } 6542 PetscCall(ISGetSize(used_is, &j)); 6543 total_counts += j; 6544 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 6545 } 6546 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 6547 6548 /* get local part of global near null space vectors */ 6549 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 6550 for (k = 0; k < nnsp_size; k++) { 6551 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 6552 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6553 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6554 } 6555 6556 /* whether or not to skip lapack calls */ 6557 skip_lapack = PETSC_TRUE; 6558 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6559 6560 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6561 if (!skip_lapack) { 6562 PetscScalar temp_work; 6563 6564 if (use_pod) { 6565 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6566 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 6567 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 6568 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 6569 #if defined(PETSC_USE_COMPLEX) 6570 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 6571 #endif 6572 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6573 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6574 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 6575 lwork = -1; 6576 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6577 #if !defined(PETSC_USE_COMPLEX) 6578 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 6579 #else 6580 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 6581 #endif 6582 PetscCall(PetscFPTrapPop()); 6583 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %d", (int)lierr); 6584 } else { 6585 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6586 /* SVD */ 6587 PetscInt max_n, min_n; 6588 max_n = max_size_of_constraint; 6589 min_n = max_constraints; 6590 if (max_size_of_constraint < max_constraints) { 6591 min_n = max_size_of_constraint; 6592 max_n = max_constraints; 6593 } 6594 PetscCall(PetscMalloc1(min_n, &singular_vals)); 6595 #if defined(PETSC_USE_COMPLEX) 6596 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 6597 #endif 6598 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6599 lwork = -1; 6600 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 6601 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 6602 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 6603 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6604 #if !defined(PETSC_USE_COMPLEX) 6605 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 6606 #else 6607 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 6608 #endif 6609 PetscCall(PetscFPTrapPop()); 6610 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %d", (int)lierr); 6611 #else 6612 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6613 #endif /* on missing GESVD */ 6614 } 6615 /* Allocate optimal workspace */ 6616 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 6617 PetscCall(PetscMalloc1(lwork, &work)); 6618 } 6619 /* Now we can loop on constraining sets */ 6620 total_counts = 0; 6621 constraints_idxs_ptr[0] = 0; 6622 constraints_data_ptr[0] = 0; 6623 /* vertices */ 6624 if (n_vertices) { 6625 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 6626 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 6627 for (i = 0; i < n_vertices; i++) { 6628 constraints_n[total_counts] = 1; 6629 constraints_data[total_counts] = 1.0; 6630 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 6631 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 6632 total_counts++; 6633 } 6634 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 6635 } 6636 6637 /* edges and faces */ 6638 total_counts_cc = total_counts; 6639 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 6640 IS used_is; 6641 PetscBool idxs_copied = PETSC_FALSE; 6642 6643 if (ncc < n_ISForEdges) { 6644 used_is = ISForEdges[ncc]; 6645 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6646 } else { 6647 used_is = ISForFaces[ncc - n_ISForEdges]; 6648 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6649 } 6650 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6651 6652 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6653 if (!size_of_constraint) continue; 6654 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6655 if (nnsp_has_cnst) { 6656 PetscScalar quad_value; 6657 6658 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6659 idxs_copied = PETSC_TRUE; 6660 6661 if (!pcbddc->use_nnsp_true) { 6662 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6663 } else { 6664 quad_value = 1.0; 6665 } 6666 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; 6667 temp_constraints++; 6668 total_counts++; 6669 } 6670 for (k = 0; k < nnsp_size; k++) { 6671 PetscReal real_value; 6672 PetscScalar *ptr_to_data; 6673 6674 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6675 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6676 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]]; 6677 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6678 /* check if array is null on the connected component */ 6679 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6680 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6681 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6682 temp_constraints++; 6683 total_counts++; 6684 if (!idxs_copied) { 6685 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6686 idxs_copied = PETSC_TRUE; 6687 } 6688 } 6689 } 6690 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6691 valid_constraints = temp_constraints; 6692 if (!pcbddc->use_nnsp_true && temp_constraints) { 6693 if (temp_constraints == 1) { /* just normalize the constraint */ 6694 PetscScalar norm, *ptr_to_data; 6695 6696 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6697 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6698 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6699 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6700 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6701 } else { /* perform SVD */ 6702 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6703 6704 if (use_pod) { 6705 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6706 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6707 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6708 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6709 from that computed using LAPACKgesvd 6710 -> This is due to a different computation of eigenvectors in LAPACKheev 6711 -> The quality of the POD-computed basis will be the same */ 6712 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6713 /* Store upper triangular part of correlation matrix */ 6714 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6715 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6716 for (j = 0; j < temp_constraints; j++) { 6717 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); 6718 } 6719 /* compute eigenvalues and eigenvectors of correlation matrix */ 6720 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6721 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6722 #if !defined(PETSC_USE_COMPLEX) 6723 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6724 #else 6725 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6726 #endif 6727 PetscCall(PetscFPTrapPop()); 6728 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %d", (int)lierr); 6729 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6730 j = 0; 6731 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6732 total_counts = total_counts - j; 6733 valid_constraints = temp_constraints - j; 6734 /* scale and copy POD basis into used quadrature memory */ 6735 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6736 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6737 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6738 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6739 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6740 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6741 if (j < temp_constraints) { 6742 PetscInt ii; 6743 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6744 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6745 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6746 PetscCall(PetscFPTrapPop()); 6747 for (k = 0; k < temp_constraints - j; k++) { 6748 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; 6749 } 6750 } 6751 } else { 6752 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6753 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6754 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6755 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6756 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6757 #if !defined(PETSC_USE_COMPLEX) 6758 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6759 #else 6760 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6761 #endif 6762 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 6763 PetscCall(PetscFPTrapPop()); 6764 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6765 k = temp_constraints; 6766 if (k > size_of_constraint) k = size_of_constraint; 6767 j = 0; 6768 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6769 valid_constraints = k - j; 6770 total_counts = total_counts - temp_constraints + valid_constraints; 6771 #else 6772 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6773 #endif /* on missing GESVD */ 6774 } 6775 } 6776 } 6777 /* update pointers information */ 6778 if (valid_constraints) { 6779 constraints_n[total_counts_cc] = valid_constraints; 6780 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6781 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6782 /* set change_of_basis flag */ 6783 if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc)); 6784 total_counts_cc++; 6785 } 6786 } 6787 /* free workspace */ 6788 if (!skip_lapack) { 6789 PetscCall(PetscFree(work)); 6790 #if defined(PETSC_USE_COMPLEX) 6791 PetscCall(PetscFree(rwork)); 6792 #endif 6793 PetscCall(PetscFree(singular_vals)); 6794 PetscCall(PetscFree(correlation_mat)); 6795 PetscCall(PetscFree(temp_basis)); 6796 } 6797 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k])); 6798 PetscCall(PetscFree(localnearnullsp)); 6799 /* free index sets of faces, edges and vertices */ 6800 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6801 } else { 6802 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6803 6804 total_counts = 0; 6805 n_vertices = 0; 6806 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 6807 max_constraints = 0; 6808 total_counts_cc = 0; 6809 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6810 total_counts += pcbddc->adaptive_constraints_n[i]; 6811 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6812 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6813 } 6814 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6815 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6816 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6817 constraints_data = pcbddc->adaptive_constraints_data; 6818 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6819 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6820 total_counts_cc = 0; 6821 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6822 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6823 } 6824 6825 max_size_of_constraint = 0; 6826 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6827 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6828 /* Change of basis */ 6829 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6830 if (pcbddc->use_change_of_basis) { 6831 for (i = 0; i < sub_schurs->n_subs; i++) { 6832 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices)); 6833 } 6834 } 6835 } 6836 pcbddc->local_primal_size = total_counts; 6837 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6838 6839 /* map constraints_idxs in boundary numbering */ 6840 if (pcbddc->use_change_of_basis) { 6841 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6842 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6843 } 6844 6845 /* Create constraint matrix */ 6846 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6847 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6848 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6849 6850 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6851 /* determine if a QR strategy is needed for change of basis */ 6852 qr_needed = pcbddc->use_qr_single; 6853 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 6854 total_primal_vertices = 0; 6855 pcbddc->local_primal_size_cc = 0; 6856 for (i = 0; i < total_counts_cc; i++) { 6857 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6858 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6859 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6860 pcbddc->local_primal_size_cc += 1; 6861 } else if (PetscBTLookup(change_basis, i)) { 6862 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6863 pcbddc->local_primal_size_cc += constraints_n[i]; 6864 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6865 PetscCall(PetscBTSet(qr_needed_idx, i)); 6866 qr_needed = PETSC_TRUE; 6867 } 6868 } else { 6869 pcbddc->local_primal_size_cc += 1; 6870 } 6871 } 6872 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6873 pcbddc->n_vertices = total_primal_vertices; 6874 /* permute indices in order to have a sorted set of vertices */ 6875 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 6876 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 6877 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 6878 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 6879 6880 /* nonzero structure of constraint matrix */ 6881 /* and get reference dof for local constraints */ 6882 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 6883 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 6884 6885 j = total_primal_vertices; 6886 total_counts = total_primal_vertices; 6887 cum = total_primal_vertices; 6888 for (i = n_vertices; i < total_counts_cc; i++) { 6889 if (!PetscBTLookup(change_basis, i)) { 6890 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6891 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6892 cum++; 6893 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6894 for (k = 0; k < constraints_n[i]; k++) { 6895 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6896 nnz[j + k] = size_of_constraint; 6897 } 6898 j += constraints_n[i]; 6899 } 6900 } 6901 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 6902 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6903 PetscCall(PetscFree(nnz)); 6904 6905 /* set values in constraint matrix */ 6906 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); 6907 total_counts = total_primal_vertices; 6908 for (i = n_vertices; i < total_counts_cc; i++) { 6909 if (!PetscBTLookup(change_basis, i)) { 6910 PetscInt *cols; 6911 6912 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6913 cols = constraints_idxs + constraints_idxs_ptr[i]; 6914 for (k = 0; k < constraints_n[i]; k++) { 6915 PetscInt row = total_counts + k; 6916 PetscScalar *vals; 6917 6918 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 6919 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 6920 } 6921 total_counts += constraints_n[i]; 6922 } 6923 } 6924 /* assembling */ 6925 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6926 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6927 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 6928 6929 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6930 if (pcbddc->use_change_of_basis) { 6931 /* dual and primal dofs on a single cc */ 6932 PetscInt dual_dofs, primal_dofs; 6933 /* working stuff for GEQRF */ 6934 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 6935 PetscBLASInt lqr_work; 6936 /* working stuff for UNGQR */ 6937 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 6938 PetscBLASInt lgqr_work; 6939 /* working stuff for TRTRS */ 6940 PetscScalar *trs_rhs = NULL; 6941 PetscBLASInt Blas_NRHS; 6942 /* pointers for values insertion into change of basis matrix */ 6943 PetscInt *start_rows, *start_cols; 6944 PetscScalar *start_vals; 6945 /* working stuff for values insertion */ 6946 PetscBT is_primal; 6947 PetscInt *aux_primal_numbering_B; 6948 /* matrix sizes */ 6949 PetscInt global_size, local_size; 6950 /* temporary change of basis */ 6951 Mat localChangeOfBasisMatrix; 6952 /* extra space for debugging */ 6953 PetscScalar *dbg_work = NULL; 6954 6955 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 6956 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 6957 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 6958 /* nonzeros for local mat */ 6959 PetscCall(PetscMalloc1(pcis->n, &nnz)); 6960 if (!pcbddc->benign_change || pcbddc->fake_change) { 6961 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 6962 } else { 6963 const PetscInt *ii; 6964 PetscInt n; 6965 PetscBool flg_row; 6966 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6967 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 6968 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6969 } 6970 for (i = n_vertices; i < total_counts_cc; i++) { 6971 if (PetscBTLookup(change_basis, i)) { 6972 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6973 if (PetscBTLookup(qr_needed_idx, i)) { 6974 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 6975 } else { 6976 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6977 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 6978 } 6979 } 6980 } 6981 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 6982 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6983 PetscCall(PetscFree(nnz)); 6984 /* Set interior change in the matrix */ 6985 if (!pcbddc->benign_change || pcbddc->fake_change) { 6986 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); 6987 } else { 6988 const PetscInt *ii, *jj; 6989 PetscScalar *aa; 6990 PetscInt n; 6991 PetscBool flg_row; 6992 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6993 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 6994 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); 6995 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 6996 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6997 } 6998 6999 if (pcbddc->dbg_flag) { 7000 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 7001 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 7002 } 7003 7004 /* Now we loop on the constraints which need a change of basis */ 7005 /* 7006 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 7007 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 7008 7009 Basic blocks of change of basis matrix T computed: 7010 7011 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 7012 7013 | 1 0 ... 0 s_1/S | 7014 | 0 1 ... 0 s_2/S | 7015 | ... | 7016 | 0 ... 1 s_{n-1}/S | 7017 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 7018 7019 with S = \sum_{i=1}^n s_i^2 7020 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 7021 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 7022 7023 - QR decomposition of constraints otherwise 7024 */ 7025 if (qr_needed && max_size_of_constraint) { 7026 /* space to store Q */ 7027 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 7028 /* array to store scaling factors for reflectors */ 7029 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 7030 /* first we issue queries for optimal work */ 7031 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7032 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 7033 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7034 lqr_work = -1; 7035 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 7036 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %d", (int)lierr); 7037 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 7038 PetscCall(PetscMalloc1(lqr_work, &qr_work)); 7039 lgqr_work = -1; 7040 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7041 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 7042 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 7043 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7044 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 7045 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 7046 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %d", (int)lierr); 7047 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 7048 PetscCall(PetscMalloc1(lgqr_work, &gqr_work)); 7049 /* array to store rhs and solution of triangular solver */ 7050 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 7051 /* allocating workspace for check */ 7052 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); 7053 } 7054 /* array to store whether a node is primal or not */ 7055 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 7056 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 7057 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 7058 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 7059 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); 7060 PetscCall(PetscFree(aux_primal_numbering_B)); 7061 7062 /* loop on constraints and see whether or not they need a change of basis and compute it */ 7063 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 7064 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 7065 if (PetscBTLookup(change_basis, total_counts)) { 7066 /* get constraint info */ 7067 primal_dofs = constraints_n[total_counts]; 7068 dual_dofs = size_of_constraint - primal_dofs; 7069 7070 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); 7071 7072 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 7073 7074 /* copy quadrature constraints for change of basis check */ 7075 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7076 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 7077 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7078 7079 /* compute QR decomposition of constraints */ 7080 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7081 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7082 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7083 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7084 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 7085 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %d", (int)lierr); 7086 PetscCall(PetscFPTrapPop()); 7087 7088 /* explicitly compute R^-T */ 7089 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 7090 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 7091 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7092 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 7093 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7094 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7095 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7096 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 7097 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %d", (int)lierr); 7098 PetscCall(PetscFPTrapPop()); 7099 7100 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 7101 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7102 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7103 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7104 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7105 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7106 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 7107 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %d", (int)lierr); 7108 PetscCall(PetscFPTrapPop()); 7109 7110 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 7111 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 7112 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 7113 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7114 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7115 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7116 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7117 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7118 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 7119 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7120 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 7121 PetscCall(PetscFPTrapPop()); 7122 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7123 7124 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 7125 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 7126 /* insert cols for primal dofs */ 7127 for (j = 0; j < primal_dofs; j++) { 7128 start_vals = &qr_basis[j * size_of_constraint]; 7129 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7130 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7131 } 7132 /* insert cols for dual dofs */ 7133 for (j = 0, k = 0; j < dual_dofs; k++) { 7134 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 7135 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 7136 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7137 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7138 j++; 7139 } 7140 } 7141 7142 /* check change of basis */ 7143 if (pcbddc->dbg_flag) { 7144 PetscInt ii, jj; 7145 PetscBool valid_qr = PETSC_TRUE; 7146 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 7147 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7148 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 7149 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7150 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 7151 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 7152 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7153 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 7154 PetscCall(PetscFPTrapPop()); 7155 for (jj = 0; jj < size_of_constraint; jj++) { 7156 for (ii = 0; ii < primal_dofs; ii++) { 7157 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 7158 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 7159 } 7160 } 7161 if (!valid_qr) { 7162 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 7163 for (jj = 0; jj < size_of_constraint; jj++) { 7164 for (ii = 0; ii < primal_dofs; ii++) { 7165 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 7166 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7167 } 7168 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 7169 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7170 } 7171 } 7172 } 7173 } else { 7174 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 7175 } 7176 } 7177 } else { /* simple transformation block */ 7178 PetscInt row, col; 7179 PetscScalar val, norm; 7180 7181 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7182 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 7183 for (j = 0; j < size_of_constraint; j++) { 7184 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 7185 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7186 if (!PetscBTLookup(is_primal, row_B)) { 7187 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 7188 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 7189 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 7190 } else { 7191 for (k = 0; k < size_of_constraint; k++) { 7192 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7193 if (row != col) { 7194 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 7195 } else { 7196 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 7197 } 7198 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 7199 } 7200 } 7201 } 7202 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); 7203 } 7204 } else { 7205 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); 7206 } 7207 } 7208 7209 /* free workspace */ 7210 if (qr_needed) { 7211 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 7212 PetscCall(PetscFree(trs_rhs)); 7213 PetscCall(PetscFree(qr_tau)); 7214 PetscCall(PetscFree(qr_work)); 7215 PetscCall(PetscFree(gqr_work)); 7216 PetscCall(PetscFree(qr_basis)); 7217 } 7218 PetscCall(PetscBTDestroy(&is_primal)); 7219 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7220 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7221 7222 /* assembling of global change of variable */ 7223 if (!pcbddc->fake_change) { 7224 Mat tmat; 7225 PetscInt bs; 7226 7227 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 7228 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 7229 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 7230 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 7231 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 7232 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 7233 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix)); 7234 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ)); 7235 PetscCall(MatGetBlockSize(pc->pmat, &bs)); 7236 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs)); 7237 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size)); 7238 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE)); 7239 PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 7240 PetscCall(MatDestroy(&tmat)); 7241 PetscCall(VecSet(pcis->vec1_global, 0.0)); 7242 PetscCall(VecSet(pcis->vec1_N, 1.0)); 7243 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7244 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7245 PetscCall(VecReciprocal(pcis->vec1_global)); 7246 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 7247 7248 /* check */ 7249 if (pcbddc->dbg_flag) { 7250 PetscReal error; 7251 Vec x, x_change; 7252 7253 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 7254 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 7255 PetscCall(VecSetRandom(x, NULL)); 7256 PetscCall(VecCopy(x, pcis->vec1_global)); 7257 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7258 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7259 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 7260 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7261 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7262 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 7263 PetscCall(VecAXPY(x, -1.0, x_change)); 7264 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 7265 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 7266 PetscCall(VecDestroy(&x)); 7267 PetscCall(VecDestroy(&x_change)); 7268 } 7269 /* adapt sub_schurs computed (if any) */ 7270 if (pcbddc->use_deluxe_scaling) { 7271 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 7272 7273 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 7274 if (sub_schurs && sub_schurs->S_Ej_all) { 7275 Mat S_new, tmat; 7276 IS is_all_N, is_V_Sall = NULL; 7277 7278 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 7279 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 7280 if (pcbddc->deluxe_zerorows) { 7281 ISLocalToGlobalMapping NtoSall; 7282 IS is_V; 7283 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 7284 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 7285 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 7286 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 7287 PetscCall(ISDestroy(&is_V)); 7288 } 7289 PetscCall(ISDestroy(&is_all_N)); 7290 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7291 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 7292 PetscCall(PetscObjectReference((PetscObject)S_new)); 7293 if (pcbddc->deluxe_zerorows) { 7294 const PetscScalar *array; 7295 const PetscInt *idxs_V, *idxs_all; 7296 PetscInt i, n_V; 7297 7298 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7299 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 7300 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 7301 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 7302 PetscCall(VecGetArrayRead(pcis->D, &array)); 7303 for (i = 0; i < n_V; i++) { 7304 PetscScalar val; 7305 PetscInt idx; 7306 7307 idx = idxs_V[i]; 7308 val = array[idxs_all[idxs_V[i]]]; 7309 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 7310 } 7311 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 7312 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 7313 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 7314 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 7315 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 7316 } 7317 sub_schurs->S_Ej_all = S_new; 7318 PetscCall(MatDestroy(&S_new)); 7319 if (sub_schurs->sum_S_Ej_all) { 7320 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7321 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 7322 PetscCall(PetscObjectReference((PetscObject)S_new)); 7323 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7324 sub_schurs->sum_S_Ej_all = S_new; 7325 PetscCall(MatDestroy(&S_new)); 7326 } 7327 PetscCall(ISDestroy(&is_V_Sall)); 7328 PetscCall(MatDestroy(&tmat)); 7329 } 7330 /* destroy any change of basis context in sub_schurs */ 7331 if (sub_schurs && sub_schurs->change) { 7332 PetscInt i; 7333 7334 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i])); 7335 PetscCall(PetscFree(sub_schurs->change)); 7336 } 7337 } 7338 if (pcbddc->switch_static) { /* need to save the local change */ 7339 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7340 } else { 7341 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 7342 } 7343 /* determine if any process has changed the pressures locally */ 7344 pcbddc->change_interior = pcbddc->benign_have_null; 7345 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7346 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7347 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7348 pcbddc->use_qr_single = qr_needed; 7349 } 7350 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7351 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7352 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7353 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7354 } else { 7355 Mat benign_global = NULL; 7356 if (pcbddc->benign_have_null) { 7357 Mat M; 7358 7359 pcbddc->change_interior = PETSC_TRUE; 7360 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 7361 PetscCall(VecReciprocal(pcis->vec1_N)); 7362 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 7363 if (pcbddc->benign_change) { 7364 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 7365 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 7366 } else { 7367 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 7368 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 7369 } 7370 PetscCall(MatISSetLocalMat(benign_global, M)); 7371 PetscCall(MatDestroy(&M)); 7372 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 7373 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 7374 } 7375 if (pcbddc->user_ChangeOfBasisMatrix) { 7376 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix)); 7377 PetscCall(MatDestroy(&benign_global)); 7378 } else if (pcbddc->benign_have_null) { 7379 pcbddc->ChangeOfBasisMatrix = benign_global; 7380 } 7381 } 7382 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7383 IS is_global; 7384 const PetscInt *gidxs; 7385 7386 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 7387 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 7388 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 7389 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 7390 PetscCall(ISDestroy(&is_global)); 7391 } 7392 } 7393 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); 7394 7395 if (!pcbddc->fake_change) { 7396 /* add pressure dofs to set of primal nodes for numbering purposes */ 7397 for (i = 0; i < pcbddc->benign_n; i++) { 7398 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7399 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7400 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7401 pcbddc->local_primal_size_cc++; 7402 pcbddc->local_primal_size++; 7403 } 7404 7405 /* check if a new primal space has been introduced (also take into account benign trick) */ 7406 pcbddc->new_primal_space_local = PETSC_TRUE; 7407 if (olocal_primal_size == pcbddc->local_primal_size) { 7408 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7409 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7410 if (!pcbddc->new_primal_space_local) { 7411 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7412 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7413 } 7414 } 7415 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7416 PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 7417 } 7418 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 7419 7420 /* flush dbg viewer */ 7421 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7422 7423 /* free workspace */ 7424 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7425 PetscCall(PetscBTDestroy(&change_basis)); 7426 if (!pcbddc->adaptive_selection) { 7427 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 7428 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 7429 } else { 7430 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 7431 PetscCall(PetscFree(constraints_n)); 7432 PetscCall(PetscFree(constraints_idxs_B)); 7433 } 7434 PetscFunctionReturn(PETSC_SUCCESS); 7435 } 7436 7437 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7438 { 7439 ISLocalToGlobalMapping map; 7440 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7441 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 7442 PetscInt i, N; 7443 PetscBool rcsr = PETSC_FALSE; 7444 7445 PetscFunctionBegin; 7446 if (pcbddc->recompute_topography) { 7447 pcbddc->graphanalyzed = PETSC_FALSE; 7448 /* Reset previously computed graph */ 7449 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7450 /* Init local Graph struct */ 7451 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 7452 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 7453 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 7454 7455 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); 7456 /* Check validity of the csr graph passed in by the user */ 7457 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 7458 pcbddc->mat_graph->nvtxs); 7459 7460 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7461 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7462 PetscInt *xadj, *adjncy; 7463 PetscInt nvtxs; 7464 PetscBool flg_row; 7465 Mat A; 7466 7467 PetscCall(PetscObjectReference((PetscObject)matis->A)); 7468 A = matis->A; 7469 for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) { 7470 Mat AtA; 7471 7472 PetscCall(MatProductCreate(A, A, NULL, &AtA)); 7473 PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_")); 7474 PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB)); 7475 PetscCall(MatProductSetFromOptions(AtA)); 7476 PetscCall(MatProductSymbolic(AtA)); 7477 PetscCall(MatProductClear(AtA)); 7478 /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */ 7479 AtA->assembled = PETSC_TRUE; 7480 PetscCall(MatDestroy(&A)); 7481 A = AtA; 7482 } 7483 PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7484 if (flg_row) { 7485 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 7486 pcbddc->computed_rowadj = PETSC_TRUE; 7487 PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7488 rcsr = PETSC_TRUE; 7489 } 7490 PetscCall(MatDestroy(&A)); 7491 } 7492 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7493 7494 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7495 PetscReal *lcoords; 7496 PetscInt n; 7497 MPI_Datatype dimrealtype; 7498 7499 /* TODO: support for blocked */ 7500 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 7501 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7502 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 7503 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype)); 7504 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7505 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7506 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7507 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7508 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7509 7510 pcbddc->mat_graph->coords = lcoords; 7511 pcbddc->mat_graph->cloc = PETSC_TRUE; 7512 pcbddc->mat_graph->cnloc = n; 7513 } 7514 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 7515 pcbddc->mat_graph->nvtxs); 7516 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7517 7518 /* attach info on disconnected subdomains if present */ 7519 if (pcbddc->n_local_subs) { 7520 PetscInt *local_subs, n, totn; 7521 7522 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7523 PetscCall(PetscMalloc1(n, &local_subs)); 7524 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 7525 for (i = 0; i < pcbddc->n_local_subs; i++) { 7526 const PetscInt *idxs; 7527 PetscInt nl, j; 7528 7529 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 7530 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 7531 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 7532 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 7533 } 7534 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 7535 pcbddc->mat_graph->n_local_subs = totn + 1; 7536 pcbddc->mat_graph->local_subs = local_subs; 7537 } 7538 7539 /* Setup of Graph */ 7540 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 7541 } 7542 7543 if (!pcbddc->graphanalyzed) { 7544 /* Graph's connected components analysis */ 7545 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7546 pcbddc->graphanalyzed = PETSC_TRUE; 7547 pcbddc->corner_selected = pcbddc->corner_selection; 7548 } 7549 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7550 PetscFunctionReturn(PETSC_SUCCESS); 7551 } 7552 7553 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7554 { 7555 PetscInt i, j, n; 7556 PetscScalar *alphas; 7557 PetscReal norm, *onorms; 7558 7559 PetscFunctionBegin; 7560 n = *nio; 7561 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 7562 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 7563 PetscCall(VecNormalize(vecs[0], &norm)); 7564 if (norm < PETSC_SMALL) { 7565 onorms[0] = 0.0; 7566 PetscCall(VecSet(vecs[0], 0.0)); 7567 } else { 7568 onorms[0] = norm; 7569 } 7570 7571 for (i = 1; i < n; i++) { 7572 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 7573 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 7574 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 7575 PetscCall(VecNormalize(vecs[i], &norm)); 7576 if (norm < PETSC_SMALL) { 7577 onorms[i] = 0.0; 7578 PetscCall(VecSet(vecs[i], 0.0)); 7579 } else { 7580 onorms[i] = norm; 7581 } 7582 } 7583 /* push nonzero vectors at the beginning */ 7584 for (i = 0; i < n; i++) { 7585 if (onorms[i] == 0.0) { 7586 for (j = i + 1; j < n; j++) { 7587 if (onorms[j] != 0.0) { 7588 PetscCall(VecCopy(vecs[j], vecs[i])); 7589 onorms[j] = 0.0; 7590 } 7591 } 7592 } 7593 } 7594 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7595 PetscCall(PetscFree2(alphas, onorms)); 7596 PetscFunctionReturn(PETSC_SUCCESS); 7597 } 7598 7599 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) 7600 { 7601 ISLocalToGlobalMapping mapping; 7602 Mat A; 7603 PetscInt n_neighs, *neighs, *n_shared, **shared; 7604 PetscMPIInt size, rank, color; 7605 PetscInt *xadj, *adjncy; 7606 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 7607 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 7608 PetscInt void_procs, *procs_candidates = NULL; 7609 PetscInt xadj_count, *count; 7610 PetscBool ismatis, use_vwgt = PETSC_FALSE; 7611 PetscSubcomm psubcomm; 7612 MPI_Comm subcomm; 7613 7614 PetscFunctionBegin; 7615 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7616 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7617 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7618 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 7619 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 7620 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 7621 7622 if (have_void) *have_void = PETSC_FALSE; 7623 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 7624 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7625 PetscCall(MatISGetLocalMat(mat, &A)); 7626 PetscCall(MatGetLocalSize(A, &n, NULL)); 7627 im_active = !!n; 7628 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 7629 void_procs = size - active_procs; 7630 /* get ranks of non-active processes in mat communicator */ 7631 if (void_procs) { 7632 PetscInt ncand; 7633 7634 if (have_void) *have_void = PETSC_TRUE; 7635 PetscCall(PetscMalloc1(size, &procs_candidates)); 7636 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 7637 for (i = 0, ncand = 0; i < size; i++) { 7638 if (!procs_candidates[i]) procs_candidates[ncand++] = i; 7639 } 7640 /* force n_subdomains to be not greater that the number of non-active processes */ 7641 *n_subdomains = PetscMin(void_procs, *n_subdomains); 7642 } 7643 7644 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7645 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7646 PetscCall(MatGetSize(mat, &N, NULL)); 7647 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7648 PetscInt issize, isidx, dest; 7649 if (*n_subdomains == 1) dest = 0; 7650 else dest = rank; 7651 if (im_active) { 7652 issize = 1; 7653 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7654 isidx = procs_candidates[dest]; 7655 } else { 7656 isidx = dest; 7657 } 7658 } else { 7659 issize = 0; 7660 isidx = -1; 7661 } 7662 if (*n_subdomains != 1) *n_subdomains = active_procs; 7663 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 7664 PetscCall(PetscFree(procs_candidates)); 7665 PetscFunctionReturn(PETSC_SUCCESS); 7666 } 7667 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL)); 7668 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL)); 7669 threshold = PetscMax(threshold, 2); 7670 7671 /* Get info on mapping */ 7672 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7673 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7674 7675 /* build local CSR graph of subdomains' connectivity */ 7676 PetscCall(PetscMalloc1(2, &xadj)); 7677 xadj[0] = 0; 7678 xadj[1] = PetscMax(n_neighs - 1, 0); 7679 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7680 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7681 PetscCall(PetscCalloc1(n, &count)); 7682 for (i = 1; i < n_neighs; i++) 7683 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7684 7685 xadj_count = 0; 7686 for (i = 1; i < n_neighs; i++) { 7687 for (j = 0; j < n_shared[i]; j++) { 7688 if (count[shared[i][j]] < threshold) { 7689 adjncy[xadj_count] = neighs[i]; 7690 adjncy_wgt[xadj_count] = n_shared[i]; 7691 xadj_count++; 7692 break; 7693 } 7694 } 7695 } 7696 xadj[1] = xadj_count; 7697 PetscCall(PetscFree(count)); 7698 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7699 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7700 7701 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7702 7703 /* Restrict work on active processes only */ 7704 PetscCall(PetscMPIIntCast(im_active, &color)); 7705 if (void_procs) { 7706 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7707 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7708 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7709 subcomm = PetscSubcommChild(psubcomm); 7710 } else { 7711 psubcomm = NULL; 7712 subcomm = PetscObjectComm((PetscObject)mat); 7713 } 7714 7715 v_wgt = NULL; 7716 if (!color) { 7717 PetscCall(PetscFree(xadj)); 7718 PetscCall(PetscFree(adjncy)); 7719 PetscCall(PetscFree(adjncy_wgt)); 7720 } else { 7721 Mat subdomain_adj; 7722 IS new_ranks, new_ranks_contig; 7723 MatPartitioning partitioner; 7724 PetscInt rstart = 0, rend = 0; 7725 PetscInt *is_indices, *oldranks; 7726 PetscMPIInt size; 7727 PetscBool aggregate; 7728 7729 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7730 if (void_procs) { 7731 PetscInt prank = rank; 7732 PetscCall(PetscMalloc1(size, &oldranks)); 7733 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7734 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); 7735 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7736 } else { 7737 oldranks = NULL; 7738 } 7739 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7740 if (aggregate) { /* TODO: all this part could be made more efficient */ 7741 PetscInt lrows, row, ncols, *cols; 7742 PetscMPIInt nrank; 7743 PetscScalar *vals; 7744 7745 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7746 lrows = 0; 7747 if (nrank < redprocs) { 7748 lrows = size / redprocs; 7749 if (nrank < size % redprocs) lrows++; 7750 } 7751 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7752 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7753 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7754 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7755 row = nrank; 7756 ncols = xadj[1] - xadj[0]; 7757 cols = adjncy; 7758 PetscCall(PetscMalloc1(ncols, &vals)); 7759 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7760 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7761 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7762 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7763 PetscCall(PetscFree(xadj)); 7764 PetscCall(PetscFree(adjncy)); 7765 PetscCall(PetscFree(adjncy_wgt)); 7766 PetscCall(PetscFree(vals)); 7767 if (use_vwgt) { 7768 Vec v; 7769 const PetscScalar *array; 7770 PetscInt nl; 7771 7772 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7773 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7774 PetscCall(VecAssemblyBegin(v)); 7775 PetscCall(VecAssemblyEnd(v)); 7776 PetscCall(VecGetLocalSize(v, &nl)); 7777 PetscCall(VecGetArrayRead(v, &array)); 7778 PetscCall(PetscMalloc1(nl, &v_wgt)); 7779 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7780 PetscCall(VecRestoreArrayRead(v, &array)); 7781 PetscCall(VecDestroy(&v)); 7782 } 7783 } else { 7784 PetscCall(MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7785 if (use_vwgt) { 7786 PetscCall(PetscMalloc1(1, &v_wgt)); 7787 v_wgt[0] = n; 7788 } 7789 } 7790 /* PetscCall(MatView(subdomain_adj,0)); */ 7791 7792 /* Partition */ 7793 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7794 #if defined(PETSC_HAVE_PTSCOTCH) 7795 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7796 #elif defined(PETSC_HAVE_PARMETIS) 7797 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7798 #else 7799 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7800 #endif 7801 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7802 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7803 *n_subdomains = PetscMin((PetscInt)size, *n_subdomains); 7804 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7805 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7806 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7807 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7808 7809 /* renumber new_ranks to avoid "holes" in new set of processors */ 7810 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7811 PetscCall(ISDestroy(&new_ranks)); 7812 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7813 if (!aggregate) { 7814 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7815 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7816 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7817 } else if (oldranks) { 7818 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7819 } else { 7820 ranks_send_to_idx[0] = is_indices[0]; 7821 } 7822 } else { 7823 PetscInt idx = 0; 7824 PetscMPIInt tag; 7825 MPI_Request *reqs; 7826 7827 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7828 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7829 for (i = rstart; i < rend; i++) PetscCallMPI(MPI_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); 7830 PetscCallMPI(MPI_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7831 PetscCallMPI(MPI_Waitall(rend - rstart, reqs, MPI_STATUSES_IGNORE)); 7832 PetscCall(PetscFree(reqs)); 7833 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7834 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7835 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7836 } else if (oldranks) { 7837 ranks_send_to_idx[0] = oldranks[idx]; 7838 } else { 7839 ranks_send_to_idx[0] = idx; 7840 } 7841 } 7842 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7843 /* clean up */ 7844 PetscCall(PetscFree(oldranks)); 7845 PetscCall(ISDestroy(&new_ranks_contig)); 7846 PetscCall(MatDestroy(&subdomain_adj)); 7847 PetscCall(MatPartitioningDestroy(&partitioner)); 7848 } 7849 PetscCall(PetscSubcommDestroy(&psubcomm)); 7850 PetscCall(PetscFree(procs_candidates)); 7851 7852 /* assemble parallel IS for sends */ 7853 i = 1; 7854 if (!color) i = 0; 7855 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 7856 PetscFunctionReturn(PETSC_SUCCESS); 7857 } 7858 7859 typedef enum { 7860 MATDENSE_PRIVATE = 0, 7861 MATAIJ_PRIVATE, 7862 MATBAIJ_PRIVATE, 7863 MATSBAIJ_PRIVATE 7864 } MatTypePrivate; 7865 7866 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7867 { 7868 Mat local_mat; 7869 IS is_sends_internal; 7870 PetscInt rows, cols, new_local_rows; 7871 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 7872 PetscBool ismatis, isdense, newisdense, destroy_mat; 7873 ISLocalToGlobalMapping l2gmap; 7874 PetscInt *l2gmap_indices; 7875 const PetscInt *is_indices; 7876 MatType new_local_type; 7877 /* buffers */ 7878 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 7879 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 7880 PetscInt *recv_buffer_idxs_local; 7881 PetscScalar *ptr_vals, *recv_buffer_vals; 7882 const PetscScalar *send_buffer_vals; 7883 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 7884 /* MPI */ 7885 MPI_Comm comm, comm_n; 7886 PetscSubcomm subcomm; 7887 PetscMPIInt n_sends, n_recvs, size; 7888 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 7889 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 7890 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 7891 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 7892 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 7893 7894 PetscFunctionBegin; 7895 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7896 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7897 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7898 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 7899 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 7900 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 7901 PetscValidLogicalCollectiveBool(mat, reuse, 6); 7902 PetscValidLogicalCollectiveInt(mat, nis, 8); 7903 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 7904 if (nvecs) { 7905 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 7906 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 7907 } 7908 /* further checks */ 7909 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7910 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7911 /* XXX hack for multi_element */ 7912 if (!isdense) PetscCall(MatConvert(local_mat, MATDENSE, MAT_INPLACE_MATRIX, &local_mat)); 7913 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7914 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7915 7916 PetscCall(MatGetSize(local_mat, &rows, &cols)); 7917 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 7918 if (reuse && *mat_n) { 7919 PetscInt mrows, mcols, mnrows, mncols; 7920 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 7921 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 7922 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 7923 PetscCall(MatGetSize(mat, &mrows, &mcols)); 7924 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 7925 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 7926 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 7927 } 7928 PetscCall(MatGetBlockSize(local_mat, &bs)); 7929 PetscValidLogicalCollectiveInt(mat, bs, 1); 7930 7931 /* prepare IS for sending if not provided */ 7932 if (!is_sends) { 7933 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 7934 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 7935 } else { 7936 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7937 is_sends_internal = is_sends; 7938 } 7939 7940 /* get comm */ 7941 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 7942 7943 /* compute number of sends */ 7944 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 7945 PetscCall(PetscMPIIntCast(i, &n_sends)); 7946 7947 /* compute number of receives */ 7948 PetscCallMPI(MPI_Comm_size(comm, &size)); 7949 PetscCall(PetscMalloc1(size, &iflags)); 7950 PetscCall(PetscArrayzero(iflags, size)); 7951 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 7952 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 7953 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 7954 PetscCall(PetscFree(iflags)); 7955 7956 /* restrict comm if requested */ 7957 subcomm = NULL; 7958 destroy_mat = PETSC_FALSE; 7959 if (restrict_comm) { 7960 PetscMPIInt color, subcommsize; 7961 7962 color = 0; 7963 if (restrict_full) { 7964 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7965 } else { 7966 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7967 } 7968 PetscCall(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 7969 subcommsize = size - subcommsize; 7970 /* check if reuse has been requested */ 7971 if (reuse) { 7972 if (*mat_n) { 7973 PetscMPIInt subcommsize2; 7974 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 7975 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 7976 comm_n = PetscObjectComm((PetscObject)*mat_n); 7977 } else { 7978 comm_n = PETSC_COMM_SELF; 7979 } 7980 } else { /* MAT_INITIAL_MATRIX */ 7981 PetscMPIInt rank; 7982 7983 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7984 PetscCall(PetscSubcommCreate(comm, &subcomm)); 7985 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 7986 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 7987 comm_n = PetscSubcommChild(subcomm); 7988 } 7989 /* flag to destroy *mat_n if not significative */ 7990 if (color) destroy_mat = PETSC_TRUE; 7991 } else { 7992 comm_n = comm; 7993 } 7994 7995 /* prepare send/receive buffers */ 7996 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 7997 PetscCall(PetscArrayzero(ilengths_idxs, size)); 7998 PetscCall(PetscMalloc1(size, &ilengths_vals)); 7999 PetscCall(PetscArrayzero(ilengths_vals, size)); 8000 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); 8001 8002 /* Get data from local matrices */ 8003 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 8004 /* TODO: See below some guidelines on how to prepare the local buffers */ 8005 /* 8006 send_buffer_vals should contain the raw values of the local matrix 8007 send_buffer_idxs should contain: 8008 - MatType_PRIVATE type 8009 - PetscInt size_of_l2gmap 8010 - PetscInt global_row_indices[size_of_l2gmap] 8011 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 8012 */ 8013 { 8014 ISLocalToGlobalMapping mapping; 8015 8016 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 8017 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 8018 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 8019 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 8020 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 8021 send_buffer_idxs[1] = i; 8022 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 8023 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 8024 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 8025 PetscCall(PetscMPIIntCast(i, &len)); 8026 for (i = 0; i < n_sends; i++) { 8027 ilengths_vals[is_indices[i]] = len * len; 8028 ilengths_idxs[is_indices[i]] = len + 2; 8029 } 8030 } 8031 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 8032 /* additional is (if any) */ 8033 if (nis) { 8034 PetscMPIInt psum; 8035 PetscInt j; 8036 for (j = 0, psum = 0; j < nis; j++) { 8037 PetscInt plen; 8038 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8039 PetscCall(PetscMPIIntCast(plen, &len)); 8040 psum += len + 1; /* indices + length */ 8041 } 8042 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 8043 for (j = 0, psum = 0; j < nis; j++) { 8044 PetscInt plen; 8045 const PetscInt *is_array_idxs; 8046 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8047 send_buffer_idxs_is[psum] = plen; 8048 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 8049 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 8050 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 8051 psum += plen + 1; /* indices + length */ 8052 } 8053 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum; 8054 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 8055 } 8056 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8057 8058 buf_size_idxs = 0; 8059 buf_size_vals = 0; 8060 buf_size_idxs_is = 0; 8061 buf_size_vecs = 0; 8062 for (i = 0; i < n_recvs; i++) { 8063 buf_size_idxs += (PetscInt)olengths_idxs[i]; 8064 buf_size_vals += (PetscInt)olengths_vals[i]; 8065 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 8066 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 8067 } 8068 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 8069 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 8070 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 8071 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 8072 8073 /* get new tags for clean communications */ 8074 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 8075 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 8076 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 8077 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 8078 8079 /* allocate for requests */ 8080 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 8081 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 8082 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 8083 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 8084 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 8085 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 8086 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 8087 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 8088 8089 /* communications */ 8090 ptr_idxs = recv_buffer_idxs; 8091 ptr_vals = recv_buffer_vals; 8092 ptr_idxs_is = recv_buffer_idxs_is; 8093 ptr_vecs = recv_buffer_vecs; 8094 for (i = 0; i < n_recvs; i++) { 8095 source_dest = onodes[i]; 8096 PetscCallMPI(MPI_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i])); 8097 PetscCallMPI(MPI_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i])); 8098 ptr_idxs += olengths_idxs[i]; 8099 ptr_vals += olengths_vals[i]; 8100 if (nis) { 8101 source_dest = onodes_is[i]; 8102 PetscCallMPI(MPI_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i])); 8103 ptr_idxs_is += olengths_idxs_is[i]; 8104 } 8105 if (nvecs) { 8106 source_dest = onodes[i]; 8107 PetscCallMPI(MPI_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i])); 8108 ptr_vecs += olengths_idxs[i] - 2; 8109 } 8110 } 8111 for (i = 0; i < n_sends; i++) { 8112 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 8113 PetscCallMPI(MPI_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 8114 PetscCallMPI(MPI_Isend((PetscScalar *)send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 8115 if (nis) PetscCallMPI(MPI_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); 8116 if (nvecs) { 8117 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8118 PetscCallMPI(MPI_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 8119 } 8120 } 8121 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 8122 PetscCall(ISDestroy(&is_sends_internal)); 8123 8124 /* assemble new l2g map */ 8125 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 8126 ptr_idxs = recv_buffer_idxs; 8127 new_local_rows = 0; 8128 for (i = 0; i < n_recvs; i++) { 8129 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8130 ptr_idxs += olengths_idxs[i]; 8131 } 8132 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 8133 ptr_idxs = recv_buffer_idxs; 8134 new_local_rows = 0; 8135 for (i = 0; i < n_recvs; i++) { 8136 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 8137 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8138 ptr_idxs += olengths_idxs[i]; 8139 } 8140 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 8141 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 8142 PetscCall(PetscFree(l2gmap_indices)); 8143 8144 /* infer new local matrix type from received local matrices type */ 8145 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 8146 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 8147 if (n_recvs) { 8148 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 8149 ptr_idxs = recv_buffer_idxs; 8150 for (i = 0; i < n_recvs; i++) { 8151 if ((PetscInt)new_local_type_private != *ptr_idxs) { 8152 new_local_type_private = MATAIJ_PRIVATE; 8153 break; 8154 } 8155 ptr_idxs += olengths_idxs[i]; 8156 } 8157 switch (new_local_type_private) { 8158 case MATDENSE_PRIVATE: 8159 new_local_type = MATSEQAIJ; 8160 bs = 1; 8161 break; 8162 case MATAIJ_PRIVATE: 8163 new_local_type = MATSEQAIJ; 8164 bs = 1; 8165 break; 8166 case MATBAIJ_PRIVATE: 8167 new_local_type = MATSEQBAIJ; 8168 break; 8169 case MATSBAIJ_PRIVATE: 8170 new_local_type = MATSEQSBAIJ; 8171 break; 8172 default: 8173 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 8174 } 8175 } else { /* by default, new_local_type is seqaij */ 8176 new_local_type = MATSEQAIJ; 8177 bs = 1; 8178 } 8179 8180 /* create MATIS object if needed */ 8181 if (!reuse) { 8182 PetscCall(MatGetSize(mat, &rows, &cols)); 8183 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8184 } else { 8185 /* it also destroys the local matrices */ 8186 if (*mat_n) { 8187 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 8188 } else { /* this is a fake object */ 8189 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8190 } 8191 } 8192 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 8193 PetscCall(MatSetType(local_mat, new_local_type)); 8194 8195 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 8196 8197 /* Global to local map of received indices */ 8198 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 8199 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 8200 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 8201 8202 /* restore attributes -> type of incoming data and its size */ 8203 buf_size_idxs = 0; 8204 for (i = 0; i < n_recvs; i++) { 8205 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 8206 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 8207 buf_size_idxs += (PetscInt)olengths_idxs[i]; 8208 } 8209 PetscCall(PetscFree(recv_buffer_idxs)); 8210 8211 /* set preallocation */ 8212 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 8213 if (!newisdense) { 8214 PetscInt *new_local_nnz = NULL; 8215 8216 ptr_idxs = recv_buffer_idxs_local; 8217 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); 8218 for (i = 0; i < n_recvs; i++) { 8219 PetscInt j; 8220 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 8221 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); 8222 } else { 8223 /* TODO */ 8224 } 8225 ptr_idxs += olengths_idxs[i]; 8226 } 8227 if (new_local_nnz) { 8228 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 8229 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 8230 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 8231 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8232 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 8233 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8234 } else { 8235 PetscCall(MatSetUp(local_mat)); 8236 } 8237 PetscCall(PetscFree(new_local_nnz)); 8238 } else { 8239 PetscCall(MatSetUp(local_mat)); 8240 } 8241 8242 /* set values */ 8243 ptr_vals = recv_buffer_vals; 8244 ptr_idxs = recv_buffer_idxs_local; 8245 for (i = 0; i < n_recvs; i++) { 8246 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 8247 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 8248 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 8249 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 8250 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 8251 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 8252 } else { 8253 /* TODO */ 8254 } 8255 ptr_idxs += olengths_idxs[i]; 8256 ptr_vals += olengths_vals[i]; 8257 } 8258 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 8259 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 8260 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 8261 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 8262 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 8263 PetscCall(PetscFree(recv_buffer_vals)); 8264 8265 #if 0 8266 if (!restrict_comm) { /* check */ 8267 Vec lvec,rvec; 8268 PetscReal infty_error; 8269 8270 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 8271 PetscCall(VecSetRandom(rvec,NULL)); 8272 PetscCall(MatMult(mat,rvec,lvec)); 8273 PetscCall(VecScale(lvec,-1.0)); 8274 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 8275 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 8276 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 8277 PetscCall(VecDestroy(&rvec)); 8278 PetscCall(VecDestroy(&lvec)); 8279 } 8280 #endif 8281 8282 /* assemble new additional is (if any) */ 8283 if (nis) { 8284 PetscInt **temp_idxs, *count_is, j, psum; 8285 8286 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 8287 PetscCall(PetscCalloc1(nis, &count_is)); 8288 ptr_idxs = recv_buffer_idxs_is; 8289 psum = 0; 8290 for (i = 0; i < n_recvs; i++) { 8291 for (j = 0; j < nis; j++) { 8292 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8293 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8294 psum += plen; 8295 ptr_idxs += plen + 1; /* shift pointer to received data */ 8296 } 8297 } 8298 PetscCall(PetscMalloc1(nis, &temp_idxs)); 8299 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 8300 for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]); 8301 PetscCall(PetscArrayzero(count_is, nis)); 8302 ptr_idxs = recv_buffer_idxs_is; 8303 for (i = 0; i < n_recvs; i++) { 8304 for (j = 0; j < nis; j++) { 8305 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8306 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 8307 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8308 ptr_idxs += plen + 1; /* shift pointer to received data */ 8309 } 8310 } 8311 for (i = 0; i < nis; i++) { 8312 PetscCall(ISDestroy(&isarray[i])); 8313 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 8314 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 8315 } 8316 PetscCall(PetscFree(count_is)); 8317 PetscCall(PetscFree(temp_idxs[0])); 8318 PetscCall(PetscFree(temp_idxs)); 8319 } 8320 /* free workspace */ 8321 PetscCall(PetscFree(recv_buffer_idxs_is)); 8322 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 8323 PetscCall(PetscFree(send_buffer_idxs)); 8324 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 8325 if (isdense) { 8326 PetscCall(MatISGetLocalMat(mat, &local_mat)); 8327 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 8328 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8329 } else { 8330 /* PetscCall(PetscFree(send_buffer_vals)); */ 8331 } 8332 if (nis) { 8333 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 8334 PetscCall(PetscFree(send_buffer_idxs_is)); 8335 } 8336 8337 if (nvecs) { 8338 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 8339 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 8340 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8341 PetscCall(VecDestroy(&nnsp_vec[0])); 8342 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 8343 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 8344 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 8345 /* set values */ 8346 ptr_vals = recv_buffer_vecs; 8347 ptr_idxs = recv_buffer_idxs_local; 8348 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8349 for (i = 0; i < n_recvs; i++) { 8350 PetscInt j; 8351 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); 8352 ptr_idxs += olengths_idxs[i]; 8353 ptr_vals += olengths_idxs[i] - 2; 8354 } 8355 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8356 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8357 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8358 } 8359 8360 PetscCall(PetscFree(recv_buffer_vecs)); 8361 PetscCall(PetscFree(recv_buffer_idxs_local)); 8362 PetscCall(PetscFree(recv_req_idxs)); 8363 PetscCall(PetscFree(recv_req_vals)); 8364 PetscCall(PetscFree(recv_req_vecs)); 8365 PetscCall(PetscFree(recv_req_idxs_is)); 8366 PetscCall(PetscFree(send_req_idxs)); 8367 PetscCall(PetscFree(send_req_vals)); 8368 PetscCall(PetscFree(send_req_vecs)); 8369 PetscCall(PetscFree(send_req_idxs_is)); 8370 PetscCall(PetscFree(ilengths_vals)); 8371 PetscCall(PetscFree(ilengths_idxs)); 8372 PetscCall(PetscFree(olengths_vals)); 8373 PetscCall(PetscFree(olengths_idxs)); 8374 PetscCall(PetscFree(onodes)); 8375 if (nis) { 8376 PetscCall(PetscFree(ilengths_idxs_is)); 8377 PetscCall(PetscFree(olengths_idxs_is)); 8378 PetscCall(PetscFree(onodes_is)); 8379 } 8380 PetscCall(PetscSubcommDestroy(&subcomm)); 8381 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 8382 PetscCall(MatDestroy(mat_n)); 8383 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i])); 8384 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8385 PetscCall(VecDestroy(&nnsp_vec[0])); 8386 } 8387 *mat_n = NULL; 8388 } 8389 PetscFunctionReturn(PETSC_SUCCESS); 8390 } 8391 8392 /* temporary hack into ksp private data structure */ 8393 #include <petsc/private/kspimpl.h> 8394 8395 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat) 8396 { 8397 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8398 PC_IS *pcis = (PC_IS *)pc->data; 8399 PCBDDCGraph graph = pcbddc->mat_graph; 8400 Mat coarse_mat, coarse_mat_is; 8401 Mat coarsedivudotp = NULL; 8402 Mat coarseG, t_coarse_mat_is; 8403 MatNullSpace CoarseNullSpace = NULL; 8404 ISLocalToGlobalMapping coarse_islg; 8405 IS coarse_is, *isarray, corners; 8406 PetscInt i, im_active = -1, active_procs = -1; 8407 PetscInt nis, nisdofs, nisneu, nisvert; 8408 PetscInt coarse_eqs_per_proc, coarsening_ratio; 8409 PC pc_temp; 8410 PCType coarse_pc_type; 8411 KSPType coarse_ksp_type; 8412 PetscBool multilevel_requested, multilevel_allowed; 8413 PetscBool coarse_reuse, multi_element = graph->multi_element; 8414 PetscInt ncoarse, nedcfield; 8415 PetscBool compute_vecs = PETSC_FALSE; 8416 PetscScalar *array; 8417 MatReuse coarse_mat_reuse; 8418 PetscBool restr, full_restr, have_void; 8419 PetscMPIInt size; 8420 8421 PetscFunctionBegin; 8422 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8423 /* Assign global numbering to coarse dofs */ 8424 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8425 PetscInt ocoarse_size; 8426 compute_vecs = PETSC_TRUE; 8427 8428 pcbddc->new_primal_space = PETSC_TRUE; 8429 ocoarse_size = pcbddc->coarse_size; 8430 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8431 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 8432 /* see if we can avoid some work */ 8433 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8434 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8435 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8436 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8437 coarse_reuse = PETSC_FALSE; 8438 } else { /* we can safely reuse already computed coarse matrix */ 8439 coarse_reuse = PETSC_TRUE; 8440 } 8441 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8442 coarse_reuse = PETSC_FALSE; 8443 } 8444 /* reset any subassembling information */ 8445 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8446 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8447 coarse_reuse = PETSC_TRUE; 8448 } 8449 if (coarse_reuse && pcbddc->coarse_ksp) { 8450 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 8451 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8452 coarse_mat_reuse = MAT_REUSE_MATRIX; 8453 } else { 8454 coarse_mat = NULL; 8455 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8456 } 8457 8458 /* creates temporary l2gmap and IS for coarse indexes */ 8459 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 8460 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 8461 8462 /* creates temporary MATIS object for coarse matrix */ 8463 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is)); 8464 PetscCall(MatSetType(t_coarse_mat_is, MATIS)); 8465 PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size)); 8466 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_TRUE)); 8467 PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg)); 8468 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat)); 8469 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8470 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8471 PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view")); 8472 8473 /* count "active" (i.e. with positive local size) and "void" processes */ 8474 im_active = !!pcis->n; 8475 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8476 8477 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8478 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8479 /* full_restr : just use the receivers from the subassembling pattern */ 8480 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 8481 coarse_mat_is = NULL; 8482 multilevel_allowed = PETSC_FALSE; 8483 multilevel_requested = PETSC_FALSE; 8484 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 8485 if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1); 8486 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8487 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8488 coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio; 8489 if (multilevel_requested) { 8490 ncoarse = active_procs / coarsening_ratio; 8491 restr = PETSC_FALSE; 8492 full_restr = PETSC_FALSE; 8493 } else { 8494 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 8495 restr = PETSC_TRUE; 8496 full_restr = PETSC_TRUE; 8497 } 8498 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8499 ncoarse = PetscMax(1, ncoarse); 8500 if (!pcbddc->coarse_subassembling) { 8501 if (coarsening_ratio > 1) { 8502 if (multilevel_requested) { 8503 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8504 } else { 8505 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8506 } 8507 } else { 8508 PetscMPIInt rank; 8509 8510 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 8511 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8512 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 8513 } 8514 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8515 PetscInt psum; 8516 if (pcbddc->coarse_ksp) psum = 1; 8517 else psum = 0; 8518 PetscCall(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8519 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8520 } 8521 /* determine if we can go multilevel */ 8522 if (multilevel_requested) { 8523 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8524 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8525 } 8526 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8527 8528 /* dump subassembling pattern */ 8529 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); 8530 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8531 nedcfield = -1; 8532 corners = NULL; 8533 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8534 PetscInt *tidxs, *tidxs2, nout, tsize, i; 8535 const PetscInt *idxs; 8536 ISLocalToGlobalMapping tmap; 8537 8538 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8539 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 8540 /* allocate space for temporary storage */ 8541 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 8542 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 8543 /* allocate for IS array */ 8544 nisdofs = pcbddc->n_ISForDofsLocal; 8545 if (pcbddc->nedclocal) { 8546 if (pcbddc->nedfield > -1) { 8547 nedcfield = pcbddc->nedfield; 8548 } else { 8549 nedcfield = 0; 8550 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 8551 nisdofs = 1; 8552 } 8553 } 8554 nisneu = !!pcbddc->NeumannBoundariesLocal; 8555 nisvert = 0; /* nisvert is not used */ 8556 nis = nisdofs + nisneu + nisvert; 8557 PetscCall(PetscMalloc1(nis, &isarray)); 8558 /* dofs splitting */ 8559 for (i = 0; i < nisdofs; i++) { 8560 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8561 if (nedcfield != i) { 8562 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 8563 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8564 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8565 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8566 } else { 8567 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 8568 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 8569 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8570 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8571 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 8572 } 8573 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8574 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 8575 /* PetscCall(ISView(isarray[i],0)); */ 8576 } 8577 /* neumann boundaries */ 8578 if (pcbddc->NeumannBoundariesLocal) { 8579 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8580 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 8581 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8582 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8583 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8584 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8585 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 8586 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8587 } 8588 /* coordinates */ 8589 if (pcbddc->corner_selected) { 8590 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8591 PetscCall(ISGetLocalSize(corners, &tsize)); 8592 PetscCall(ISGetIndices(corners, &idxs)); 8593 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8594 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8595 PetscCall(ISRestoreIndices(corners, &idxs)); 8596 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8597 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8598 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 8599 } 8600 PetscCall(PetscFree(tidxs)); 8601 PetscCall(PetscFree(tidxs2)); 8602 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8603 } else { 8604 nis = 0; 8605 nisdofs = 0; 8606 nisneu = 0; 8607 nisvert = 0; 8608 isarray = NULL; 8609 } 8610 /* destroy no longer needed map */ 8611 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8612 8613 /* subassemble */ 8614 if (multilevel_allowed) { 8615 Vec vp[1]; 8616 PetscInt nvecs = 0; 8617 PetscBool reuse; 8618 8619 vp[0] = NULL; 8620 /* XXX HDIV also */ 8621 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8622 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 8623 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 8624 PetscCall(VecSetType(vp[0], VECSTANDARD)); 8625 nvecs = 1; 8626 8627 if (pcbddc->divudotp) { 8628 Mat B, loc_divudotp; 8629 Vec v, p; 8630 IS dummy; 8631 PetscInt np; 8632 8633 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 8634 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 8635 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 8636 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 8637 PetscCall(MatCreateVecs(B, &v, &p)); 8638 PetscCall(VecSet(p, 1.)); 8639 PetscCall(MatMultTranspose(B, p, v)); 8640 PetscCall(VecDestroy(&p)); 8641 PetscCall(MatDestroy(&B)); 8642 PetscCall(VecGetArray(vp[0], &array)); 8643 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 8644 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 8645 PetscCall(VecResetArray(pcbddc->vec1_P)); 8646 PetscCall(VecRestoreArray(vp[0], &array)); 8647 PetscCall(ISDestroy(&dummy)); 8648 PetscCall(VecDestroy(&v)); 8649 } 8650 } 8651 if (coarse_mat) reuse = PETSC_TRUE; 8652 else reuse = PETSC_FALSE; 8653 if (multi_element) { 8654 /* XXX divudotp */ 8655 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_FALSE)); 8656 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8657 coarse_mat_is = t_coarse_mat_is; 8658 } else { 8659 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8660 if (reuse) { 8661 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 8662 } else { 8663 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 8664 } 8665 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8666 PetscScalar *arraym; 8667 const PetscScalar *arrayv; 8668 PetscInt nl; 8669 PetscCall(VecGetLocalSize(vp[0], &nl)); 8670 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 8671 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 8672 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 8673 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 8674 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 8675 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 8676 PetscCall(VecDestroy(&vp[0])); 8677 } else { 8678 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 8679 } 8680 } 8681 } else { 8682 if (ncoarse != size) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 8683 else { 8684 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8685 coarse_mat_is = t_coarse_mat_is; 8686 } 8687 } 8688 if (coarse_mat_is || coarse_mat) { 8689 if (!multilevel_allowed) { 8690 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 8691 } else { 8692 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8693 if (coarse_mat_is) { 8694 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8695 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8696 coarse_mat = coarse_mat_is; 8697 } 8698 } 8699 } 8700 PetscCall(MatDestroy(&t_coarse_mat_is)); 8701 PetscCall(MatDestroy(&coarse_mat_is)); 8702 8703 /* create local to global scatters for coarse problem */ 8704 if (compute_vecs) { 8705 PetscInt lrows; 8706 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8707 if (coarse_mat) { 8708 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8709 } else { 8710 lrows = 0; 8711 } 8712 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8713 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8714 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8715 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8716 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8717 } 8718 PetscCall(ISDestroy(&coarse_is)); 8719 8720 /* set defaults for coarse KSP and PC */ 8721 if (multilevel_allowed) { 8722 coarse_ksp_type = KSPRICHARDSON; 8723 coarse_pc_type = PCBDDC; 8724 } else { 8725 coarse_ksp_type = KSPPREONLY; 8726 coarse_pc_type = PCREDUNDANT; 8727 } 8728 8729 /* print some info if requested */ 8730 if (pcbddc->dbg_flag) { 8731 if (!multilevel_allowed) { 8732 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8733 if (multilevel_requested) { 8734 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio)); 8735 } else if (pcbddc->max_levels) { 8736 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8737 } 8738 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8739 } 8740 } 8741 8742 /* communicate coarse discrete gradient */ 8743 coarseG = NULL; 8744 if (pcbddc->nedcG && multilevel_allowed) { 8745 MPI_Comm ccomm; 8746 if (coarse_mat) { 8747 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8748 } else { 8749 ccomm = MPI_COMM_NULL; 8750 } 8751 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8752 } 8753 8754 /* create the coarse KSP object only once with defaults */ 8755 if (coarse_mat) { 8756 PetscBool isredundant, isbddc, force, valid; 8757 PetscViewer dbg_viewer = NULL; 8758 PetscBool isset, issym, isher, isspd; 8759 8760 if (pcbddc->dbg_flag) { 8761 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8762 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8763 } 8764 if (!pcbddc->coarse_ksp) { 8765 char prefix[256], str_level[16]; 8766 size_t len; 8767 8768 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8769 PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel)); 8770 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8771 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8772 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1)); 8773 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8774 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8775 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8776 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8777 /* TODO is this logic correct? should check for coarse_mat type */ 8778 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8779 /* prefix */ 8780 PetscCall(PetscStrncpy(prefix, "", sizeof(prefix))); 8781 PetscCall(PetscStrncpy(str_level, "", sizeof(str_level))); 8782 if (!pcbddc->current_level) { 8783 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8784 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8785 } else { 8786 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8787 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8788 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8789 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8790 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8791 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)pcbddc->current_level)); 8792 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8793 } 8794 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8795 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8796 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8797 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8798 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8799 /* allow user customization */ 8800 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8801 /* get some info after set from options */ 8802 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8803 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8804 force = PETSC_FALSE; 8805 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8806 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8807 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8808 if (multilevel_allowed && !force && !valid) { 8809 isbddc = PETSC_TRUE; 8810 PetscCall(PCSetType(pc_temp, PCBDDC)); 8811 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8812 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8813 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8814 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8815 PetscObjectOptionsBegin((PetscObject)pc_temp); 8816 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8817 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8818 PetscOptionsEnd(); 8819 pc_temp->setfromoptionscalled++; 8820 } 8821 } 8822 } 8823 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8824 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8825 if (nisdofs) { 8826 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8827 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i])); 8828 } 8829 if (nisneu) { 8830 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8831 PetscCall(ISDestroy(&isarray[nisdofs])); 8832 } 8833 if (nisvert) { 8834 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8835 PetscCall(ISDestroy(&isarray[nis - 1])); 8836 } 8837 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8838 8839 /* get some info after set from options */ 8840 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8841 8842 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8843 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8844 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8845 force = PETSC_FALSE; 8846 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8847 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8848 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC)); 8849 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 8850 if (isredundant) { 8851 KSP inner_ksp; 8852 PC inner_pc; 8853 8854 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 8855 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 8856 } 8857 8858 /* parameters which miss an API */ 8859 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8860 if (isbddc) { 8861 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 8862 8863 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8864 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8865 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8866 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8867 if (pcbddc_coarse->benign_saddle_point) { 8868 Mat coarsedivudotp_is; 8869 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 8870 IS row, col; 8871 const PetscInt *gidxs; 8872 PetscInt n, st, M, N; 8873 8874 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 8875 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 8876 st = st - n; 8877 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 8878 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 8879 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 8880 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 8881 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 8882 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 8883 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 8884 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 8885 PetscCall(ISGetSize(row, &M)); 8886 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 8887 PetscCall(ISDestroy(&row)); 8888 PetscCall(ISDestroy(&col)); 8889 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 8890 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 8891 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 8892 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 8893 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8894 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8895 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 8896 PetscCall(MatDestroy(&coarsedivudotp)); 8897 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 8898 PetscCall(MatDestroy(&coarsedivudotp_is)); 8899 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8900 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8901 } 8902 } 8903 8904 /* propagate symmetry info of coarse matrix */ 8905 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 8906 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 8907 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 8908 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 8909 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 8910 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 8911 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 8912 8913 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); 8914 /* set operators */ 8915 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 8916 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 8917 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8918 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 8919 } 8920 PetscCall(MatDestroy(&coarseG)); 8921 PetscCall(PetscFree(isarray)); 8922 #if 0 8923 { 8924 PetscViewer viewer; 8925 char filename[256]; 8926 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level)); 8927 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8928 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8929 PetscCall(MatView(coarse_mat,viewer)); 8930 PetscCall(PetscViewerPopFormat(viewer)); 8931 PetscCall(PetscViewerDestroy(&viewer)); 8932 } 8933 #endif 8934 8935 if (corners) { 8936 Vec gv; 8937 IS is; 8938 const PetscInt *idxs; 8939 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 8940 PetscScalar *coords; 8941 8942 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 8943 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 8944 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 8945 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 8946 PetscCall(VecSetBlockSize(gv, cdim)); 8947 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 8948 PetscCall(VecSetType(gv, VECSTANDARD)); 8949 PetscCall(VecSetFromOptions(gv)); 8950 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8951 8952 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8953 PetscCall(ISGetLocalSize(is, &n)); 8954 PetscCall(ISGetIndices(is, &idxs)); 8955 PetscCall(PetscMalloc1(n * cdim, &coords)); 8956 for (i = 0; i < n; i++) { 8957 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; 8958 } 8959 PetscCall(ISRestoreIndices(is, &idxs)); 8960 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8961 8962 PetscCall(ISGetLocalSize(corners, &n)); 8963 PetscCall(ISGetIndices(corners, &idxs)); 8964 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 8965 PetscCall(ISRestoreIndices(corners, &idxs)); 8966 PetscCall(PetscFree(coords)); 8967 PetscCall(VecAssemblyBegin(gv)); 8968 PetscCall(VecAssemblyEnd(gv)); 8969 PetscCall(VecGetArray(gv, &coords)); 8970 if (pcbddc->coarse_ksp) { 8971 PC coarse_pc; 8972 PetscBool isbddc; 8973 8974 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 8975 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 8976 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8977 PetscReal *realcoords; 8978 8979 PetscCall(VecGetLocalSize(gv, &n)); 8980 #if defined(PETSC_USE_COMPLEX) 8981 PetscCall(PetscMalloc1(n, &realcoords)); 8982 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 8983 #else 8984 realcoords = coords; 8985 #endif 8986 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 8987 #if defined(PETSC_USE_COMPLEX) 8988 PetscCall(PetscFree(realcoords)); 8989 #endif 8990 } 8991 } 8992 PetscCall(VecRestoreArray(gv, &coords)); 8993 PetscCall(VecDestroy(&gv)); 8994 } 8995 PetscCall(ISDestroy(&corners)); 8996 8997 if (pcbddc->coarse_ksp) { 8998 Vec crhs, csol; 8999 9000 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 9001 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 9002 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL)); 9003 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs)); 9004 } 9005 PetscCall(MatDestroy(&coarsedivudotp)); 9006 9007 /* compute null space for coarse solver if the benign trick has been requested */ 9008 if (pcbddc->benign_null) { 9009 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 9010 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); 9011 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 9012 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 9013 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9014 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9015 if (coarse_mat) { 9016 Vec nullv; 9017 PetscScalar *array, *array2; 9018 PetscInt nl; 9019 9020 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 9021 PetscCall(VecGetLocalSize(nullv, &nl)); 9022 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9023 PetscCall(VecGetArray(nullv, &array2)); 9024 PetscCall(PetscArraycpy(array2, array, nl)); 9025 PetscCall(VecRestoreArray(nullv, &array2)); 9026 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9027 PetscCall(VecNormalize(nullv, NULL)); 9028 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 9029 PetscCall(VecDestroy(&nullv)); 9030 } 9031 } 9032 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 9033 9034 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9035 if (pcbddc->coarse_ksp) { 9036 PetscBool ispreonly; 9037 9038 if (CoarseNullSpace) { 9039 PetscBool isnull; 9040 9041 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 9042 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 9043 /* TODO: add local nullspaces (if any) */ 9044 } 9045 /* setup coarse ksp */ 9046 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 9047 /* Check coarse problem if in debug mode or if solving with an iterative method */ 9048 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 9049 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 9050 KSP check_ksp; 9051 KSPType check_ksp_type; 9052 PC check_pc; 9053 Vec check_vec, coarse_vec; 9054 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 9055 PetscInt its; 9056 PetscBool compute_eigs; 9057 PetscReal *eigs_r, *eigs_c; 9058 PetscInt neigs; 9059 const char *prefix; 9060 9061 /* Create ksp object suitable for estimation of extreme eigenvalues */ 9062 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 9063 PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel)); 9064 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 9065 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 9066 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 9067 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size)); 9068 /* prevent from setup unneeded object */ 9069 PetscCall(KSPGetPC(check_ksp, &check_pc)); 9070 PetscCall(PCSetType(check_pc, PCNONE)); 9071 if (ispreonly) { 9072 check_ksp_type = KSPPREONLY; 9073 compute_eigs = PETSC_FALSE; 9074 } else { 9075 check_ksp_type = KSPGMRES; 9076 compute_eigs = PETSC_TRUE; 9077 } 9078 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 9079 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 9080 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 9081 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 9082 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 9083 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 9084 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 9085 PetscCall(KSPSetFromOptions(check_ksp)); 9086 PetscCall(KSPSetUp(check_ksp)); 9087 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 9088 PetscCall(KSPSetPC(check_ksp, check_pc)); 9089 /* create random vec */ 9090 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 9091 PetscCall(VecSetRandom(check_vec, NULL)); 9092 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9093 /* solve coarse problem */ 9094 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 9095 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 9096 /* set eigenvalue estimation if preonly has not been requested */ 9097 if (compute_eigs) { 9098 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 9099 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 9100 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 9101 if (neigs) { 9102 lambda_max = eigs_r[neigs - 1]; 9103 lambda_min = eigs_r[0]; 9104 if (pcbddc->use_coarse_estimates) { 9105 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 9106 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 9107 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 9108 } 9109 } 9110 } 9111 } 9112 9113 /* check coarse problem residual error */ 9114 if (pcbddc->dbg_flag) { 9115 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 9116 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9117 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 9118 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 9119 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9120 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 9121 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 9122 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer)); 9123 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer)); 9124 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 9125 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 9126 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); 9127 if (compute_eigs) { 9128 PetscReal lambda_max_s, lambda_min_s; 9129 KSPConvergedReason reason; 9130 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 9131 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 9132 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 9133 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 9134 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 9135 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); 9136 } 9137 PetscCall(PetscViewerFlush(dbg_viewer)); 9138 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9139 } 9140 PetscCall(VecDestroy(&check_vec)); 9141 PetscCall(VecDestroy(&coarse_vec)); 9142 PetscCall(KSPDestroy(&check_ksp)); 9143 if (compute_eigs) { 9144 PetscCall(PetscFree(eigs_r)); 9145 PetscCall(PetscFree(eigs_c)); 9146 } 9147 } 9148 } 9149 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 9150 /* print additional info */ 9151 if (pcbddc->dbg_flag) { 9152 /* waits until all processes reaches this point */ 9153 PetscCall(PetscBarrier((PetscObject)pc)); 9154 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 9155 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9156 } 9157 9158 /* free memory */ 9159 PetscCall(MatDestroy(&coarse_mat)); 9160 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9161 PetscFunctionReturn(PETSC_SUCCESS); 9162 } 9163 9164 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) 9165 { 9166 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9167 PC_IS *pcis = (PC_IS *)pc->data; 9168 IS subset, subset_mult, subset_n; 9169 PetscInt local_size, coarse_size = 0; 9170 PetscInt *local_primal_indices = NULL; 9171 const PetscInt *t_local_primal_indices; 9172 9173 PetscFunctionBegin; 9174 /* Compute global number of coarse dofs */ 9175 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 9176 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 9177 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 9178 PetscCall(ISDestroy(&subset_n)); 9179 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 9180 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 9181 PetscCall(ISDestroy(&subset)); 9182 PetscCall(ISDestroy(&subset_mult)); 9183 PetscCall(ISGetLocalSize(subset_n, &local_size)); 9184 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 9185 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 9186 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 9187 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 9188 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 9189 PetscCall(ISDestroy(&subset_n)); 9190 9191 if (pcbddc->dbg_flag) { 9192 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9193 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 9194 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size)); 9195 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9196 } 9197 9198 /* get back data */ 9199 *coarse_size_n = coarse_size; 9200 *local_primal_indices_n = local_primal_indices; 9201 PetscFunctionReturn(PETSC_SUCCESS); 9202 } 9203 9204 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) 9205 { 9206 IS localis_t; 9207 PetscInt i, lsize, *idxs, n; 9208 PetscScalar *vals; 9209 9210 PetscFunctionBegin; 9211 /* get indices in local ordering exploiting local to global map */ 9212 PetscCall(ISGetLocalSize(globalis, &lsize)); 9213 PetscCall(PetscMalloc1(lsize, &vals)); 9214 for (i = 0; i < lsize; i++) vals[i] = 1.0; 9215 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 9216 PetscCall(VecSet(gwork, 0.0)); 9217 PetscCall(VecSet(lwork, 0.0)); 9218 if (idxs) { /* multilevel guard */ 9219 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 9220 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 9221 } 9222 PetscCall(VecAssemblyBegin(gwork)); 9223 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 9224 PetscCall(PetscFree(vals)); 9225 PetscCall(VecAssemblyEnd(gwork)); 9226 /* now compute set in local ordering */ 9227 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9228 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9229 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 9230 PetscCall(VecGetSize(lwork, &n)); 9231 for (i = 0, lsize = 0; i < n; i++) { 9232 if (PetscRealPart(vals[i]) > 0.5) lsize++; 9233 } 9234 PetscCall(PetscMalloc1(lsize, &idxs)); 9235 for (i = 0, lsize = 0; i < n; i++) { 9236 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i; 9237 } 9238 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 9239 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 9240 *localis = localis_t; 9241 PetscFunctionReturn(PETSC_SUCCESS); 9242 } 9243 9244 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 9245 { 9246 PC_IS *pcis = (PC_IS *)pc->data; 9247 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9248 PC_IS *pcisf; 9249 PC_BDDC *pcbddcf; 9250 PC pcf; 9251 9252 PetscFunctionBegin; 9253 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 9254 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 9255 PetscCall(PCSetType(pcf, PCBDDC)); 9256 9257 pcisf = (PC_IS *)pcf->data; 9258 pcbddcf = (PC_BDDC *)pcf->data; 9259 9260 pcisf->is_B_local = pcis->is_B_local; 9261 pcisf->vec1_N = pcis->vec1_N; 9262 pcisf->BtoNmap = pcis->BtoNmap; 9263 pcisf->n = pcis->n; 9264 pcisf->n_B = pcis->n_B; 9265 9266 PetscCall(PetscFree(pcbddcf->mat_graph)); 9267 PetscCall(PetscFree(pcbddcf->sub_schurs)); 9268 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 9269 pcbddcf->sub_schurs = schurs; 9270 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 9271 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 9272 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 9273 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 9274 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 9275 pcbddcf->use_faces = PETSC_TRUE; 9276 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 9277 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 9278 pcbddcf->use_qr_single = (PetscBool)!constraints; 9279 pcbddcf->fake_change = PETSC_TRUE; 9280 pcbddcf->dbg_flag = pcbddc->dbg_flag; 9281 9282 PetscCall(PCBDDCAdaptiveSelection(pcf)); 9283 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9284 9285 *change = pcbddcf->ConstraintMatrix; 9286 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 9287 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 9288 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 9289 9290 if (schurs) pcbddcf->sub_schurs = NULL; 9291 pcbddcf->ConstraintMatrix = NULL; 9292 pcbddcf->mat_graph = NULL; 9293 pcisf->is_B_local = NULL; 9294 pcisf->vec1_N = NULL; 9295 pcisf->BtoNmap = NULL; 9296 PetscCall(PCDestroy(&pcf)); 9297 PetscFunctionReturn(PETSC_SUCCESS); 9298 } 9299 9300 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9301 { 9302 PC_IS *pcis = (PC_IS *)pc->data; 9303 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9304 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 9305 Mat S_j; 9306 PetscInt *used_xadj, *used_adjncy; 9307 PetscBool free_used_adj; 9308 9309 PetscFunctionBegin; 9310 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9311 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9312 free_used_adj = PETSC_FALSE; 9313 if (pcbddc->sub_schurs_layers == -1) { 9314 used_xadj = NULL; 9315 used_adjncy = NULL; 9316 } else { 9317 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9318 used_xadj = pcbddc->mat_graph->xadj; 9319 used_adjncy = pcbddc->mat_graph->adjncy; 9320 } else if (pcbddc->computed_rowadj) { 9321 used_xadj = pcbddc->mat_graph->xadj; 9322 used_adjncy = pcbddc->mat_graph->adjncy; 9323 } else { 9324 PetscBool flg_row = PETSC_FALSE; 9325 const PetscInt *xadj, *adjncy; 9326 PetscInt nvtxs; 9327 9328 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9329 if (flg_row) { 9330 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 9331 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 9332 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 9333 free_used_adj = PETSC_TRUE; 9334 } else { 9335 pcbddc->sub_schurs_layers = -1; 9336 used_xadj = NULL; 9337 used_adjncy = NULL; 9338 } 9339 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9340 } 9341 } 9342 9343 /* setup sub_schurs data */ 9344 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 9345 if (!sub_schurs->schur_explicit) { 9346 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9347 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 9348 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 9349 } else { 9350 Mat change = NULL; 9351 Vec scaling = NULL; 9352 IS change_primal = NULL, iP; 9353 PetscInt benign_n; 9354 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9355 PetscBool need_change = PETSC_FALSE; 9356 PetscBool discrete_harmonic = PETSC_FALSE; 9357 9358 if (!pcbddc->use_vertices && reuse_solvers) { 9359 PetscInt n_vertices; 9360 9361 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 9362 reuse_solvers = (PetscBool)!n_vertices; 9363 } 9364 if (!pcbddc->benign_change_explicit) { 9365 benign_n = pcbddc->benign_n; 9366 } else { 9367 benign_n = 0; 9368 } 9369 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9370 We need a global reduction to avoid possible deadlocks. 9371 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9372 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9373 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9374 PetscCall(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 9375 need_change = (PetscBool)(!need_change); 9376 } 9377 /* If the user defines additional constraints, we import them here */ 9378 if (need_change) { 9379 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 9380 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 9381 } 9382 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9383 9384 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 9385 if (iP) { 9386 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 9387 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 9388 PetscOptionsEnd(); 9389 } 9390 if (discrete_harmonic) { 9391 Mat A; 9392 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 9393 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 9394 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 9395 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 9396 pcbddc->benign_zerodiag_subs, change, change_primal)); 9397 PetscCall(MatDestroy(&A)); 9398 } else { 9399 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 9400 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 9401 } 9402 PetscCall(MatDestroy(&change)); 9403 PetscCall(ISDestroy(&change_primal)); 9404 } 9405 PetscCall(MatDestroy(&S_j)); 9406 9407 /* free adjacency */ 9408 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 9409 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9410 PetscFunctionReturn(PETSC_SUCCESS); 9411 } 9412 9413 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9414 { 9415 PC_IS *pcis = (PC_IS *)pc->data; 9416 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9417 PCBDDCGraph graph; 9418 9419 PetscFunctionBegin; 9420 /* attach interface graph for determining subsets */ 9421 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9422 IS verticesIS, verticescomm; 9423 PetscInt vsize, *idxs; 9424 9425 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9426 PetscCall(ISGetSize(verticesIS, &vsize)); 9427 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 9428 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 9429 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 9430 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9431 PetscCall(PCBDDCGraphCreate(&graph)); 9432 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 9433 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 9434 PetscCall(ISDestroy(&verticescomm)); 9435 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9436 } else { 9437 graph = pcbddc->mat_graph; 9438 } 9439 /* print some info */ 9440 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9441 IS vertices; 9442 PetscInt nv, nedges, nfaces; 9443 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 9444 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9445 PetscCall(ISGetSize(vertices, &nv)); 9446 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9447 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 9448 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 9449 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 9450 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 9451 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9452 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9453 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9454 } 9455 9456 /* sub_schurs init */ 9457 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9458 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 9459 9460 /* free graph struct */ 9461 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph)); 9462 PetscFunctionReturn(PETSC_SUCCESS); 9463 } 9464 9465 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer) 9466 { 9467 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 9468 PetscInt n = pc->pmat->rmap->n, ln, ni, st; 9469 const PetscInt *idxs; 9470 IS gis; 9471 9472 PetscFunctionBegin; 9473 if (!is) PetscFunctionReturn(PETSC_SUCCESS); 9474 PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL)); 9475 PetscCall(MatGetLocalSize(matis->A, NULL, &ln)); 9476 PetscCall(PetscArrayzero(matis->sf_leafdata, ln)); 9477 PetscCall(PetscArrayzero(matis->sf_rootdata, n)); 9478 PetscCall(ISGetLocalSize(is, &ni)); 9479 PetscCall(ISGetIndices(is, &idxs)); 9480 for (PetscInt i = 0; i < ni; i++) { 9481 if (idxs[i] < 0 || idxs[i] >= ln) continue; 9482 matis->sf_leafdata[idxs[i]] = 1; 9483 } 9484 PetscCall(ISRestoreIndices(is, &idxs)); 9485 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9486 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9487 ln = 0; 9488 for (PetscInt i = 0; i < n; i++) { 9489 if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st; 9490 } 9491 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis)); 9492 PetscCall(ISView(gis, viewer)); 9493 PetscCall(ISDestroy(&gis)); 9494 PetscFunctionReturn(PETSC_SUCCESS); 9495 } 9496 9497 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile) 9498 { 9499 PetscInt header[11]; 9500 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9501 PetscViewer viewer; 9502 MPI_Comm comm = PetscObjectComm((PetscObject)pc); 9503 9504 PetscFunctionBegin; 9505 PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer)); 9506 if (load) { 9507 IS is; 9508 Mat A; 9509 9510 PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT)); 9511 PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9512 PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9513 PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9514 PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9515 PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9516 PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9517 PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9518 PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9519 PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9520 PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9521 if (header[0]) { 9522 PetscCall(ISCreate(comm, &is)); 9523 PetscCall(ISLoad(is, viewer)); 9524 PetscCall(PCBDDCSetDirichletBoundaries(pc, is)); 9525 PetscCall(ISDestroy(&is)); 9526 } 9527 if (header[1]) { 9528 PetscCall(ISCreate(comm, &is)); 9529 PetscCall(ISLoad(is, viewer)); 9530 PetscCall(PCBDDCSetNeumannBoundaries(pc, is)); 9531 PetscCall(ISDestroy(&is)); 9532 } 9533 if (header[2]) { 9534 IS *isarray; 9535 9536 PetscCall(PetscMalloc1(header[2], &isarray)); 9537 for (PetscInt i = 0; i < header[2]; i++) { 9538 PetscCall(ISCreate(comm, &isarray[i])); 9539 PetscCall(ISLoad(isarray[i], viewer)); 9540 } 9541 PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray)); 9542 for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i])); 9543 PetscCall(PetscFree(isarray)); 9544 } 9545 if (header[3]) { 9546 PetscCall(ISCreate(comm, &is)); 9547 PetscCall(ISLoad(is, viewer)); 9548 PetscCall(PCBDDCSetPrimalVerticesIS(pc, is)); 9549 PetscCall(ISDestroy(&is)); 9550 } 9551 if (header[4]) { 9552 PetscCall(MatCreate(comm, &A)); 9553 PetscCall(MatSetType(A, MATAIJ)); 9554 PetscCall(MatLoad(A, viewer)); 9555 PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8])); 9556 PetscCall(MatDestroy(&A)); 9557 } 9558 if (header[9]) { 9559 PetscCall(MatCreate(comm, &A)); 9560 PetscCall(MatSetType(A, MATIS)); 9561 PetscCall(MatLoad(A, viewer)); 9562 PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL)); 9563 PetscCall(MatDestroy(&A)); 9564 } 9565 } else { 9566 header[0] = (PetscInt) !!pcbddc->DirichletBoundariesLocal; 9567 header[1] = (PetscInt) !!pcbddc->NeumannBoundariesLocal; 9568 header[2] = pcbddc->n_ISForDofsLocal; 9569 header[3] = (PetscInt) !!pcbddc->user_primal_vertices_local; 9570 header[4] = (PetscInt) !!pcbddc->discretegradient; 9571 header[5] = pcbddc->nedorder; 9572 header[6] = pcbddc->nedfield; 9573 header[7] = (PetscInt)pcbddc->nedglobal; 9574 header[8] = (PetscInt)pcbddc->conforming; 9575 header[9] = (PetscInt) !!pcbddc->divudotp; 9576 header[10] = (PetscInt)pcbddc->divudotp_trans; 9577 if (header[4]) header[3] = 0; 9578 9579 PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT)); 9580 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer)); 9581 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer)); 9582 for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer)); 9583 if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer)); 9584 if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer)); 9585 if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer)); 9586 } 9587 PetscCall(PetscViewerDestroy(&viewer)); 9588 PetscFunctionReturn(PETSC_SUCCESS); 9589 } 9590 9591 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9592 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9593 { 9594 Mat At; 9595 IS rows; 9596 PetscInt rst, ren; 9597 PetscLayout rmap; 9598 9599 PetscFunctionBegin; 9600 rst = ren = 0; 9601 if (ccomm != MPI_COMM_NULL) { 9602 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 9603 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 9604 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 9605 PetscCall(PetscLayoutSetUp(rmap)); 9606 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 9607 } 9608 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 9609 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 9610 PetscCall(ISDestroy(&rows)); 9611 9612 if (ccomm != MPI_COMM_NULL) { 9613 Mat_MPIAIJ *a, *b; 9614 IS from, to; 9615 Vec gvec; 9616 PetscInt lsize; 9617 9618 PetscCall(MatCreate(ccomm, B)); 9619 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 9620 PetscCall(MatSetType(*B, MATAIJ)); 9621 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 9622 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9623 a = (Mat_MPIAIJ *)At->data; 9624 b = (Mat_MPIAIJ *)(*B)->data; 9625 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 9626 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 9627 PetscCall(PetscObjectReference((PetscObject)a->A)); 9628 PetscCall(PetscObjectReference((PetscObject)a->B)); 9629 b->A = a->A; 9630 b->B = a->B; 9631 9632 b->donotstash = a->donotstash; 9633 b->roworiented = a->roworiented; 9634 b->rowindices = NULL; 9635 b->rowvalues = NULL; 9636 b->getrowactive = PETSC_FALSE; 9637 9638 (*B)->rmap = rmap; 9639 (*B)->factortype = A->factortype; 9640 (*B)->assembled = PETSC_TRUE; 9641 (*B)->insertmode = NOT_SET_VALUES; 9642 (*B)->preallocated = PETSC_TRUE; 9643 9644 if (a->colmap) { 9645 #if defined(PETSC_USE_CTABLE) 9646 PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap)); 9647 #else 9648 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 9649 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 9650 #endif 9651 } else b->colmap = NULL; 9652 if (a->garray) { 9653 PetscInt len; 9654 len = a->B->cmap->n; 9655 PetscCall(PetscMalloc1(len + 1, &b->garray)); 9656 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 9657 } else b->garray = NULL; 9658 9659 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9660 b->lvec = a->lvec; 9661 9662 /* cannot use VecScatterCopy */ 9663 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 9664 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 9665 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 9666 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 9667 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 9668 PetscCall(ISDestroy(&from)); 9669 PetscCall(ISDestroy(&to)); 9670 PetscCall(VecDestroy(&gvec)); 9671 } 9672 PetscCall(MatDestroy(&At)); 9673 PetscFunctionReturn(PETSC_SUCCESS); 9674 } 9675 9676 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */ 9677 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA) 9678 { 9679 PetscBool isaij; 9680 MPI_Comm comm; 9681 9682 PetscFunctionBegin; 9683 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9684 PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, "")); 9685 PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented"); 9686 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij)); 9687 if (isaij) { /* SeqAIJ supports repeated rows */ 9688 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA)); 9689 } else { 9690 Mat A_loc; 9691 Mat_SeqAIJ *da; 9692 PetscSF sf; 9693 PetscInt ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata; 9694 PetscScalar *daa; 9695 const PetscInt *idxs; 9696 const PetscSFNode *iremotes; 9697 PetscSFNode *remotes; 9698 9699 /* SF for incoming rows */ 9700 PetscCall(PetscSFCreate(comm, &sf)); 9701 PetscCall(ISGetLocalSize(rows, &ni)); 9702 PetscCall(ISGetIndices(rows, &idxs)); 9703 PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs)); 9704 PetscCall(ISRestoreIndices(rows, &idxs)); 9705 9706 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc)); 9707 da = (Mat_SeqAIJ *)A_loc->data; 9708 PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata)); 9709 for (PetscInt i = 0; i < m; i++) { 9710 rdata[2 * i + 0] = da->i[i + 1] - da->i[i]; 9711 rdata[2 * i + 1] = da->i[i]; 9712 } 9713 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9714 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9715 PetscCall(PetscMalloc1(ni + 1, &di)); 9716 di[0] = 0; 9717 for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0]; 9718 PetscCall(PetscMalloc1(di[ni], &dj)); 9719 PetscCall(PetscMalloc1(di[ni], &daa)); 9720 PetscCall(PetscMalloc1(di[ni], &remotes)); 9721 9722 PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes)); 9723 9724 /* SF graph for nonzeros */ 9725 c = 0; 9726 for (PetscInt i = 0; i < ni; i++) { 9727 const PetscInt rank = iremotes[i].rank; 9728 const PetscInt rsize = ldata[2 * i]; 9729 for (PetscInt j = 0; j < rsize; j++) { 9730 remotes[c].rank = rank; 9731 remotes[c].index = ldata[2 * i + 1] + j; 9732 c++; 9733 } 9734 } 9735 PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]); 9736 PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER)); 9737 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9738 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9739 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9740 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9741 9742 PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA)); 9743 PetscCall(MatDestroy(&A_loc)); 9744 PetscCall(PetscSFDestroy(&sf)); 9745 PetscCall(PetscFree(di)); 9746 PetscCall(PetscFree(dj)); 9747 PetscCall(PetscFree(daa)); 9748 PetscCall(PetscFree(remotes)); 9749 PetscCall(PetscFree2(ldata, rdata)); 9750 } 9751 PetscFunctionReturn(PETSC_SUCCESS); 9752 } 9753