1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <petsc/private/kernels/blockinvert.h> 5 #include <../src/mat/impls/dense/seq/dense.h> 6 #include <petscdmplex.h> 7 #include <petscblaslapack.h> 8 #include <petsc/private/sfimpl.h> 9 #include <petsc/private/dmpleximpl.h> 10 #include <petscdmda.h> 11 12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 13 14 /* if range is true, it returns B s.t. span{B} = range(A) 15 if range is false, it returns B s.t. range(B) _|_ range(A) */ 16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 17 { 18 PetscScalar *uwork, *data, *U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM, bN, lwork, lierr, di = 1; 21 PetscInt ulw, i, nr, nc, n; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork2; 24 #endif 25 26 PetscFunctionBegin; 27 PetscCall(MatGetSize(A, &nr, &nc)); 28 if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 33 PetscCall(PetscMalloc1(ulw, &uwork)); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr, nc); 39 if (!rwork) { 40 PetscCall(PetscMalloc1(n, &sing)); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 PetscCall(PetscMalloc1(nr * nr, &U)); 47 PetscCall(PetscBLASIntCast(nr, &bM)); 48 PetscCall(PetscBLASIntCast(nc, &bN)); 49 PetscCall(PetscBLASIntCast(ulw, &lwork)); 50 PetscCall(MatDenseGetArray(A, &data)); 51 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 52 #if !defined(PETSC_USE_COMPLEX) 53 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 54 #else 55 PetscCall(PetscMalloc1(5 * n, &rwork2)); 56 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 57 PetscCall(PetscFree(rwork2)); 58 #endif 59 PetscCall(PetscFPTrapPop()); 60 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 61 PetscCall(MatDenseRestoreArray(A, &data)); 62 for (i = 0; i < n; i++) 63 if (sing[i] < PETSC_SMALL) break; 64 if (!rwork) PetscCall(PetscFree(sing)); 65 if (!work) PetscCall(PetscFree(uwork)); 66 /* create B */ 67 if (!range) { 68 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 69 PetscCall(MatDenseGetArray(*B, &data)); 70 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 71 } else { 72 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 73 PetscCall(MatDenseGetArray(*B, &data)); 74 PetscCall(PetscArraycpy(data, U, i * nr)); 75 } 76 PetscCall(MatDenseRestoreArray(*B, &data)); 77 PetscCall(PetscFree(U)); 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 /* TODO REMOVE */ 82 #if defined(PRINT_GDET) 83 static int inc = 0; 84 static int lev = 0; 85 #endif 86 87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 88 { 89 Mat GE, GEd; 90 PetscInt rsize, csize, esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 PetscCall(ISGetSize(edge, &esize)); 95 if (!esize) PetscFunctionReturn(PETSC_SUCCESS); 96 PetscCall(ISGetSize(extrow, &rsize)); 97 PetscCall(ISGetSize(extcol, &csize)); 98 99 /* gradients */ 100 ptr = work + 5 * esize; 101 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 102 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 103 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 104 PetscCall(MatDestroy(&GE)); 105 106 /* constants */ 107 ptr += rsize * csize; 108 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 109 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 110 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 111 PetscCall(MatDestroy(&GE)); 112 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 113 PetscCall(MatDestroy(&GEd)); 114 115 if (corners) { 116 Mat GEc; 117 const PetscScalar *vals; 118 PetscScalar v; 119 120 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 121 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 122 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 123 /* v = PetscAbsScalar(vals[0]); */ 124 v = 1.; 125 cvals[0] = vals[0] / v; 126 cvals[1] = vals[1] / v; 127 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 128 PetscCall(MatScale(*GKins, 1. / v)); 129 #if defined(PRINT_GDET) 130 { 131 PetscViewer viewer; 132 char filename[256]; 133 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++)); 134 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 135 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 136 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 137 PetscCall(MatView(GEc, viewer)); 138 PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK")); 139 PetscCall(MatView(*GKins, viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 141 PetscCall(MatView(GEd, viewer)); 142 PetscCall(PetscViewerDestroy(&viewer)); 143 } 144 #endif 145 PetscCall(MatDestroy(&GEd)); 146 PetscCall(MatDestroy(&GEc)); 147 } 148 PetscFunctionReturn(PETSC_SUCCESS); 149 } 150 151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *); 152 153 PetscErrorCode PCBDDCNedelecSupport(PC pc) 154 { 155 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 156 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 157 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 158 PetscSF sfv; 159 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 160 MPI_Comm comm; 161 IS lned, primals, allprimals, nedfieldlocal, elements_corners = NULL; 162 IS *eedges, *extrows, *extcols, *alleedges; 163 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 164 PetscScalar *vals, *work; 165 PetscReal *rwork; 166 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 167 PetscInt ne, nv, Lv, order, n, field; 168 PetscInt i, j, extmem, cum, maxsize, nee; 169 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 170 PetscInt *sfvleaves, *sfvroots; 171 PetscInt *corners, *cedges; 172 PetscInt *ecount, **eneighs, *vcount, **vneighs; 173 PetscInt *emarks; 174 PetscBool print, eerr, done, lrc[2], conforming, global, setprimal; 175 176 PetscFunctionBegin; 177 /* If the discrete gradient is defined for a subset of dofs and global is true, 178 it assumes G is given in global ordering for all the dofs. 179 Otherwise, the ordering is global for the Nedelec field */ 180 order = pcbddc->nedorder; 181 conforming = pcbddc->conforming; 182 field = pcbddc->nedfield; 183 global = pcbddc->nedglobal; 184 setprimal = PETSC_FALSE; 185 print = PETSC_FALSE; 186 187 /* Command line customization */ 188 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 189 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 190 /* print debug info and adaptive order TODO: to be removed */ 191 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 192 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 193 PetscOptionsEnd(); 194 195 /* Return if there are no edges in the decomposition */ 196 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 197 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 198 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 199 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 200 lrc[0] = PETSC_FALSE; 201 for (i = 0; i < n; i++) { 202 if (PetscRealPart(vals[i]) > 2.) { 203 lrc[0] = PETSC_TRUE; 204 break; 205 } 206 } 207 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 208 PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm)); 209 if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS); 210 211 /* Get Nedelec field */ 212 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 213 if (pcbddc->n_ISForDofsLocal && field >= 0) { 214 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 215 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 216 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 217 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 218 ne = n; 219 nedfieldlocal = NULL; 220 global = PETSC_TRUE; 221 } else if (field == PETSC_DECIDE) { 222 PetscInt rst, ren, *idx; 223 224 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 225 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 226 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 227 for (i = rst; i < ren; i++) { 228 PetscInt nc; 229 230 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 231 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 232 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 233 } 234 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 235 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 236 PetscCall(PetscMalloc1(n, &idx)); 237 for (i = 0, ne = 0; i < n; i++) 238 if (matis->sf_leafdata[i]) idx[ne++] = i; 239 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 240 } else { 241 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 242 } 243 244 /* Sanity checks */ 245 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 246 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 247 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 248 249 /* Just set primal dofs and return */ 250 if (setprimal) { 251 IS enedfieldlocal; 252 PetscInt *eidxs; 253 254 PetscCall(PetscMalloc1(ne, &eidxs)); 255 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 256 if (nedfieldlocal) { 257 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 258 for (i = 0, cum = 0; i < ne; i++) { 259 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i]; 260 } 261 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 262 } else { 263 for (i = 0, cum = 0; i < ne; i++) { 264 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i; 265 } 266 } 267 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 268 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 269 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 270 PetscCall(PetscFree(eidxs)); 271 PetscCall(ISDestroy(&nedfieldlocal)); 272 PetscCall(ISDestroy(&enedfieldlocal)); 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 /* Compute some l2g maps */ 277 if (nedfieldlocal) { 278 IS is; 279 280 /* need to map from the local Nedelec field to local numbering */ 281 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 282 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 283 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 284 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 285 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 286 if (global) { 287 PetscCall(PetscObjectReference((PetscObject)al2g)); 288 el2g = al2g; 289 } else { 290 IS gis; 291 292 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 293 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 294 PetscCall(ISDestroy(&gis)); 295 } 296 PetscCall(ISDestroy(&is)); 297 } else { 298 /* one ref for the destruction of al2g, one for el2g */ 299 PetscCall(PetscObjectReference((PetscObject)al2g)); 300 PetscCall(PetscObjectReference((PetscObject)al2g)); 301 el2g = al2g; 302 fl2g = NULL; 303 } 304 305 /* Start communication to drop connections for interior edges (for cc analysis only) */ 306 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 307 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 308 if (nedfieldlocal) { 309 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 310 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 311 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 312 } else { 313 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 314 } 315 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 316 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 317 318 /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting 319 Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */ 320 if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners)); 321 322 /* drop connections with interior edges to avoid unneeded communications and memory movements */ 323 PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view")); 324 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 325 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 326 if (global) { 327 PetscInt rst; 328 329 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 330 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 331 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst; 332 } 333 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 334 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 335 } else { 336 PetscInt *tbz; 337 338 PetscCall(PetscMalloc1(ne, &tbz)); 339 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 340 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 341 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 342 for (i = 0, cum = 0; i < ne; i++) 343 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 344 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 345 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 346 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 347 PetscCall(PetscFree(tbz)); 348 } 349 350 /* Extract subdomain relevant rows of G */ 351 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 352 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 353 PetscCall(MatAIJExtractRows(G, lned, &lGall)); 354 /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */ 355 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 356 PetscCall(ISDestroy(&lned)); 357 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 358 PetscCall(MatDestroy(&lGall)); 359 PetscCall(MatISGetLocalMat(lGis, &lG)); 360 if (matis->allow_repeated) { /* multi-element support */ 361 Mat *lGn, B; 362 IS *is_rows, *tcols, tmap, nmap; 363 PetscInt subnv; 364 const PetscInt *subvidxs; 365 ISLocalToGlobalMapping mapn; 366 367 PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn)); 368 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows)); 369 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols)); 370 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 371 if (fl2g) { 372 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i])); 373 } else { 374 PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i])); 375 is_rows[i] = pcbddc->local_subs[i]; 376 } 377 PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)])); 378 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn)); 379 PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv)); 380 PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs)); 381 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i])); 382 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs)); 383 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 384 } 385 386 /* Create new MATIS with repeated vertices */ 387 PetscCall(MatCreate(comm, &B)); 388 PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N)); 389 PetscCall(MatSetType(B, MATIS)); 390 PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE)); 391 PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap)); 392 PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap)); 393 PetscCall(ISDestroy(&tmap)); 394 PetscCall(ISGetLocalSize(nmap, &subnv)); 395 PetscCall(ISGetIndices(nmap, &subvidxs)); 396 PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap)); 397 PetscCall(ISRestoreIndices(nmap, &subvidxs)); 398 PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn)); 399 PetscCall(ISDestroy(&tmap)); 400 PetscCall(ISDestroy(&nmap)); 401 PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn)); 402 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 403 PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG)); 404 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 405 PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)])); 406 PetscCall(ISDestroy(&is_rows[i])); 407 PetscCall(ISDestroy(&tcols[i])); 408 } 409 PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG)); 410 PetscCall(PetscFree(lGn)); 411 PetscCall(PetscFree(is_rows)); 412 PetscCall(PetscFree(tcols)); 413 PetscCall(MatISSetLocalMat(B, lG)); 414 PetscCall(MatDestroy(&lG)); 415 416 PetscCall(MatDestroy(&lGis)); 417 lGis = B; 418 419 lGis->assembled = PETSC_TRUE; 420 } 421 PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view")); 422 423 /* SF for nodal dofs communications */ 424 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 425 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 426 PetscCall(PetscObjectReference((PetscObject)vl2g)); 427 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 428 PetscCall(PetscSFCreate(comm, &sfv)); 429 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 430 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 431 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 432 433 if (elements_corners) { 434 IS tmp; 435 Vec global, local; 436 Mat_IS *tGis = (Mat_IS *)lGis->data; 437 438 PetscCall(MatCreateVecs(lGis, &global, NULL)); 439 PetscCall(MatCreateVecs(tGis->A, &local, NULL)); 440 PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp)); 441 PetscCall(VecDestroy(&global)); 442 PetscCall(VecDestroy(&local)); 443 elements_corners = tmp; 444 } 445 446 /* Destroy temporary G */ 447 PetscCall(MatISGetLocalMat(lGis, &lG)); 448 PetscCall(PetscObjectReference((PetscObject)lG)); 449 PetscCall(MatDestroy(&G)); 450 PetscCall(MatDestroy(&lGis)); 451 452 if (print) { 453 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 454 PetscCall(MatView(lG, NULL)); 455 } 456 457 /* Save lG for values insertion in change of basis */ 458 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 459 460 /* Analyze the edge-nodes connections (duplicate lG) */ 461 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 462 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 463 PetscCall(PetscBTCreate(nv, &btv)); 464 PetscCall(PetscBTCreate(ne, &bte)); 465 PetscCall(PetscBTCreate(ne, &btb)); 466 PetscCall(PetscBTCreate(ne, &btbd)); 467 /* need to import the boundary specification to ensure the 468 proper detection of coarse edges' endpoints */ 469 if (pcbddc->DirichletBoundariesLocal) { 470 IS is; 471 472 if (fl2g) { 473 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 474 } else { 475 is = pcbddc->DirichletBoundariesLocal; 476 } 477 PetscCall(ISGetLocalSize(is, &cum)); 478 PetscCall(ISGetIndices(is, &idxs)); 479 for (i = 0; i < cum; i++) { 480 if (idxs[i] >= 0 && idxs[i] < ne) { 481 PetscCall(PetscBTSet(btb, idxs[i])); 482 PetscCall(PetscBTSet(btbd, idxs[i])); 483 } 484 } 485 PetscCall(ISRestoreIndices(is, &idxs)); 486 if (fl2g) PetscCall(ISDestroy(&is)); 487 } 488 if (pcbddc->NeumannBoundariesLocal) { 489 IS is; 490 491 if (fl2g) { 492 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 493 } else { 494 is = pcbddc->NeumannBoundariesLocal; 495 } 496 PetscCall(ISGetLocalSize(is, &cum)); 497 PetscCall(ISGetIndices(is, &idxs)); 498 for (i = 0; i < cum; i++) { 499 if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i])); 500 } 501 PetscCall(ISRestoreIndices(is, &idxs)); 502 if (fl2g) PetscCall(ISDestroy(&is)); 503 } 504 505 /* Count neighs per dof */ 506 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL)); 507 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL)); 508 509 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 510 for proper detection of coarse edges' endpoints */ 511 PetscCall(PetscBTCreate(ne, &btee)); 512 for (i = 0; i < ne; i++) { 513 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i)); 514 } 515 PetscCall(PetscMalloc1(ne, &marks)); 516 if (!conforming) { 517 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 518 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 519 } 520 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 521 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 522 cum = 0; 523 for (i = 0; i < ne; i++) { 524 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 525 if (!PetscBTLookup(btee, i)) { 526 marks[cum++] = i; 527 continue; 528 } 529 /* set badly connected edge dofs as primal */ 530 if (!conforming) { 531 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 532 marks[cum++] = i; 533 PetscCall(PetscBTSet(bte, i)); 534 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 535 } else { 536 /* every edge dofs should be connected through a certain number of nodal dofs 537 to other edge dofs belonging to coarse edges 538 - at most 2 endpoints 539 - order-1 interior nodal dofs 540 - no undefined nodal dofs (nconn < order) 541 */ 542 PetscInt ends = 0, ints = 0, undef = 0; 543 for (j = ii[i]; j < ii[i + 1]; j++) { 544 PetscInt v = jj[j], k; 545 PetscInt nconn = iit[v + 1] - iit[v]; 546 for (k = iit[v]; k < iit[v + 1]; k++) 547 if (!PetscBTLookup(btee, jjt[k])) nconn--; 548 if (nconn > order) ends++; 549 else if (nconn == order) ints++; 550 else undef++; 551 } 552 if (undef || ends > 2 || ints != order - 1) { 553 marks[cum++] = i; 554 PetscCall(PetscBTSet(bte, i)); 555 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 556 } 557 } 558 } 559 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 560 if (!order && ii[i + 1] != ii[i]) { 561 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 562 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 563 } 564 } 565 PetscCall(PetscBTDestroy(&btee)); 566 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 567 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 568 if (!conforming) { 569 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 570 PetscCall(MatDestroy(&lGt)); 571 } 572 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 573 574 /* identify splitpoints and corner candidates */ 575 PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots)); 576 PetscCall(PetscBTCreate(nv, &btvcand)); 577 if (elements_corners) { 578 PetscCall(ISGetLocalSize(elements_corners, &cum)); 579 PetscCall(ISGetIndices(elements_corners, &idxs)); 580 for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i])); 581 PetscCall(ISRestoreIndices(elements_corners, &idxs)); 582 } 583 584 if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */ 585 PetscSF emlsf, vmlsf; 586 PetscInt *eleaves, *vleaves, *meleaves, *mvleaves; 587 PetscInt cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl; 588 589 PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs)); 590 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 591 PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs)); 592 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 593 594 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf)); 595 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf)); 596 597 PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL)); 598 for (i = 0, j = 0; i < ne; i++) j += ecount[i]; 599 PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne); 600 PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j); 601 602 PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL)); 603 for (i = 0, j = 0; i < nv; i++) j += vcount[i]; 604 PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv); 605 PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j); 606 607 PetscCall(PetscMalloc1(ne, &eleaves)); 608 PetscCall(PetscMalloc1(nv, &vleaves)); 609 for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX; 610 for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX; 611 PetscCall(PetscMalloc1(emnl, &meleaves)); 612 PetscCall(PetscMalloc1(vmnl, &mvleaves)); 613 614 PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm)); 615 PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 616 for (i = 0; i < n_subs; i++) { 617 const PetscInt *idxs; 618 const PetscInt subid = cum_subs + i; 619 PetscInt ns; 620 621 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns)); 622 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 623 for (j = 0; j < ns; j++) { 624 const PetscInt e = idxs[j]; 625 626 eleaves[e] = subid; 627 for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid; 628 } 629 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 630 } 631 PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 632 PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 633 PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 634 PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 635 PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 636 PetscCall(PetscFree(eleaves)); 637 PetscCall(PetscFree(vleaves)); 638 639 PetscCall(PetscMalloc1(ne + 1, &eneighs)); 640 eneighs[0] = meleaves; 641 for (i = 0; i < ne; i++) { 642 PetscCall(PetscSortInt(ecount[i], eneighs[i])); 643 eneighs[i + 1] = eneighs[i] + ecount[i]; 644 } 645 PetscCall(PetscMalloc1(nv + 1, &vneighs)); 646 vneighs[0] = mvleaves; 647 for (i = 0; i < nv; i++) { 648 PetscCall(PetscSortInt(vcount[i], vneighs[i])); 649 vneighs[i + 1] = vneighs[i] + vcount[i]; 650 } 651 } else { 652 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs)); 653 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs)); 654 } 655 656 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 657 if (print) { 658 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 659 PetscCall(MatView(lGe, NULL)); 660 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 661 PetscCall(MatView(lGt, NULL)); 662 } 663 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 664 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 665 for (i = 0; i < nv; i++) { 666 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 667 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 668 if (!order) { /* variable order */ 669 PetscReal vorder = 0.; 670 671 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 672 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 673 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 674 ord = 1; 675 } 676 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 677 const PetscInt e = jj[j]; 678 679 if (PetscBTLookup(btbd, e)) { 680 bdir = PETSC_TRUE; 681 break; 682 } 683 if (vc != ecount[e]) { 684 sneighs = PETSC_FALSE; 685 } else { 686 const PetscInt *vn = vneighs[i], *en = eneighs[e]; 687 688 for (PetscInt k = 0; k < vc; k++) { 689 if (vn[k] != en[k]) { 690 sneighs = PETSC_FALSE; 691 break; 692 } 693 } 694 } 695 } 696 if (elements_corners) test = 0; 697 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 698 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir])); 699 PetscCall(PetscBTSet(btv, i)); 700 } else if (test == ord) { 701 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 702 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i)); 703 PetscCall(PetscBTSet(btv, i)); 704 } else if (!elements_corners) { 705 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i)); 706 PetscCall(PetscBTSet(btvcand, i)); 707 } 708 } 709 } 710 PetscCall(PetscBTDestroy(&btbd)); 711 712 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 713 if (order != 1) { 714 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n")); 715 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 716 for (i = 0; i < nv; i++) { 717 if (PetscBTLookup(btvcand, i)) { 718 PetscBool found = PETSC_FALSE; 719 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 720 PetscInt k, e = jj[j]; 721 if (PetscBTLookup(bte, e)) continue; 722 for (k = iit[e]; k < iit[e + 1]; k++) { 723 PetscInt v = jjt[k]; 724 if (v != i && PetscBTLookup(btvcand, v)) { 725 found = PETSC_TRUE; 726 break; 727 } 728 } 729 } 730 if (!found) { 731 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i)); 732 PetscCall(PetscBTClear(btvcand, i)); 733 } else { 734 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i)); 735 } 736 } 737 } 738 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 739 } 740 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 741 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 742 PetscCall(MatDestroy(&lGe)); 743 744 /* Get the local G^T explicitly */ 745 PetscCall(MatDestroy(&lGt)); 746 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 747 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 748 749 /* Mark shared nodal dofs */ 750 PetscCall(PetscBTCreate(nv, &btvi)); 751 for (i = 0; i < nv; i++) { 752 if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i)); 753 } 754 755 if (matis->allow_repeated) { 756 PetscCall(PetscFree(eneighs[0])); 757 PetscCall(PetscFree(vneighs[0])); 758 PetscCall(PetscFree(eneighs)); 759 PetscCall(PetscFree(vneighs)); 760 } 761 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 762 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 763 764 /* communicate corners and splitpoints */ 765 PetscCall(PetscMalloc1(nv, &vmarks)); 766 PetscCall(PetscArrayzero(sfvleaves, nv)); 767 PetscCall(PetscArrayzero(sfvroots, Lv)); 768 for (i = 0; i < nv; i++) 769 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 770 771 if (print) { 772 IS tbz; 773 774 cum = 0; 775 for (i = 0; i < nv; i++) 776 if (sfvleaves[i]) vmarks[cum++] = i; 777 778 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 779 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 780 PetscCall(ISView(tbz, NULL)); 781 PetscCall(ISDestroy(&tbz)); 782 } 783 784 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 785 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 786 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 787 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 788 789 /* Zero rows of lGt corresponding to identified corners 790 and interior nodal dofs */ 791 cum = 0; 792 for (i = 0; i < nv; i++) { 793 if (sfvleaves[i]) { 794 vmarks[cum++] = i; 795 PetscCall(PetscBTSet(btv, i)); 796 } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 797 } 798 PetscCall(PetscBTDestroy(&btvi)); 799 if (print) { 800 IS tbz; 801 802 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 803 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 804 PetscCall(ISView(tbz, NULL)); 805 PetscCall(ISDestroy(&tbz)); 806 } 807 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 808 PetscCall(PetscFree(vmarks)); 809 PetscCall(PetscSFDestroy(&sfv)); 810 PetscCall(PetscFree2(sfvleaves, sfvroots)); 811 812 /* Recompute G */ 813 PetscCall(MatDestroy(&lG)); 814 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 815 if (print) { 816 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 817 PetscCall(MatView(lG, NULL)); 818 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 819 PetscCall(MatView(lGt, NULL)); 820 } 821 822 /* Get primal dofs (if any) */ 823 cum = 0; 824 for (i = 0; i < ne; i++) { 825 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 826 } 827 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 828 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 829 if (print) { 830 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 831 PetscCall(ISView(primals, NULL)); 832 } 833 PetscCall(PetscBTDestroy(&bte)); 834 /* TODO: what if the user passed in some of them ? */ 835 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 836 PetscCall(ISDestroy(&primals)); 837 838 /* Compute edge connectivity */ 839 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 840 841 /* Symbolic conn = lG*lGt */ 842 if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */ 843 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 844 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 845 PetscCall(MatProductSetAlgorithm(conn, "default")); 846 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 847 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 848 PetscCall(MatProductSetFromOptions(conn)); 849 PetscCall(MatProductSymbolic(conn)); 850 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 851 if (fl2g) { 852 PetscBT btf; 853 PetscInt *iia, *jja, *iiu, *jju; 854 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 855 856 /* create CSR for all local dofs */ 857 PetscCall(PetscMalloc1(n + 1, &iia)); 858 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 859 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 860 iiu = pcbddc->mat_graph->xadj; 861 jju = pcbddc->mat_graph->adjncy; 862 } else if (pcbddc->use_local_adj) { 863 rest = PETSC_TRUE; 864 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 865 } else { 866 free = PETSC_TRUE; 867 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 868 iiu[0] = 0; 869 for (i = 0; i < n; i++) { 870 iiu[i + 1] = i + 1; 871 jju[i] = -1; 872 } 873 } 874 875 /* import sizes of CSR */ 876 iia[0] = 0; 877 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 878 879 /* overwrite entries corresponding to the Nedelec field */ 880 PetscCall(PetscBTCreate(n, &btf)); 881 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 882 for (i = 0; i < ne; i++) { 883 PetscCall(PetscBTSet(btf, idxs[i])); 884 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 885 } 886 887 /* iia in CSR */ 888 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 889 890 /* jja in CSR */ 891 PetscCall(PetscMalloc1(iia[n], &jja)); 892 for (i = 0; i < n; i++) 893 if (!PetscBTLookup(btf, i)) 894 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 895 896 /* map edge dofs connectivity */ 897 if (jj) { 898 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 899 for (i = 0; i < ne; i++) { 900 PetscInt e = idxs[i]; 901 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 902 } 903 } 904 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 905 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES)); 906 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 907 if (free) PetscCall(PetscFree2(iiu, jju)); 908 PetscCall(PetscBTDestroy(&btf)); 909 } else { 910 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES)); 911 } 912 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 913 PetscCall(MatDestroy(&conn)); 914 } 915 916 /* Analyze interface for edge dofs */ 917 PetscCall(PCBDDCAnalyzeInterface(pc)); 918 pcbddc->mat_graph->twodim = PETSC_FALSE; 919 920 /* Get coarse edges in the edge space */ 921 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 922 923 if (fl2g) { 924 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 925 PetscCall(PetscMalloc1(nee, &eedges)); 926 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 927 } else { 928 eedges = alleedges; 929 primals = allprimals; 930 } 931 932 /* Mark fine edge dofs with their coarse edge id */ 933 PetscCall(PetscArrayzero(marks, ne)); 934 PetscCall(ISGetLocalSize(primals, &cum)); 935 PetscCall(ISGetIndices(primals, &idxs)); 936 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 937 PetscCall(ISRestoreIndices(primals, &idxs)); 938 if (print) { 939 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 940 PetscCall(ISView(primals, NULL)); 941 } 942 943 maxsize = 0; 944 for (i = 0; i < nee; i++) { 945 PetscInt size, mark = i + 1; 946 947 PetscCall(ISGetLocalSize(eedges[i], &size)); 948 PetscCall(ISGetIndices(eedges[i], &idxs)); 949 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 950 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 951 maxsize = PetscMax(maxsize, size); 952 } 953 954 /* Find coarse edge endpoints */ 955 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 956 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 957 for (i = 0; i < nee; i++) { 958 PetscInt mark = i + 1, size; 959 960 PetscCall(ISGetLocalSize(eedges[i], &size)); 961 if (!size && nedfieldlocal) continue; 962 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 963 PetscCall(ISGetIndices(eedges[i], &idxs)); 964 if (print) { 965 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 966 PetscCall(ISView(eedges[i], NULL)); 967 } 968 for (j = 0; j < size; j++) { 969 PetscInt k, ee = idxs[j]; 970 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee)); 971 for (k = ii[ee]; k < ii[ee + 1]; k++) { 972 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k])); 973 if (PetscBTLookup(btv, jj[k])) { 974 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k])); 975 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 976 PetscInt k2; 977 PetscBool corner = PETSC_FALSE; 978 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 979 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]))); 980 /* it's a corner if either is connected with an edge dof belonging to a different cc or 981 if the edge dof lie on the natural part of the boundary */ 982 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 983 corner = PETSC_TRUE; 984 break; 985 } 986 } 987 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 988 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k])); 989 PetscCall(PetscBTSet(btv, jj[k])); 990 } else { 991 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n")); 992 } 993 } 994 } 995 } 996 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 997 } 998 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 999 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1000 PetscCall(PetscBTDestroy(&btb)); 1001 1002 /* Reset marked primal dofs */ 1003 PetscCall(ISGetLocalSize(primals, &cum)); 1004 PetscCall(ISGetIndices(primals, &idxs)); 1005 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 1006 PetscCall(ISRestoreIndices(primals, &idxs)); 1007 1008 /* Now use the initial lG */ 1009 PetscCall(MatDestroy(&lG)); 1010 PetscCall(MatDestroy(&lGt)); 1011 lG = lGinit; 1012 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 1013 1014 /* Compute extended cols indices */ 1015 PetscCall(PetscBTCreate(nv, &btvc)); 1016 PetscCall(PetscBTCreate(nee, &bter)); 1017 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1018 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 1019 i *= maxsize; 1020 PetscCall(PetscCalloc1(nee, &extcols)); 1021 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 1022 eerr = PETSC_FALSE; 1023 for (i = 0; i < nee; i++) { 1024 PetscInt size, found = 0; 1025 1026 cum = 0; 1027 PetscCall(ISGetLocalSize(eedges[i], &size)); 1028 if (!size && nedfieldlocal) continue; 1029 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1030 PetscCall(ISGetIndices(eedges[i], &idxs)); 1031 PetscCall(PetscBTMemzero(nv, btvc)); 1032 for (j = 0; j < size; j++) { 1033 PetscInt k, ee = idxs[j]; 1034 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1035 PetscInt vv = jj[k]; 1036 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 1037 else if (!PetscBTLookupSet(btvc, vv)) found++; 1038 } 1039 } 1040 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1041 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1042 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1043 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1044 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1045 /* it may happen that endpoints are not defined at this point 1046 if it is the case, mark this edge for a second pass */ 1047 if (cum != size - 1 || found != 2) { 1048 PetscCall(PetscBTSet(bter, i)); 1049 if (print) { 1050 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 1051 PetscCall(ISView(eedges[i], NULL)); 1052 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 1053 PetscCall(ISView(extcols[i], NULL)); 1054 } 1055 eerr = PETSC_TRUE; 1056 } 1057 } 1058 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 1059 PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm)); 1060 if (done) { 1061 PetscInt *newprimals; 1062 1063 PetscCall(PetscMalloc1(ne, &newprimals)); 1064 PetscCall(ISGetLocalSize(primals, &cum)); 1065 PetscCall(ISGetIndices(primals, &idxs)); 1066 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 1067 PetscCall(ISRestoreIndices(primals, &idxs)); 1068 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1069 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr])); 1070 for (i = 0; i < nee; i++) { 1071 PetscBool has_candidates = PETSC_FALSE; 1072 if (PetscBTLookup(bter, i)) { 1073 PetscInt size, mark = i + 1; 1074 1075 PetscCall(ISGetLocalSize(eedges[i], &size)); 1076 PetscCall(ISGetIndices(eedges[i], &idxs)); 1077 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1078 for (j = 0; j < size; j++) { 1079 PetscInt k, ee = idxs[j]; 1080 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1])); 1081 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1082 /* set all candidates located on the edge as corners */ 1083 if (PetscBTLookup(btvcand, jj[k])) { 1084 PetscInt k2, vv = jj[k]; 1085 has_candidates = PETSC_TRUE; 1086 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv)); 1087 PetscCall(PetscBTSet(btv, vv)); 1088 /* set all edge dofs connected to candidate as primals */ 1089 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 1090 if (marks[jjt[k2]] == mark) { 1091 PetscInt k3, ee2 = jjt[k2]; 1092 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2)); 1093 newprimals[cum++] = ee2; 1094 /* finally set the new corners */ 1095 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 1096 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3])); 1097 PetscCall(PetscBTSet(btv, jj[k3])); 1098 } 1099 } 1100 } 1101 } else { 1102 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k])); 1103 } 1104 } 1105 } 1106 if (!has_candidates) { /* circular edge */ 1107 PetscInt k, ee = idxs[0], *tmarks; 1108 1109 PetscCall(PetscCalloc1(ne, &tmarks)); 1110 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i)); 1111 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1112 PetscInt k2; 1113 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k])); 1114 PetscCall(PetscBTSet(btv, jj[k])); 1115 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 1116 } 1117 for (j = 0; j < size; j++) { 1118 if (tmarks[idxs[j]] > 1) { 1119 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j])); 1120 newprimals[cum++] = idxs[j]; 1121 } 1122 } 1123 PetscCall(PetscFree(tmarks)); 1124 } 1125 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1126 } 1127 PetscCall(ISDestroy(&extcols[i])); 1128 } 1129 PetscCall(PetscFree(extcols)); 1130 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1131 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 1132 if (fl2g) { 1133 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 1134 PetscCall(ISDestroy(&primals)); 1135 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1136 PetscCall(PetscFree(eedges)); 1137 } 1138 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1139 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 1140 PetscCall(PetscFree(newprimals)); 1141 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 1142 PetscCall(ISDestroy(&primals)); 1143 PetscCall(PCBDDCAnalyzeInterface(pc)); 1144 pcbddc->mat_graph->twodim = PETSC_FALSE; 1145 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1146 if (fl2g) { 1147 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 1148 PetscCall(PetscMalloc1(nee, &eedges)); 1149 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 1150 } else { 1151 eedges = alleedges; 1152 primals = allprimals; 1153 } 1154 PetscCall(PetscCalloc1(nee, &extcols)); 1155 1156 /* Mark again */ 1157 PetscCall(PetscArrayzero(marks, ne)); 1158 for (i = 0; i < nee; i++) { 1159 PetscInt size, mark = i + 1; 1160 1161 PetscCall(ISGetLocalSize(eedges[i], &size)); 1162 PetscCall(ISGetIndices(eedges[i], &idxs)); 1163 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1164 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1165 } 1166 if (print) { 1167 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1168 PetscCall(ISView(primals, NULL)); 1169 } 1170 1171 /* Recompute extended cols */ 1172 eerr = PETSC_FALSE; 1173 for (i = 0; i < nee; i++) { 1174 PetscInt size; 1175 1176 cum = 0; 1177 PetscCall(ISGetLocalSize(eedges[i], &size)); 1178 if (!size && nedfieldlocal) continue; 1179 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1180 PetscCall(ISGetIndices(eedges[i], &idxs)); 1181 for (j = 0; j < size; j++) { 1182 PetscInt k, ee = idxs[j]; 1183 for (k = ii[ee]; k < ii[ee + 1]; k++) 1184 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1185 } 1186 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1187 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1188 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1189 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1190 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1191 if (cum != size - 1) { 1192 if (print) { 1193 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1194 PetscCall(ISView(eedges[i], NULL)); 1195 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1196 PetscCall(ISView(extcols[i], NULL)); 1197 } 1198 eerr = PETSC_TRUE; 1199 } 1200 } 1201 } 1202 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1203 PetscCall(PetscFree2(extrow, gidxs)); 1204 PetscCall(PetscBTDestroy(&bter)); 1205 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1206 /* an error should not occur at this point */ 1207 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1208 1209 /* Check the number of endpoints */ 1210 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1211 PetscCall(PetscMalloc1(2 * nee, &corners)); 1212 PetscCall(PetscMalloc1(nee, &cedges)); 1213 for (i = 0; i < nee; i++) { 1214 PetscInt size, found = 0, gc[2]; 1215 1216 /* init with defaults */ 1217 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1218 PetscCall(ISGetLocalSize(eedges[i], &size)); 1219 if (!size && nedfieldlocal) continue; 1220 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1221 PetscCall(ISGetIndices(eedges[i], &idxs)); 1222 PetscCall(PetscBTMemzero(nv, btvc)); 1223 for (j = 0; j < size; j++) { 1224 PetscInt k, ee = idxs[j]; 1225 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1226 PetscInt vv = jj[k]; 1227 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1228 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i); 1229 corners[i * 2 + found++] = vv; 1230 } 1231 } 1232 } 1233 if (found != 2) { 1234 PetscInt e; 1235 if (fl2g) { 1236 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1237 } else { 1238 e = idxs[0]; 1239 } 1240 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1241 } 1242 1243 /* get primal dof index on this coarse edge */ 1244 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1245 if (gc[0] > gc[1]) { 1246 PetscInt swap = corners[2 * i]; 1247 corners[2 * i] = corners[2 * i + 1]; 1248 corners[2 * i + 1] = swap; 1249 } 1250 cedges[i] = idxs[size - 1]; 1251 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1252 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1])); 1253 } 1254 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1255 PetscCall(PetscBTDestroy(&btvc)); 1256 1257 if (PetscDefined(USE_DEBUG)) { 1258 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1259 not interfere with neighbouring coarse edges */ 1260 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1261 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1262 for (i = 0; i < nv; i++) { 1263 PetscInt emax = 0, eemax = 0; 1264 1265 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1266 PetscCall(PetscArrayzero(emarks, nee + 1)); 1267 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1268 for (j = 1; j < nee + 1; j++) { 1269 if (emax < emarks[j]) { 1270 emax = emarks[j]; 1271 eemax = j; 1272 } 1273 } 1274 /* not relevant for edges */ 1275 if (!eemax) continue; 1276 1277 for (j = ii[i]; j < ii[i + 1]; j++) { 1278 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1279 } 1280 } 1281 PetscCall(PetscFree(emarks)); 1282 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1283 } 1284 1285 /* Compute extended rows indices for edge blocks of the change of basis */ 1286 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1287 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1288 extmem *= maxsize; 1289 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1290 PetscCall(PetscMalloc1(nee, &extrows)); 1291 PetscCall(PetscCalloc1(nee, &extrowcum)); 1292 for (i = 0; i < nv; i++) { 1293 PetscInt mark = 0, size, start; 1294 1295 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1296 for (j = ii[i]; j < ii[i + 1]; j++) 1297 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1298 1299 /* not relevant */ 1300 if (!mark) continue; 1301 1302 /* import extended row */ 1303 mark--; 1304 start = mark * extmem + extrowcum[mark]; 1305 size = ii[i + 1] - ii[i]; 1306 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1307 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1308 extrowcum[mark] += size; 1309 } 1310 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1311 PetscCall(MatDestroy(&lGt)); 1312 PetscCall(PetscFree(marks)); 1313 1314 /* Compress extrows */ 1315 cum = 0; 1316 for (i = 0; i < nee; i++) { 1317 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1318 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1319 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1320 cum = PetscMax(cum, size); 1321 } 1322 PetscCall(PetscFree(extrowcum)); 1323 PetscCall(PetscBTDestroy(&btv)); 1324 PetscCall(PetscBTDestroy(&btvcand)); 1325 1326 /* Workspace for lapack inner calls and VecSetValues */ 1327 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1328 1329 /* Create change of basis matrix (preallocation can be improved) */ 1330 PetscCall(MatCreate(comm, &T)); 1331 PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap)); 1332 PetscCall(MatSetType(T, MATAIJ)); 1333 PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL)); 1334 PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL)); 1335 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1336 PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1337 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1338 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1339 1340 /* Defaults to identity */ 1341 for (i = pc->mat->rmap->rstart; i < pc->mat->rmap->rend; i++) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES)); 1342 1343 /* Create discrete gradient for the coarser level if needed */ 1344 PetscCall(MatDestroy(&pcbddc->nedcG)); 1345 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1346 if (pcbddc->current_level < pcbddc->max_levels) { 1347 ISLocalToGlobalMapping cel2g, cvl2g; 1348 IS wis, gwis; 1349 PetscInt cnv, cne; 1350 1351 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1352 if (fl2g) { 1353 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1354 } else { 1355 PetscCall(PetscObjectReference((PetscObject)wis)); 1356 pcbddc->nedclocal = wis; 1357 } 1358 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1359 PetscCall(ISDestroy(&wis)); 1360 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1361 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1362 PetscCall(ISDestroy(&wis)); 1363 PetscCall(ISDestroy(&gwis)); 1364 1365 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1366 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1367 PetscCall(ISDestroy(&wis)); 1368 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1369 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1370 PetscCall(ISDestroy(&wis)); 1371 PetscCall(ISDestroy(&gwis)); 1372 1373 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1374 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1375 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1376 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1377 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1378 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1379 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1380 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1381 } 1382 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1383 1384 #if defined(PRINT_GDET) 1385 inc = 0; 1386 lev = pcbddc->current_level; 1387 #endif 1388 1389 /* Insert values in the change of basis matrix */ 1390 for (i = 0; i < nee; i++) { 1391 Mat Gins = NULL, GKins = NULL; 1392 IS cornersis = NULL; 1393 PetscScalar cvals[2]; 1394 1395 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); 1396 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1397 if (Gins && GKins) { 1398 const PetscScalar *data; 1399 const PetscInt *rows, *cols; 1400 PetscInt nrh, nch, nrc, ncc; 1401 1402 PetscCall(ISGetIndices(eedges[i], &cols)); 1403 /* H1 */ 1404 PetscCall(ISGetIndices(extrows[i], &rows)); 1405 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1406 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1407 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1408 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1409 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1410 /* complement */ 1411 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1412 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1413 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1414 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1415 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1416 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1417 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1418 1419 /* coarse discrete gradient */ 1420 if (pcbddc->nedcG) { 1421 PetscInt cols[2]; 1422 1423 cols[0] = 2 * i; 1424 cols[1] = 2 * i + 1; 1425 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1426 } 1427 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1428 } 1429 PetscCall(ISDestroy(&extrows[i])); 1430 PetscCall(ISDestroy(&extcols[i])); 1431 PetscCall(ISDestroy(&cornersis)); 1432 PetscCall(MatDestroy(&Gins)); 1433 PetscCall(MatDestroy(&GKins)); 1434 } 1435 1436 /* for FDM element-by-element: first dof on the edge only constraint. Why? */ 1437 if (elements_corners && pcbddc->mat_graph->multi_element) { 1438 ISLocalToGlobalMapping map; 1439 MatNullSpace nnsp; 1440 Vec quad_vec; 1441 1442 PetscCall(MatCreateVecs(pc->pmat, &quad_vec, NULL)); 1443 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, 1, &quad_vec, &nnsp)); 1444 PetscCall(VecLockReadPop(quad_vec)); 1445 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 1446 PetscCall(VecSetLocalToGlobalMapping(quad_vec, map)); 1447 for (i = 0; i < nee; i++) { 1448 const PetscInt *idxs; 1449 PetscScalar one = 1.0; 1450 1451 PetscCall(ISGetLocalSize(alleedges[i], &cum)); 1452 if (!cum) continue; 1453 PetscCall(ISGetIndices(alleedges[i], &idxs)); 1454 PetscCall(VecSetValuesLocal(quad_vec, 1, idxs, &one, INSERT_VALUES)); 1455 PetscCall(ISRestoreIndices(alleedges[i], &idxs)); 1456 } 1457 PetscCall(VecLockReadPush(quad_vec)); 1458 PetscCall(VecDestroy(&quad_vec)); 1459 PetscCall(MatSetNearNullSpace(pc->pmat, nnsp)); 1460 PetscCall(MatNullSpaceDestroy(&nnsp)); 1461 } 1462 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1463 1464 /* Start assembling */ 1465 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1466 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1467 1468 /* Free */ 1469 if (fl2g) { 1470 PetscCall(ISDestroy(&primals)); 1471 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1472 PetscCall(PetscFree(eedges)); 1473 } 1474 1475 /* hack mat_graph with primal dofs on the coarse edges */ 1476 { 1477 PCBDDCGraph graph = pcbddc->mat_graph; 1478 PetscInt *oqueue = graph->queue; 1479 PetscInt *ocptr = graph->cptr; 1480 PetscInt ncc, *idxs; 1481 1482 /* find first primal edge */ 1483 if (pcbddc->nedclocal) { 1484 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1485 } else { 1486 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1487 idxs = cedges; 1488 } 1489 cum = 0; 1490 while (cum < nee && cedges[cum] < 0) cum++; 1491 1492 /* adapt connected components */ 1493 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1494 graph->cptr[0] = 0; 1495 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1496 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1497 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1498 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1499 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1500 ncc++; 1501 lc--; 1502 cum++; 1503 while (cum < nee && cedges[cum] < 0) cum++; 1504 } 1505 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1506 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1507 ncc++; 1508 } 1509 graph->ncc = ncc; 1510 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1511 PetscCall(PetscFree2(ocptr, oqueue)); 1512 } 1513 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1514 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1515 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1516 1517 PetscCall(ISDestroy(&nedfieldlocal)); 1518 PetscCall(PetscFree(extrow)); 1519 PetscCall(PetscFree2(work, rwork)); 1520 PetscCall(PetscFree(corners)); 1521 PetscCall(PetscFree(cedges)); 1522 PetscCall(PetscFree(extrows)); 1523 PetscCall(PetscFree(extcols)); 1524 PetscCall(MatDestroy(&lG)); 1525 1526 /* Complete assembling */ 1527 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1528 PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view")); 1529 if (pcbddc->nedcG) { 1530 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1531 PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view")); 1532 } 1533 1534 PetscCall(ISDestroy(&elements_corners)); 1535 1536 /* set change of basis */ 1537 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE)); 1538 PetscCall(MatDestroy(&T)); 1539 PetscFunctionReturn(PETSC_SUCCESS); 1540 } 1541 1542 /* the near-null space of BDDC carries information on quadrature weights, 1543 and these can be collinear -> so cheat with MatNullSpaceCreate 1544 and create a suitable set of basis vectors first */ 1545 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1546 { 1547 PetscInt i; 1548 1549 PetscFunctionBegin; 1550 for (i = 0; i < nvecs; i++) { 1551 PetscInt first, last; 1552 1553 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1554 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1555 if (i >= first && i < last) { 1556 PetscScalar *data; 1557 PetscCall(VecGetArray(quad_vecs[i], &data)); 1558 if (!has_const) { 1559 data[i - first] = 1.; 1560 } else { 1561 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1562 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1563 } 1564 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1565 } 1566 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1567 } 1568 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1569 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1570 PetscInt first, last; 1571 PetscCall(VecLockReadPop(quad_vecs[i])); 1572 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1573 if (i >= first && i < last) { 1574 PetscScalar *data; 1575 PetscCall(VecGetArray(quad_vecs[i], &data)); 1576 if (!has_const) { 1577 data[i - first] = 0.; 1578 } else { 1579 data[2 * i - first] = 0.; 1580 data[2 * i - first + 1] = 0.; 1581 } 1582 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1583 } 1584 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1585 PetscCall(VecLockReadPush(quad_vecs[i])); 1586 } 1587 PetscFunctionReturn(PETSC_SUCCESS); 1588 } 1589 1590 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1591 { 1592 Mat loc_divudotp; 1593 Vec p, v, quad_vec; 1594 ISLocalToGlobalMapping map; 1595 PetscScalar *array; 1596 1597 PetscFunctionBegin; 1598 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1599 if (!transpose) { 1600 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1601 } else { 1602 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1603 } 1604 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp)); 1605 PetscCall(VecLockReadPop(quad_vec)); 1606 PetscCall(VecSetLocalToGlobalMapping(quad_vec, map)); 1607 1608 /* compute local quad vec */ 1609 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1610 if (!transpose) { 1611 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1612 } else { 1613 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1614 } 1615 /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */ 1616 PetscCall(VecSet(p, 1.)); 1617 if (!transpose) { 1618 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1619 } else { 1620 PetscCall(MatMult(loc_divudotp, p, v)); 1621 } 1622 PetscCall(VecDestroy(&p)); 1623 if (vl2l) { 1624 Mat lA; 1625 VecScatter sc; 1626 Vec vins; 1627 1628 PetscCall(MatISGetLocalMat(A, &lA)); 1629 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1630 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1631 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1632 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1633 PetscCall(VecScatterDestroy(&sc)); 1634 PetscCall(VecDestroy(&v)); 1635 v = vins; 1636 } 1637 1638 /* mask summation of interface values */ 1639 PetscInt n, *mmask, *mask, *idxs, nmr, nr; 1640 const PetscInt *degree; 1641 PetscSF msf; 1642 1643 PetscCall(VecGetLocalSize(v, &n)); 1644 PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL)); 1645 PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf)); 1646 PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL)); 1647 PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs)); 1648 PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, °ree)); 1649 PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, °ree)); 1650 for (PetscInt i = 0, c = 0; i < nr; i++) { 1651 mmask[c] = 1; 1652 c += degree[i]; 1653 } 1654 PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1655 PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1656 PetscCall(VecGetArray(v, &array)); 1657 for (PetscInt i = 0; i < n; i++) { 1658 array[i] *= mask[i]; 1659 idxs[i] = i; 1660 } 1661 PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES)); 1662 PetscCall(VecRestoreArray(v, &array)); 1663 PetscCall(PetscFree3(mmask, mask, idxs)); 1664 PetscCall(VecDestroy(&v)); 1665 PetscCall(VecAssemblyBegin(quad_vec)); 1666 PetscCall(VecAssemblyEnd(quad_vec)); 1667 PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view")); 1668 PetscCall(VecLockReadPush(quad_vec)); 1669 PetscCall(VecDestroy(&quad_vec)); 1670 PetscFunctionReturn(PETSC_SUCCESS); 1671 } 1672 1673 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1674 { 1675 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1676 1677 PetscFunctionBegin; 1678 if (primalv) { 1679 if (pcbddc->user_primal_vertices_local) { 1680 IS list[2], newp; 1681 1682 list[0] = primalv; 1683 list[1] = pcbddc->user_primal_vertices_local; 1684 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1685 PetscCall(ISSortRemoveDups(newp)); 1686 PetscCall(ISDestroy(&list[1])); 1687 pcbddc->user_primal_vertices_local = newp; 1688 } else { 1689 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1690 } 1691 } 1692 PetscFunctionReturn(PETSC_SUCCESS); 1693 } 1694 1695 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1696 { 1697 PetscInt f, *comp = (PetscInt *)ctx; 1698 1699 PetscFunctionBegin; 1700 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1701 PetscFunctionReturn(PETSC_SUCCESS); 1702 } 1703 1704 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1705 { 1706 Vec local, global; 1707 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1708 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1709 PetscBool monolithic = PETSC_FALSE; 1710 1711 PetscFunctionBegin; 1712 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1713 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1714 PetscOptionsEnd(); 1715 /* need to convert from global to local topology information and remove references to information in global ordering */ 1716 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1717 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1718 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1719 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1720 if (monolithic) { /* just get block size to properly compute vertices */ 1721 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); 1722 goto boundary; 1723 } 1724 1725 if (pcbddc->user_provided_isfordofs) { 1726 if (pcbddc->n_ISForDofs) { 1727 PetscInt i; 1728 1729 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1730 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1731 PetscInt bs; 1732 1733 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1734 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1735 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1736 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1737 } 1738 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1739 pcbddc->n_ISForDofs = 0; 1740 PetscCall(PetscFree(pcbddc->ISForDofs)); 1741 } 1742 } else { 1743 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1744 DM dm; 1745 1746 PetscCall(MatGetDM(pc->pmat, &dm)); 1747 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1748 if (dm) { 1749 IS *fields; 1750 PetscInt nf, i; 1751 1752 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1753 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1754 for (i = 0; i < nf; i++) { 1755 PetscInt bs; 1756 1757 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1758 PetscCall(ISGetBlockSize(fields[i], &bs)); 1759 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1760 PetscCall(ISDestroy(&fields[i])); 1761 } 1762 PetscCall(PetscFree(fields)); 1763 pcbddc->n_ISForDofsLocal = nf; 1764 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1765 PetscContainer c; 1766 1767 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1768 if (c) { 1769 MatISLocalFields lf; 1770 PetscCall(PetscContainerGetPointer(c, (void **)&lf)); 1771 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1772 } else { /* fallback, create the default fields if bs > 1 */ 1773 PetscInt i, n = matis->A->rmap->n; 1774 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1775 if (i > 1) { 1776 pcbddc->n_ISForDofsLocal = i; 1777 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1778 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); 1779 } 1780 } 1781 } 1782 } else { 1783 PetscInt i; 1784 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); 1785 } 1786 } 1787 1788 boundary: 1789 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1790 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1791 } else if (pcbddc->DirichletBoundariesLocal) { 1792 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1793 } 1794 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1795 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1796 } else if (pcbddc->NeumannBoundariesLocal) { 1797 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1798 } 1799 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); 1800 PetscCall(VecDestroy(&global)); 1801 PetscCall(VecDestroy(&local)); 1802 /* detect local disconnected subdomains if requested or needed */ 1803 if (pcbddc->detect_disconnected || matis->allow_repeated) { 1804 IS primalv = NULL; 1805 PetscInt nel; 1806 PetscBool filter = pcbddc->detect_disconnected_filter; 1807 1808 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1809 PetscCall(PetscFree(pcbddc->local_subs)); 1810 PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL)); 1811 if (matis->allow_repeated && nel) { 1812 const PetscInt *elsizes; 1813 1814 pcbddc->n_local_subs = nel; 1815 PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes)); 1816 PetscCall(PetscMalloc1(nel, &pcbddc->local_subs)); 1817 for (PetscInt i = 0, c = 0; i < nel; i++) { 1818 PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i])); 1819 c += elsizes[i]; 1820 } 1821 } else { 1822 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1823 } 1824 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1825 PetscCall(ISDestroy(&primalv)); 1826 } 1827 /* early stage corner detection */ 1828 { 1829 DM dm; 1830 1831 PetscCall(MatGetDM(pc->pmat, &dm)); 1832 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1833 if (dm) { 1834 PetscBool isda; 1835 1836 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1837 if (isda) { 1838 ISLocalToGlobalMapping l2l; 1839 IS corners; 1840 Mat lA; 1841 PetscBool gl, lo; 1842 1843 { 1844 Vec cvec; 1845 const PetscScalar *coords; 1846 PetscInt dof, n, cdim; 1847 PetscBool memc = PETSC_TRUE; 1848 1849 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1850 PetscCall(DMGetCoordinates(dm, &cvec)); 1851 PetscCall(VecGetLocalSize(cvec, &n)); 1852 PetscCall(VecGetBlockSize(cvec, &cdim)); 1853 n /= cdim; 1854 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1855 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1856 PetscCall(VecGetArrayRead(cvec, &coords)); 1857 #if defined(PETSC_USE_COMPLEX) 1858 memc = PETSC_FALSE; 1859 #endif 1860 if (dof != 1) memc = PETSC_FALSE; 1861 if (memc) { 1862 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1863 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1864 PetscReal *bcoords = pcbddc->mat_graph->coords; 1865 PetscInt i, b, d; 1866 1867 for (i = 0; i < n; i++) { 1868 for (b = 0; b < dof; b++) { 1869 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); 1870 } 1871 } 1872 } 1873 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1874 pcbddc->mat_graph->cdim = cdim; 1875 pcbddc->mat_graph->cnloc = dof * n; 1876 pcbddc->mat_graph->cloc = PETSC_FALSE; 1877 } 1878 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 1879 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 1880 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 1881 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 1882 lo = (PetscBool)(l2l && corners); 1883 PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 1884 if (gl) { /* From PETSc's DMDA */ 1885 const PetscInt *idx; 1886 PetscInt dof, bs, *idxout, n; 1887 1888 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1889 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 1890 PetscCall(ISGetLocalSize(corners, &n)); 1891 PetscCall(ISGetIndices(corners, &idx)); 1892 if (bs == dof) { 1893 PetscCall(PetscMalloc1(n, &idxout)); 1894 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 1895 } else { /* the original DMDA local-to-local map have been modified */ 1896 PetscInt i, d; 1897 1898 PetscCall(PetscMalloc1(dof * n, &idxout)); 1899 for (i = 0; i < n; i++) 1900 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 1901 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 1902 1903 bs = 1; 1904 n *= dof; 1905 } 1906 PetscCall(ISRestoreIndices(corners, &idx)); 1907 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1908 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 1909 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 1910 PetscCall(ISDestroy(&corners)); 1911 pcbddc->corner_selected = PETSC_TRUE; 1912 pcbddc->corner_selection = PETSC_TRUE; 1913 } 1914 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1915 } 1916 } 1917 } 1918 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1919 DM dm; 1920 1921 PetscCall(MatGetDM(pc->pmat, &dm)); 1922 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1923 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1924 Vec vcoords; 1925 PetscSection section; 1926 PetscReal *coords; 1927 PetscInt d, cdim, nl, nf, **ctxs; 1928 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1929 /* debug coordinates */ 1930 PetscViewer viewer; 1931 PetscBool flg; 1932 PetscViewerFormat format; 1933 const char *prefix; 1934 1935 PetscCall(DMGetCoordinateDim(dm, &cdim)); 1936 PetscCall(DMGetLocalSection(dm, §ion)); 1937 PetscCall(PetscSectionGetNumFields(section, &nf)); 1938 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 1939 PetscCall(VecGetLocalSize(vcoords, &nl)); 1940 PetscCall(PetscMalloc1(nl * cdim, &coords)); 1941 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 1942 PetscCall(PetscMalloc1(nf, &ctxs[0])); 1943 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 1944 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 1945 1946 /* debug coordinates */ 1947 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1948 PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 1949 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 1950 for (d = 0; d < cdim; d++) { 1951 PetscInt i; 1952 const PetscScalar *v; 1953 char name[16]; 1954 1955 for (i = 0; i < nf; i++) ctxs[i][0] = d; 1956 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d)); 1957 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 1958 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 1959 if (flg) PetscCall(VecView(vcoords, viewer)); 1960 PetscCall(VecGetArrayRead(vcoords, &v)); 1961 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 1962 PetscCall(VecRestoreArrayRead(vcoords, &v)); 1963 } 1964 PetscCall(VecDestroy(&vcoords)); 1965 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 1966 PetscCall(PetscFree(coords)); 1967 PetscCall(PetscFree(ctxs[0])); 1968 PetscCall(PetscFree2(funcs, ctxs)); 1969 if (flg) { 1970 PetscCall(PetscViewerPopFormat(viewer)); 1971 PetscCall(PetscViewerDestroy(&viewer)); 1972 } 1973 } 1974 } 1975 PetscFunctionReturn(PETSC_SUCCESS); 1976 } 1977 1978 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1979 { 1980 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1981 IS nis; 1982 const PetscInt *idxs; 1983 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 1984 1985 PetscFunctionBegin; 1986 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)(pc)), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 1987 if (mop == MPI_LAND) { 1988 /* init rootdata with true */ 1989 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 1990 } else { 1991 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 1992 } 1993 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 1994 PetscCall(ISGetLocalSize(*is, &nd)); 1995 PetscCall(ISGetIndices(*is, &idxs)); 1996 for (i = 0; i < nd; i++) 1997 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 1998 PetscCall(ISRestoreIndices(*is, &idxs)); 1999 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2000 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2001 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2002 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2003 if (mop == MPI_LAND) { 2004 PetscCall(PetscMalloc1(nd, &nidxs)); 2005 } else { 2006 PetscCall(PetscMalloc1(n, &nidxs)); 2007 } 2008 for (i = 0, nnd = 0; i < n; i++) 2009 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 2010 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 2011 PetscCall(ISDestroy(is)); 2012 *is = nis; 2013 PetscFunctionReturn(PETSC_SUCCESS); 2014 } 2015 2016 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) 2017 { 2018 PC_IS *pcis = (PC_IS *)pc->data; 2019 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2020 2021 PetscFunctionBegin; 2022 if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS); 2023 if (pcbddc->ChangeOfBasisMatrix) { 2024 Vec swap; 2025 2026 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 2027 swap = pcbddc->work_change; 2028 pcbddc->work_change = r; 2029 r = swap; 2030 } 2031 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2032 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2033 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2034 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 2035 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2036 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 2037 PetscCall(VecSet(z, 0.)); 2038 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2039 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2040 if (pcbddc->ChangeOfBasisMatrix) { 2041 pcbddc->work_change = r; 2042 PetscCall(VecCopy(z, pcbddc->work_change)); 2043 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 2044 } 2045 PetscFunctionReturn(PETSC_SUCCESS); 2046 } 2047 2048 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 2049 { 2050 PCBDDCBenignMatMult_ctx ctx; 2051 PetscBool apply_right, apply_left, reset_x; 2052 2053 PetscFunctionBegin; 2054 PetscCall(MatShellGetContext(A, &ctx)); 2055 if (transpose) { 2056 apply_right = ctx->apply_left; 2057 apply_left = ctx->apply_right; 2058 } else { 2059 apply_right = ctx->apply_right; 2060 apply_left = ctx->apply_left; 2061 } 2062 reset_x = PETSC_FALSE; 2063 if (apply_right) { 2064 const PetscScalar *ax; 2065 PetscInt nl, i; 2066 2067 PetscCall(VecGetLocalSize(x, &nl)); 2068 PetscCall(VecGetArrayRead(x, &ax)); 2069 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 2070 PetscCall(VecRestoreArrayRead(x, &ax)); 2071 for (i = 0; i < ctx->benign_n; i++) { 2072 PetscScalar sum, val; 2073 const PetscInt *idxs; 2074 PetscInt nz, j; 2075 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2076 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2077 sum = 0.; 2078 if (ctx->apply_p0) { 2079 val = ctx->work[idxs[nz - 1]]; 2080 for (j = 0; j < nz - 1; j++) { 2081 sum += ctx->work[idxs[j]]; 2082 ctx->work[idxs[j]] += val; 2083 } 2084 } else { 2085 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]]; 2086 } 2087 ctx->work[idxs[nz - 1]] -= sum; 2088 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2089 } 2090 PetscCall(VecPlaceArray(x, ctx->work)); 2091 reset_x = PETSC_TRUE; 2092 } 2093 if (transpose) { 2094 PetscCall(MatMultTranspose(ctx->A, x, y)); 2095 } else { 2096 PetscCall(MatMult(ctx->A, x, y)); 2097 } 2098 if (reset_x) PetscCall(VecResetArray(x)); 2099 if (apply_left) { 2100 PetscScalar *ay; 2101 PetscInt i; 2102 2103 PetscCall(VecGetArray(y, &ay)); 2104 for (i = 0; i < ctx->benign_n; i++) { 2105 PetscScalar sum, val; 2106 const PetscInt *idxs; 2107 PetscInt nz, j; 2108 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2109 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2110 val = -ay[idxs[nz - 1]]; 2111 if (ctx->apply_p0) { 2112 sum = 0.; 2113 for (j = 0; j < nz - 1; j++) { 2114 sum += ay[idxs[j]]; 2115 ay[idxs[j]] += val; 2116 } 2117 ay[idxs[nz - 1]] += sum; 2118 } else { 2119 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val; 2120 ay[idxs[nz - 1]] = 0.; 2121 } 2122 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2123 } 2124 PetscCall(VecRestoreArray(y, &ay)); 2125 } 2126 PetscFunctionReturn(PETSC_SUCCESS); 2127 } 2128 2129 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2130 { 2131 PetscFunctionBegin; 2132 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 2133 PetscFunctionReturn(PETSC_SUCCESS); 2134 } 2135 2136 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2137 { 2138 PetscFunctionBegin; 2139 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 2140 PetscFunctionReturn(PETSC_SUCCESS); 2141 } 2142 2143 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2144 { 2145 PC_IS *pcis = (PC_IS *)pc->data; 2146 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2147 PCBDDCBenignMatMult_ctx ctx; 2148 2149 PetscFunctionBegin; 2150 if (!restore) { 2151 Mat A_IB, A_BI; 2152 PetscScalar *work; 2153 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2154 2155 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 2156 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS); 2157 PetscCall(PetscMalloc1(pcis->n, &work)); 2158 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 2159 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 2160 PetscCall(MatSetType(A_IB, MATSHELL)); 2161 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private)); 2162 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 2163 PetscCall(PetscNew(&ctx)); 2164 PetscCall(MatShellSetContext(A_IB, ctx)); 2165 ctx->apply_left = PETSC_TRUE; 2166 ctx->apply_right = PETSC_FALSE; 2167 ctx->apply_p0 = PETSC_FALSE; 2168 ctx->benign_n = pcbddc->benign_n; 2169 if (reuse) { 2170 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2171 ctx->free = PETSC_FALSE; 2172 } else { /* TODO: could be optimized for successive solves */ 2173 ISLocalToGlobalMapping N_to_D; 2174 PetscInt i; 2175 2176 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 2177 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 2178 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); 2179 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2180 ctx->free = PETSC_TRUE; 2181 } 2182 ctx->A = pcis->A_IB; 2183 ctx->work = work; 2184 PetscCall(MatSetUp(A_IB)); 2185 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 2186 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 2187 pcis->A_IB = A_IB; 2188 2189 /* A_BI as A_IB^T */ 2190 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 2191 pcbddc->benign_original_mat = pcis->A_BI; 2192 pcis->A_BI = A_BI; 2193 } else { 2194 if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS); 2195 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2196 PetscCall(MatDestroy(&pcis->A_IB)); 2197 pcis->A_IB = ctx->A; 2198 ctx->A = NULL; 2199 PetscCall(MatDestroy(&pcis->A_BI)); 2200 pcis->A_BI = pcbddc->benign_original_mat; 2201 pcbddc->benign_original_mat = NULL; 2202 if (ctx->free) { 2203 PetscInt i; 2204 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2205 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2206 } 2207 PetscCall(PetscFree(ctx->work)); 2208 PetscCall(PetscFree(ctx)); 2209 } 2210 PetscFunctionReturn(PETSC_SUCCESS); 2211 } 2212 2213 /* used just in bddc debug mode */ 2214 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2215 { 2216 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2217 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2218 Mat An; 2219 2220 PetscFunctionBegin; 2221 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2222 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2223 if (is1) { 2224 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2225 PetscCall(MatDestroy(&An)); 2226 } else { 2227 *B = An; 2228 } 2229 PetscFunctionReturn(PETSC_SUCCESS); 2230 } 2231 2232 /* TODO: add reuse flag */ 2233 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2234 { 2235 Mat Bt; 2236 PetscScalar *a, *bdata; 2237 const PetscInt *ii, *ij; 2238 PetscInt m, n, i, nnz, *bii, *bij; 2239 PetscBool flg_row; 2240 2241 PetscFunctionBegin; 2242 PetscCall(MatGetSize(A, &n, &m)); 2243 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2244 PetscCall(MatSeqAIJGetArray(A, &a)); 2245 nnz = n; 2246 for (i = 0; i < ii[n]; i++) { 2247 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2248 } 2249 PetscCall(PetscMalloc1(n + 1, &bii)); 2250 PetscCall(PetscMalloc1(nnz, &bij)); 2251 PetscCall(PetscMalloc1(nnz, &bdata)); 2252 nnz = 0; 2253 bii[0] = 0; 2254 for (i = 0; i < n; i++) { 2255 PetscInt j; 2256 for (j = ii[i]; j < ii[i + 1]; j++) { 2257 PetscScalar entry = a[j]; 2258 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2259 bij[nnz] = ij[j]; 2260 bdata[nnz] = entry; 2261 nnz++; 2262 } 2263 } 2264 bii[i + 1] = nnz; 2265 } 2266 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2267 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2268 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2269 { 2270 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data; 2271 b->free_a = PETSC_TRUE; 2272 b->free_ij = PETSC_TRUE; 2273 } 2274 if (*B == A) PetscCall(MatDestroy(&A)); 2275 *B = Bt; 2276 PetscFunctionReturn(PETSC_SUCCESS); 2277 } 2278 2279 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) 2280 { 2281 Mat B = NULL; 2282 DM dm; 2283 IS is_dummy, *cc_n; 2284 ISLocalToGlobalMapping l2gmap_dummy; 2285 PCBDDCGraph graph; 2286 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2287 PetscInt i, n; 2288 PetscInt *xadj, *adjncy; 2289 PetscBool isplex = PETSC_FALSE; 2290 2291 PetscFunctionBegin; 2292 if (ncc) *ncc = 0; 2293 if (cc) *cc = NULL; 2294 if (primalv) *primalv = NULL; 2295 PetscCall(PCBDDCGraphCreate(&graph)); 2296 PetscCall(MatGetDM(pc->pmat, &dm)); 2297 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2298 if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, "")); 2299 if (filter) isplex = PETSC_FALSE; 2300 2301 if (isplex) { /* this code has been modified from plexpartition.c */ 2302 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2303 PetscInt *adj = NULL; 2304 IS cellNumbering; 2305 const PetscInt *cellNum; 2306 PetscBool useCone, useClosure; 2307 PetscSection section; 2308 PetscSegBuffer adjBuffer; 2309 PetscSF sfPoint; 2310 2311 PetscCall(DMConvert(dm, DMPLEX, &dm)); 2312 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2313 PetscCall(DMGetPointSF(dm, &sfPoint)); 2314 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2315 /* Build adjacency graph via a section/segbuffer */ 2316 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2317 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2318 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2319 /* Always use FVM adjacency to create partitioner graph */ 2320 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2321 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2322 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2323 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2324 for (n = 0, p = pStart; p < pEnd; p++) { 2325 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2326 if (nroots > 0) { 2327 if (cellNum[p] < 0) continue; 2328 } 2329 adjSize = PETSC_DETERMINE; 2330 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2331 for (a = 0; a < adjSize; ++a) { 2332 const PetscInt point = adj[a]; 2333 if (pStart <= point && point < pEnd) { 2334 PetscInt *PETSC_RESTRICT pBuf; 2335 PetscCall(PetscSectionAddDof(section, p, 1)); 2336 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2337 *pBuf = point; 2338 } 2339 } 2340 n++; 2341 } 2342 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2343 /* Derive CSR graph from section/segbuffer */ 2344 PetscCall(PetscSectionSetUp(section)); 2345 PetscCall(PetscSectionGetStorageSize(section, &size)); 2346 PetscCall(PetscMalloc1(n + 1, &xadj)); 2347 for (idx = 0, p = pStart; p < pEnd; p++) { 2348 if (nroots > 0) { 2349 if (cellNum[p] < 0) continue; 2350 } 2351 PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++])); 2352 } 2353 xadj[n] = size; 2354 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2355 /* Clean up */ 2356 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2357 PetscCall(PetscSectionDestroy(§ion)); 2358 PetscCall(PetscFree(adj)); 2359 graph->xadj = xadj; 2360 graph->adjncy = adjncy; 2361 } else { 2362 Mat A; 2363 PetscBool isseqaij, flg_row; 2364 2365 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2366 if (!A->rmap->N || !A->cmap->N) { 2367 PetscCall(PCBDDCGraphDestroy(&graph)); 2368 PetscFunctionReturn(PETSC_SUCCESS); 2369 } 2370 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2371 if (!isseqaij && filter) { 2372 PetscBool isseqdense; 2373 2374 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2375 if (!isseqdense) { 2376 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2377 } else { /* TODO: rectangular case and LDA */ 2378 PetscScalar *array; 2379 PetscReal chop = 1.e-6; 2380 2381 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2382 PetscCall(MatDenseGetArray(B, &array)); 2383 PetscCall(MatGetSize(B, &n, NULL)); 2384 for (i = 0; i < n; i++) { 2385 PetscInt j; 2386 for (j = i + 1; j < n; j++) { 2387 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2388 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2389 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2390 } 2391 } 2392 PetscCall(MatDenseRestoreArray(B, &array)); 2393 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2394 } 2395 } else { 2396 PetscCall(PetscObjectReference((PetscObject)A)); 2397 B = A; 2398 } 2399 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2400 2401 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2402 if (filter) { 2403 PetscScalar *data; 2404 PetscInt j, cum; 2405 2406 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2407 PetscCall(MatSeqAIJGetArray(B, &data)); 2408 cum = 0; 2409 for (i = 0; i < n; i++) { 2410 PetscInt t; 2411 2412 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2413 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue; 2414 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2415 } 2416 t = xadj_filtered[i]; 2417 xadj_filtered[i] = cum; 2418 cum += t; 2419 } 2420 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2421 graph->xadj = xadj_filtered; 2422 graph->adjncy = adjncy_filtered; 2423 } else { 2424 graph->xadj = xadj; 2425 graph->adjncy = adjncy; 2426 } 2427 } 2428 /* compute local connected components using PCBDDCGraph */ 2429 graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */ 2430 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2431 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2432 PetscCall(ISDestroy(&is_dummy)); 2433 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX)); 2434 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2435 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2436 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2437 2438 /* partial clean up */ 2439 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2440 if (B) { 2441 PetscBool flg_row; 2442 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2443 PetscCall(MatDestroy(&B)); 2444 } 2445 if (isplex) { 2446 PetscCall(PetscFree(xadj)); 2447 PetscCall(PetscFree(adjncy)); 2448 } 2449 2450 /* get back data */ 2451 if (isplex) { 2452 if (ncc) *ncc = graph->ncc; 2453 if (cc || primalv) { 2454 Mat A; 2455 PetscBT btv, btvt, btvc; 2456 PetscSection subSection; 2457 PetscInt *ids, cum, cump, *cids, *pids; 2458 PetscInt dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd; 2459 2460 PetscCall(DMGetDimension(dm, &dim)); 2461 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2462 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 2463 PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd)); 2464 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 2465 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 2466 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2467 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2468 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2469 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2470 PetscCall(PetscBTCreate(pEnd - pStart, &btvc)); 2471 2472 /* First see if we find corners for the subdomains, i.e. a vertex 2473 shared by at least dim subdomain boundary faces. This does not 2474 cover all the possible cases with simplices but it is enough 2475 for tensor cells */ 2476 if (vStart != fStart && dim <= 3) { 2477 for (PetscInt c = cStart; c < cEnd; c++) { 2478 PetscInt nf, cnt = 0, mcnt = dim, *cfaces; 2479 const PetscInt *faces; 2480 2481 PetscCall(DMPlexGetConeSize(dm, c, &nf)); 2482 PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces)); 2483 PetscCall(DMPlexGetCone(dm, c, &faces)); 2484 for (PetscInt f = 0; f < nf; f++) { 2485 PetscInt nc, ff; 2486 2487 PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc)); 2488 PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL)); 2489 if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f]; 2490 } 2491 if (cnt >= mcnt) { 2492 PetscInt size, *closure = NULL; 2493 2494 PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2495 for (PetscInt k = 0; k < 2 * size; k += 2) { 2496 PetscInt v = closure[k]; 2497 if (v >= vStart && v < vEnd) { 2498 PetscInt vsize, *vclosure = NULL; 2499 2500 cnt = 0; 2501 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2502 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) { 2503 PetscInt f = vclosure[vk]; 2504 if (f >= fStart && f < fEnd) { 2505 PetscInt nc, ff; 2506 PetscBool valid = PETSC_FALSE; 2507 2508 for (PetscInt fk = 0; fk < nf; fk++) 2509 if (f == cfaces[fk]) valid = PETSC_TRUE; 2510 if (!valid) continue; 2511 PetscCall(DMPlexGetSupportSize(dm, f, &nc)); 2512 PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL)); 2513 if (nc == 1 && f == ff) cnt++; 2514 } 2515 } 2516 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart)); 2517 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2518 } 2519 } 2520 PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2521 } 2522 PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces)); 2523 } 2524 } 2525 2526 cids[0] = 0; 2527 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2528 PetscInt j; 2529 2530 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2531 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2532 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2533 2534 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2535 for (k = 0; k < 2 * size; k += 2) { 2536 PetscInt s, pp, p = closure[k], off, dof, cdof; 2537 2538 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2539 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2540 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2541 for (s = 0; s < dof - cdof; s++) { 2542 if (PetscBTLookupSet(btvt, off + s)) continue; 2543 if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2544 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2545 else pids[cump++] = off + s; /* cross-vertex */ 2546 } 2547 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2548 if (pp != p) { 2549 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2550 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2551 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2552 for (s = 0; s < dof - cdof; s++) { 2553 if (PetscBTLookupSet(btvt, off + s)) continue; 2554 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2555 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2556 else pids[cump++] = off + s; /* cross-vertex */ 2557 } 2558 } 2559 } 2560 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2561 } 2562 cids[i + 1] = cum; 2563 /* mark dofs as already assigned */ 2564 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j])); 2565 } 2566 if (cc) { 2567 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2568 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); 2569 *cc = cc_n; 2570 } 2571 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2572 PetscCall(PetscFree3(ids, cids, pids)); 2573 PetscCall(PetscBTDestroy(&btv)); 2574 PetscCall(PetscBTDestroy(&btvt)); 2575 PetscCall(PetscBTDestroy(&btvc)); 2576 PetscCall(DMDestroy(&dm)); 2577 } 2578 } else { 2579 if (ncc) *ncc = graph->ncc; 2580 if (cc) { 2581 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2582 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); 2583 *cc = cc_n; 2584 } 2585 } 2586 /* clean up graph */ 2587 graph->xadj = NULL; 2588 graph->adjncy = NULL; 2589 PetscCall(PCBDDCGraphDestroy(&graph)); 2590 PetscFunctionReturn(PETSC_SUCCESS); 2591 } 2592 2593 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2594 { 2595 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2596 PC_IS *pcis = (PC_IS *)pc->data; 2597 IS dirIS = NULL; 2598 PetscInt i; 2599 2600 PetscFunctionBegin; 2601 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2602 if (zerodiag) { 2603 Mat A; 2604 Vec vec3_N; 2605 PetscScalar *vals; 2606 const PetscInt *idxs; 2607 PetscInt nz, *count; 2608 2609 /* p0 */ 2610 PetscCall(VecSet(pcis->vec1_N, 0.)); 2611 PetscCall(PetscMalloc1(pcis->n, &vals)); 2612 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2613 PetscCall(ISGetIndices(zerodiag, &idxs)); 2614 for (i = 0; i < nz; i++) vals[i] = 1.; 2615 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2616 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2617 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2618 /* v_I */ 2619 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2620 for (i = 0; i < nz; i++) vals[i] = 0.; 2621 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2622 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2623 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2624 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2625 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2626 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2627 if (dirIS) { 2628 PetscInt n; 2629 2630 PetscCall(ISGetLocalSize(dirIS, &n)); 2631 PetscCall(ISGetIndices(dirIS, &idxs)); 2632 for (i = 0; i < n; i++) vals[i] = 0.; 2633 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2634 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2635 } 2636 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2637 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2638 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2639 PetscCall(VecSet(vec3_N, 0.)); 2640 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2641 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2642 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2643 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2644 PetscCall(PetscFree(vals)); 2645 PetscCall(VecDestroy(&vec3_N)); 2646 2647 /* there should not be any pressure dofs lying on the interface */ 2648 PetscCall(PetscCalloc1(pcis->n, &count)); 2649 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2650 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2651 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2652 PetscCall(ISGetIndices(zerodiag, &idxs)); 2653 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2654 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2655 PetscCall(PetscFree(count)); 2656 } 2657 PetscCall(ISDestroy(&dirIS)); 2658 2659 /* check PCBDDCBenignGetOrSetP0 */ 2660 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2661 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2662 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2663 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2664 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2665 for (i = 0; i < pcbddc->benign_n; i++) { 2666 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2667 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2668 } 2669 PetscFunctionReturn(PETSC_SUCCESS); 2670 } 2671 2672 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2673 { 2674 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2675 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2676 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2677 PetscInt nz, n, benign_n, bsp = 1; 2678 PetscInt *interior_dofs, n_interior_dofs, nneu; 2679 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2680 2681 PetscFunctionBegin; 2682 if (reuse) goto project_b0; 2683 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2684 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2685 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2686 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2687 has_null_pressures = PETSC_TRUE; 2688 have_null = PETSC_TRUE; 2689 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2690 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2691 Checks if all the pressure dofs in each subdomain have a zero diagonal 2692 If not, a change of basis on pressures is not needed 2693 since the local Schur complements are already SPD 2694 */ 2695 if (pcbddc->n_ISForDofsLocal) { 2696 IS iP = NULL; 2697 PetscInt p, *pp; 2698 PetscBool flg; 2699 2700 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2701 n = pcbddc->n_ISForDofsLocal; 2702 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2703 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2704 PetscOptionsEnd(); 2705 if (!flg) { 2706 n = 1; 2707 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2708 } 2709 2710 bsp = 0; 2711 for (p = 0; p < n; p++) { 2712 PetscInt bs; 2713 2714 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2715 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2716 bsp += bs; 2717 } 2718 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2719 bsp = 0; 2720 for (p = 0; p < n; p++) { 2721 const PetscInt *idxs; 2722 PetscInt b, bs, npl, *bidxs; 2723 2724 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2725 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2726 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2727 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2728 for (b = 0; b < bs; b++) { 2729 PetscInt i; 2730 2731 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2732 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2733 bsp++; 2734 } 2735 PetscCall(PetscFree(bidxs)); 2736 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2737 } 2738 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2739 2740 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2741 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2742 if (iP) { 2743 IS newpressures; 2744 2745 PetscCall(ISDifference(pressures, iP, &newpressures)); 2746 PetscCall(ISDestroy(&pressures)); 2747 pressures = newpressures; 2748 } 2749 PetscCall(ISSorted(pressures, &sorted)); 2750 if (!sorted) PetscCall(ISSort(pressures)); 2751 PetscCall(PetscFree(pp)); 2752 } 2753 2754 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2755 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2756 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2757 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2758 PetscCall(ISSorted(zerodiag, &sorted)); 2759 if (!sorted) PetscCall(ISSort(zerodiag)); 2760 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2761 zerodiag_save = zerodiag; 2762 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2763 if (!nz) { 2764 if (n) have_null = PETSC_FALSE; 2765 has_null_pressures = PETSC_FALSE; 2766 PetscCall(ISDestroy(&zerodiag)); 2767 } 2768 recompute_zerodiag = PETSC_FALSE; 2769 2770 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2771 zerodiag_subs = NULL; 2772 benign_n = 0; 2773 n_interior_dofs = 0; 2774 interior_dofs = NULL; 2775 nneu = 0; 2776 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); 2777 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2778 if (checkb) { /* need to compute interior nodes */ 2779 PetscInt n, i; 2780 PetscInt *count; 2781 ISLocalToGlobalMapping mapping; 2782 2783 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL)); 2784 PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL)); 2785 PetscCall(PetscMalloc1(n, &interior_dofs)); 2786 for (i = 0; i < n; i++) 2787 if (count[i] < 2) interior_dofs[n_interior_dofs++] = i; 2788 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL)); 2789 } 2790 if (has_null_pressures) { 2791 IS *subs; 2792 PetscInt nsubs, i, j, nl; 2793 const PetscInt *idxs; 2794 PetscScalar *array; 2795 Vec *work; 2796 2797 subs = pcbddc->local_subs; 2798 nsubs = pcbddc->n_local_subs; 2799 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2800 if (checkb) { 2801 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2802 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2803 PetscCall(ISGetIndices(zerodiag, &idxs)); 2804 /* work[0] = 1_p */ 2805 PetscCall(VecSet(work[0], 0.)); 2806 PetscCall(VecGetArray(work[0], &array)); 2807 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2808 PetscCall(VecRestoreArray(work[0], &array)); 2809 /* work[0] = 1_v */ 2810 PetscCall(VecSet(work[1], 1.)); 2811 PetscCall(VecGetArray(work[1], &array)); 2812 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2813 PetscCall(VecRestoreArray(work[1], &array)); 2814 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2815 } 2816 2817 if (nsubs > 1 || bsp > 1) { 2818 IS *is; 2819 PetscInt b, totb; 2820 2821 totb = bsp; 2822 is = bsp > 1 ? bzerodiag : &zerodiag; 2823 nsubs = PetscMax(nsubs, 1); 2824 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2825 for (b = 0; b < totb; b++) { 2826 for (i = 0; i < nsubs; i++) { 2827 ISLocalToGlobalMapping l2g; 2828 IS t_zerodiag_subs; 2829 PetscInt nl; 2830 2831 if (subs) { 2832 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2833 } else { 2834 IS tis; 2835 2836 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2837 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2838 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2839 PetscCall(ISDestroy(&tis)); 2840 } 2841 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2842 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2843 if (nl) { 2844 PetscBool valid = PETSC_TRUE; 2845 2846 if (checkb) { 2847 PetscCall(VecSet(matis->x, 0)); 2848 PetscCall(ISGetLocalSize(subs[i], &nl)); 2849 PetscCall(ISGetIndices(subs[i], &idxs)); 2850 PetscCall(VecGetArray(matis->x, &array)); 2851 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2852 PetscCall(VecRestoreArray(matis->x, &array)); 2853 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2854 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2855 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2856 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2857 PetscCall(VecGetArray(matis->y, &array)); 2858 for (j = 0; j < n_interior_dofs; j++) { 2859 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2860 valid = PETSC_FALSE; 2861 break; 2862 } 2863 } 2864 PetscCall(VecRestoreArray(matis->y, &array)); 2865 } 2866 if (valid && nneu) { 2867 const PetscInt *idxs; 2868 PetscInt nzb; 2869 2870 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2871 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2872 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2873 if (nzb) valid = PETSC_FALSE; 2874 } 2875 if (valid && pressures) { 2876 IS t_pressure_subs, tmp; 2877 PetscInt i1, i2; 2878 2879 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 2880 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 2881 PetscCall(ISGetLocalSize(tmp, &i1)); 2882 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 2883 if (i2 != i1) valid = PETSC_FALSE; 2884 PetscCall(ISDestroy(&t_pressure_subs)); 2885 PetscCall(ISDestroy(&tmp)); 2886 } 2887 if (valid) { 2888 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 2889 benign_n++; 2890 } else recompute_zerodiag = PETSC_TRUE; 2891 } 2892 PetscCall(ISDestroy(&t_zerodiag_subs)); 2893 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2894 } 2895 } 2896 } else { /* there's just one subdomain (or zero if they have not been detected */ 2897 PetscBool valid = PETSC_TRUE; 2898 2899 if (nneu) valid = PETSC_FALSE; 2900 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid)); 2901 if (valid && checkb) { 2902 PetscCall(MatMult(matis->A, work[0], matis->x)); 2903 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 2904 PetscCall(VecGetArray(matis->x, &array)); 2905 for (j = 0; j < n_interior_dofs; j++) { 2906 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2907 valid = PETSC_FALSE; 2908 break; 2909 } 2910 } 2911 PetscCall(VecRestoreArray(matis->x, &array)); 2912 } 2913 if (valid) { 2914 benign_n = 1; 2915 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 2916 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2917 zerodiag_subs[0] = zerodiag; 2918 } 2919 } 2920 if (checkb) PetscCall(VecDestroyVecs(2, &work)); 2921 } 2922 PetscCall(PetscFree(interior_dofs)); 2923 2924 if (!benign_n) { 2925 PetscInt n; 2926 2927 PetscCall(ISDestroy(&zerodiag)); 2928 recompute_zerodiag = PETSC_FALSE; 2929 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2930 if (n) have_null = PETSC_FALSE; 2931 } 2932 2933 /* final check for null pressures */ 2934 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null)); 2935 2936 if (recompute_zerodiag) { 2937 PetscCall(ISDestroy(&zerodiag)); 2938 if (benign_n == 1) { 2939 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2940 zerodiag = zerodiag_subs[0]; 2941 } else { 2942 PetscInt i, nzn, *new_idxs; 2943 2944 nzn = 0; 2945 for (i = 0; i < benign_n; i++) { 2946 PetscInt ns; 2947 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2948 nzn += ns; 2949 } 2950 PetscCall(PetscMalloc1(nzn, &new_idxs)); 2951 nzn = 0; 2952 for (i = 0; i < benign_n; i++) { 2953 PetscInt ns, *idxs; 2954 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2955 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2956 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 2957 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2958 nzn += ns; 2959 } 2960 PetscCall(PetscSortInt(nzn, new_idxs)); 2961 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 2962 } 2963 have_null = PETSC_FALSE; 2964 } 2965 2966 /* determines if the coarse solver will be singular or not */ 2967 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2968 2969 /* Prepare matrix to compute no-net-flux */ 2970 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2971 Mat A, loc_divudotp; 2972 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 2973 IS row, col, isused = NULL; 2974 PetscInt M, N, n, st, n_isused; 2975 2976 if (pressures) { 2977 isused = pressures; 2978 } else { 2979 isused = zerodiag_save; 2980 } 2981 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 2982 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2983 PetscCall(MatGetLocalSize(A, &n, NULL)); 2984 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 2985 n_isused = 0; 2986 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused)); 2987 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 2988 st = st - n_isused; 2989 if (n) { 2990 const PetscInt *gidxs; 2991 2992 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 2993 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 2994 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2995 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2996 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 2997 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 2998 } else { 2999 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 3000 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 3001 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 3002 } 3003 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 3004 PetscCall(ISGetSize(row, &M)); 3005 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 3006 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 3007 PetscCall(ISDestroy(&row)); 3008 PetscCall(ISDestroy(&col)); 3009 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 3010 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 3011 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3012 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 3013 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 3014 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 3015 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 3016 PetscCall(MatDestroy(&loc_divudotp)); 3017 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3018 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3019 } 3020 PetscCall(ISDestroy(&zerodiag_save)); 3021 PetscCall(ISDestroy(&pressures)); 3022 if (bzerodiag) { 3023 PetscInt i; 3024 3025 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i])); 3026 PetscCall(PetscFree(bzerodiag)); 3027 } 3028 pcbddc->benign_n = benign_n; 3029 pcbddc->benign_zerodiag_subs = zerodiag_subs; 3030 3031 /* determines if the problem has subdomains with 0 pressure block */ 3032 have_null = (PetscBool)(!!pcbddc->benign_n); 3033 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 3034 3035 project_b0: 3036 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 3037 /* change of basis and p0 dofs */ 3038 if (pcbddc->benign_n) { 3039 PetscInt i, s, *nnz; 3040 3041 /* local change of basis for pressures */ 3042 PetscCall(MatDestroy(&pcbddc->benign_change)); 3043 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 3044 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 3045 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 3046 PetscCall(PetscMalloc1(n, &nnz)); 3047 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 3048 for (i = 0; i < pcbddc->benign_n; i++) { 3049 const PetscInt *idxs; 3050 PetscInt nzs, j; 3051 3052 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 3053 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3054 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 3055 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 3056 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3057 } 3058 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 3059 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3060 PetscCall(PetscFree(nnz)); 3061 /* set identity by default */ 3062 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); 3063 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3064 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 3065 /* set change on pressures */ 3066 for (s = 0; s < pcbddc->benign_n; s++) { 3067 PetscScalar *array; 3068 const PetscInt *idxs; 3069 PetscInt nzs; 3070 3071 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 3072 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3073 for (i = 0; i < nzs - 1; i++) { 3074 PetscScalar vals[2]; 3075 PetscInt cols[2]; 3076 3077 cols[0] = idxs[i]; 3078 cols[1] = idxs[nzs - 1]; 3079 vals[0] = 1.; 3080 vals[1] = 1.; 3081 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 3082 } 3083 PetscCall(PetscMalloc1(nzs, &array)); 3084 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 3085 array[nzs - 1] = 1.; 3086 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 3087 /* store local idxs for p0 */ 3088 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 3089 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3090 PetscCall(PetscFree(array)); 3091 } 3092 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3093 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3094 3095 /* project if needed */ 3096 if (pcbddc->benign_change_explicit) { 3097 Mat M; 3098 3099 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 3100 PetscCall(MatDestroy(&pcbddc->local_mat)); 3101 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 3102 PetscCall(MatDestroy(&M)); 3103 } 3104 /* store global idxs for p0 */ 3105 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 3106 } 3107 *zerodiaglocal = zerodiag; 3108 PetscFunctionReturn(PETSC_SUCCESS); 3109 } 3110 3111 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3112 { 3113 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3114 PetscScalar *array; 3115 3116 PetscFunctionBegin; 3117 if (!pcbddc->benign_sf) { 3118 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 3119 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 3120 } 3121 if (get) { 3122 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 3123 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3124 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3125 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 3126 } else { 3127 PetscCall(VecGetArray(v, &array)); 3128 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3129 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3130 PetscCall(VecRestoreArray(v, &array)); 3131 } 3132 PetscFunctionReturn(PETSC_SUCCESS); 3133 } 3134 3135 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3136 { 3137 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3138 3139 PetscFunctionBegin; 3140 /* TODO: add error checking 3141 - avoid nested pop (or push) calls. 3142 - cannot push before pop. 3143 - cannot call this if pcbddc->local_mat is NULL 3144 */ 3145 if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 3146 if (pop) { 3147 if (pcbddc->benign_change_explicit) { 3148 IS is_p0; 3149 MatReuse reuse; 3150 3151 /* extract B_0 */ 3152 reuse = MAT_INITIAL_MATRIX; 3153 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX; 3154 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 3155 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 3156 /* remove rows and cols from local problem */ 3157 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 3158 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 3159 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 3160 PetscCall(ISDestroy(&is_p0)); 3161 } else { 3162 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 3163 PetscScalar *vals; 3164 PetscInt i, n, *idxs_ins; 3165 3166 PetscCall(VecGetLocalSize(matis->y, &n)); 3167 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 3168 if (!pcbddc->benign_B0) { 3169 PetscInt *nnz; 3170 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 3171 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 3172 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 3173 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 3174 for (i = 0; i < pcbddc->benign_n; i++) { 3175 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 3176 nnz[i] = n - nnz[i]; 3177 } 3178 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 3179 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3180 PetscCall(PetscFree(nnz)); 3181 } 3182 3183 for (i = 0; i < pcbddc->benign_n; i++) { 3184 PetscScalar *array; 3185 PetscInt *idxs, j, nz, cum; 3186 3187 PetscCall(VecSet(matis->x, 0.)); 3188 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 3189 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3190 for (j = 0; j < nz; j++) vals[j] = 1.; 3191 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 3192 PetscCall(VecAssemblyBegin(matis->x)); 3193 PetscCall(VecAssemblyEnd(matis->x)); 3194 PetscCall(VecSet(matis->y, 0.)); 3195 PetscCall(MatMult(matis->A, matis->x, matis->y)); 3196 PetscCall(VecGetArray(matis->y, &array)); 3197 cum = 0; 3198 for (j = 0; j < n; j++) { 3199 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3200 vals[cum] = array[j]; 3201 idxs_ins[cum] = j; 3202 cum++; 3203 } 3204 } 3205 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 3206 PetscCall(VecRestoreArray(matis->y, &array)); 3207 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3208 } 3209 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3210 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3211 PetscCall(PetscFree2(idxs_ins, vals)); 3212 } 3213 } else { /* push */ 3214 3215 PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 3216 for (PetscInt i = 0; i < pcbddc->benign_n; i++) { 3217 PetscScalar *B0_vals; 3218 PetscInt *B0_cols, B0_ncol; 3219 3220 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3221 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 3222 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 3223 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 3224 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3225 } 3226 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3227 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3228 } 3229 PetscFunctionReturn(PETSC_SUCCESS); 3230 } 3231 3232 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3233 { 3234 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3235 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3236 PetscBLASInt B_dummyint, B_neigs, B_ierr, B_lwork; 3237 PetscBLASInt *B_iwork, *B_ifail; 3238 PetscScalar *work, lwork; 3239 PetscScalar *St, *S, *eigv; 3240 PetscScalar *Sarray, *Starray; 3241 PetscReal *eigs, thresh, lthresh, uthresh; 3242 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 3243 PetscBool allocated_S_St, upart; 3244 #if defined(PETSC_USE_COMPLEX) 3245 PetscReal *rwork; 3246 #endif 3247 3248 PetscFunctionBegin; 3249 if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS); 3250 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 3251 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3252 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 3253 sub_schurs->is_posdef); 3254 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3255 3256 if (pcbddc->dbg_flag) { 3257 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3258 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3259 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 3260 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 3261 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3262 } 3263 3264 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); 3265 3266 /* max size of subsets */ 3267 mss = 0; 3268 for (i = 0; i < sub_schurs->n_subs; i++) { 3269 PetscInt subset_size; 3270 3271 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3272 mss = PetscMax(mss, subset_size); 3273 } 3274 3275 /* min/max and threshold */ 3276 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3277 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3278 nmax = PetscMax(nmin, nmax); 3279 allocated_S_St = PETSC_FALSE; 3280 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3281 allocated_S_St = PETSC_TRUE; 3282 } 3283 3284 /* allocate lapack workspace */ 3285 cum = cum2 = 0; 3286 maxneigs = 0; 3287 for (i = 0; i < sub_schurs->n_subs; i++) { 3288 PetscInt n, subset_size; 3289 3290 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3291 n = PetscMin(subset_size, nmax); 3292 cum += subset_size; 3293 cum2 += subset_size * n; 3294 maxneigs = PetscMax(maxneigs, n); 3295 } 3296 lwork = 0; 3297 if (mss) { 3298 PetscScalar sdummy = 0.; 3299 PetscBLASInt B_itype = 1; 3300 PetscBLASInt B_N, idummy = 0; 3301 PetscReal rdummy = 0., zero = 0.0; 3302 PetscReal eps = 0.0; /* dlamch? */ 3303 3304 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3305 PetscCall(PetscBLASIntCast(mss, &B_N)); 3306 B_lwork = -1; 3307 /* some implementations may complain about NULL pointers, even if we are querying */ 3308 S = &sdummy; 3309 St = &sdummy; 3310 eigs = &rdummy; 3311 eigv = &sdummy; 3312 B_iwork = &idummy; 3313 B_ifail = &idummy; 3314 #if defined(PETSC_USE_COMPLEX) 3315 rwork = &rdummy; 3316 #endif 3317 thresh = 1.0; 3318 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3319 #if defined(PETSC_USE_COMPLEX) 3320 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3321 #else 3322 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3323 #endif 3324 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %d", (int)B_ierr); 3325 PetscCall(PetscFPTrapPop()); 3326 } 3327 3328 nv = 0; 3329 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3330 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3331 } 3332 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3333 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); 3334 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3335 #if defined(PETSC_USE_COMPLEX) 3336 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3337 #endif 3338 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3339 &pcbddc->adaptive_constraints_data)); 3340 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3341 3342 maxneigs = 0; 3343 cum = cumarray = 0; 3344 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3345 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3346 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3347 const PetscInt *idxs; 3348 3349 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3350 for (cum = 0; cum < nv; cum++) { 3351 pcbddc->adaptive_constraints_n[cum] = 1; 3352 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3353 pcbddc->adaptive_constraints_data[cum] = 1.0; 3354 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3355 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3356 } 3357 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3358 } 3359 3360 if (mss) { /* multilevel */ 3361 if (sub_schurs->gdsw) { 3362 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3363 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3364 } else { 3365 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3366 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3367 } 3368 } 3369 3370 lthresh = pcbddc->adaptive_threshold[0]; 3371 uthresh = pcbddc->adaptive_threshold[1]; 3372 upart = pcbddc->use_deluxe_scaling; 3373 for (i = 0; i < sub_schurs->n_subs; i++) { 3374 const PetscInt *idxs; 3375 PetscReal upper, lower; 3376 PetscInt j, subset_size, eigs_start = 0; 3377 PetscBLASInt B_N; 3378 PetscBool same_data = PETSC_FALSE; 3379 PetscBool scal = PETSC_FALSE; 3380 3381 if (upart) { 3382 upper = PETSC_MAX_REAL; 3383 lower = uthresh; 3384 } else { 3385 if (sub_schurs->gdsw) { 3386 upper = uthresh; 3387 lower = PETSC_MIN_REAL; 3388 } else { 3389 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3390 upper = 1. / uthresh; 3391 lower = 0.; 3392 } 3393 } 3394 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3395 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3396 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3397 /* this is experimental: we assume the dofs have been properly grouped to have 3398 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3399 if (!sub_schurs->is_posdef) { 3400 Mat T; 3401 3402 for (j = 0; j < subset_size; j++) { 3403 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3404 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3405 PetscCall(MatScale(T, -1.0)); 3406 PetscCall(MatDestroy(&T)); 3407 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3408 PetscCall(MatScale(T, -1.0)); 3409 PetscCall(MatDestroy(&T)); 3410 if (sub_schurs->change_primal_sub) { 3411 PetscInt nz, k; 3412 const PetscInt *idxs; 3413 3414 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3415 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3416 for (k = 0; k < nz; k++) { 3417 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3418 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3419 } 3420 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3421 } 3422 scal = PETSC_TRUE; 3423 break; 3424 } 3425 } 3426 } 3427 3428 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3429 if (sub_schurs->is_symmetric) { 3430 PetscInt j, k; 3431 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3432 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3433 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3434 } 3435 for (j = 0; j < subset_size; j++) { 3436 for (k = j; k < subset_size; k++) { 3437 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3438 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3439 } 3440 } 3441 } else { 3442 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3443 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3444 } 3445 } else { 3446 S = Sarray + cumarray; 3447 St = Starray + cumarray; 3448 } 3449 /* see if we can save some work */ 3450 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); 3451 3452 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3453 B_neigs = 0; 3454 } else { 3455 PetscBLASInt B_itype = 1; 3456 PetscBLASInt B_IL, B_IU; 3457 PetscReal eps = -1.0; /* dlamch? */ 3458 PetscInt nmin_s; 3459 PetscBool compute_range; 3460 3461 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3462 B_neigs = 0; 3463 compute_range = (PetscBool)!same_data; 3464 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3465 3466 if (pcbddc->dbg_flag) { 3467 PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof; 3468 3469 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); 3470 PetscCall( 3471 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc)); 3472 } 3473 3474 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3475 if (compute_range) { 3476 /* ask for eigenvalues larger than thresh */ 3477 if (sub_schurs->is_posdef) { 3478 #if defined(PETSC_USE_COMPLEX) 3479 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3480 #else 3481 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3482 #endif 3483 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3484 } else { /* no theory so far, but it works nicely */ 3485 PetscInt recipe = 0, recipe_m = 1; 3486 PetscReal bb[2]; 3487 3488 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3489 switch (recipe) { 3490 case 0: 3491 if (scal) { 3492 bb[0] = PETSC_MIN_REAL; 3493 bb[1] = lthresh; 3494 } else { 3495 bb[0] = uthresh; 3496 bb[1] = PETSC_MAX_REAL; 3497 } 3498 #if defined(PETSC_USE_COMPLEX) 3499 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3500 #else 3501 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3502 #endif 3503 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3504 break; 3505 case 1: 3506 bb[0] = PETSC_MIN_REAL; 3507 bb[1] = lthresh * lthresh; 3508 #if defined(PETSC_USE_COMPLEX) 3509 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3510 #else 3511 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3512 #endif 3513 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3514 if (!scal) { 3515 PetscBLASInt B_neigs2 = 0; 3516 3517 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3518 bb[1] = PETSC_MAX_REAL; 3519 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3520 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3521 #if defined(PETSC_USE_COMPLEX) 3522 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3523 #else 3524 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3525 #endif 3526 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3527 B_neigs += B_neigs2; 3528 } 3529 break; 3530 case 2: 3531 if (scal) { 3532 bb[0] = PETSC_MIN_REAL; 3533 bb[1] = 0; 3534 #if defined(PETSC_USE_COMPLEX) 3535 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3536 #else 3537 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3538 #endif 3539 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3540 } else { 3541 PetscBLASInt B_neigs2 = 0; 3542 PetscBool do_copy = PETSC_FALSE; 3543 3544 lthresh = PetscMax(lthresh, 0.0); 3545 if (lthresh > 0.0) { 3546 bb[0] = PETSC_MIN_REAL; 3547 bb[1] = lthresh * lthresh; 3548 3549 do_copy = PETSC_TRUE; 3550 #if defined(PETSC_USE_COMPLEX) 3551 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3552 #else 3553 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3554 #endif 3555 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3556 } 3557 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3558 bb[1] = PETSC_MAX_REAL; 3559 if (do_copy) { 3560 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3561 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3562 } 3563 #if defined(PETSC_USE_COMPLEX) 3564 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3565 #else 3566 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3567 #endif 3568 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3569 B_neigs += B_neigs2; 3570 } 3571 break; 3572 case 3: 3573 if (scal) { 3574 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3575 } else { 3576 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3577 } 3578 if (!scal) { 3579 bb[0] = uthresh; 3580 bb[1] = PETSC_MAX_REAL; 3581 #if defined(PETSC_USE_COMPLEX) 3582 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3583 #else 3584 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3585 #endif 3586 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3587 } 3588 if (recipe_m > 0 && B_N - B_neigs > 0) { 3589 PetscBLASInt B_neigs2 = 0; 3590 3591 B_IL = 1; 3592 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3593 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3594 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3595 #if defined(PETSC_USE_COMPLEX) 3596 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3597 #else 3598 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3599 #endif 3600 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3601 B_neigs += B_neigs2; 3602 } 3603 break; 3604 case 4: 3605 bb[0] = PETSC_MIN_REAL; 3606 bb[1] = lthresh; 3607 #if defined(PETSC_USE_COMPLEX) 3608 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3609 #else 3610 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3611 #endif 3612 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3613 { 3614 PetscBLASInt B_neigs2 = 0; 3615 3616 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3617 bb[1] = PETSC_MAX_REAL; 3618 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3619 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3620 #if defined(PETSC_USE_COMPLEX) 3621 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3622 #else 3623 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3624 #endif 3625 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3626 B_neigs += B_neigs2; 3627 } 3628 break; 3629 case 5: /* same as before: first compute all eigenvalues, then filter */ 3630 #if defined(PETSC_USE_COMPLEX) 3631 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3632 #else 3633 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3634 #endif 3635 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3636 { 3637 PetscInt e, k, ne; 3638 for (e = 0, ne = 0; e < B_neigs; e++) { 3639 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3640 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3641 eigs[ne] = eigs[e]; 3642 ne++; 3643 } 3644 } 3645 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3646 PetscCall(PetscBLASIntCast(ne, &B_neigs)); 3647 } 3648 break; 3649 default: 3650 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3651 } 3652 } 3653 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3654 PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU)); 3655 B_IL = 1; 3656 #if defined(PETSC_USE_COMPLEX) 3657 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3658 #else 3659 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3660 #endif 3661 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3662 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3663 PetscInt k; 3664 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3665 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3666 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3667 nmin = nmax; 3668 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3669 for (k = 0; k < nmax; k++) { 3670 eigs[k] = 1. / PETSC_SMALL; 3671 eigv[k * (subset_size + 1)] = 1.0; 3672 } 3673 } 3674 PetscCall(PetscFPTrapPop()); 3675 if (B_ierr) { 3676 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3677 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3678 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3679 } 3680 3681 if (B_neigs > nmax) { 3682 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); 3683 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3684 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3685 } 3686 3687 nmin_s = PetscMin(nmin, B_N); 3688 if (B_neigs < nmin_s) { 3689 PetscBLASInt B_neigs2 = 0; 3690 3691 if (upart) { 3692 if (scal) { 3693 PetscCall(PetscBLASIntCast(nmin_s, &B_IU)); 3694 B_IL = B_neigs + 1; 3695 } else { 3696 PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL)); 3697 B_IU = B_N - B_neigs; 3698 } 3699 } else { 3700 B_IL = B_neigs + 1; 3701 PetscCall(PetscBLASIntCast(nmin_s, &B_IU)); 3702 } 3703 if (pcbddc->dbg_flag) { 3704 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3705 } 3706 if (sub_schurs->is_symmetric) { 3707 PetscInt j, k; 3708 for (j = 0; j < subset_size; j++) { 3709 for (k = j; k < subset_size; k++) { 3710 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3711 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3712 } 3713 } 3714 } else { 3715 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3716 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3717 } 3718 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3719 #if defined(PETSC_USE_COMPLEX) 3720 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3721 #else 3722 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3723 #endif 3724 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3725 PetscCall(PetscFPTrapPop()); 3726 B_neigs += B_neigs2; 3727 } 3728 if (B_ierr) { 3729 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3730 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3731 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3732 } 3733 if (pcbddc->dbg_flag) { 3734 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3735 for (j = 0; j < B_neigs; j++) { 3736 if (!sub_schurs->gdsw) { 3737 if (eigs[j] == 0.0) { 3738 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3739 } else { 3740 if (upart) { 3741 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3742 } else { 3743 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1. / eigs[j + eigs_start]))); 3744 } 3745 } 3746 } else { 3747 double pg = (double)eigs[j + eigs_start]; 3748 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3749 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3750 } 3751 } 3752 } 3753 } 3754 /* change the basis back to the original one */ 3755 if (sub_schurs->change) { 3756 Mat change, phi, phit; 3757 3758 if (pcbddc->dbg_flag > 2) { 3759 PetscInt ii; 3760 for (ii = 0; ii < B_neigs; ii++) { 3761 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3762 for (j = 0; j < B_N; j++) { 3763 #if defined(PETSC_USE_COMPLEX) 3764 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3765 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3766 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3767 #else 3768 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3769 #endif 3770 } 3771 } 3772 } 3773 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3774 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3775 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi)); 3776 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3777 PetscCall(MatDestroy(&phit)); 3778 PetscCall(MatDestroy(&phi)); 3779 } 3780 maxneigs = PetscMax(B_neigs, maxneigs); 3781 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3782 if (B_neigs) { 3783 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3784 3785 if (pcbddc->dbg_flag > 1) { 3786 PetscInt ii; 3787 for (ii = 0; ii < B_neigs; ii++) { 3788 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3789 for (j = 0; j < B_N; j++) { 3790 #if defined(PETSC_USE_COMPLEX) 3791 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3792 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3793 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3794 #else 3795 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3796 #endif 3797 } 3798 } 3799 } 3800 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3801 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3802 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3803 cum++; 3804 } 3805 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3806 /* shift for next computation */ 3807 cumarray += subset_size * subset_size; 3808 } 3809 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3810 3811 if (mss) { 3812 if (sub_schurs->gdsw) { 3813 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3814 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3815 } else { 3816 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3817 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3818 /* destroy matrices (junk) */ 3819 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3820 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3821 } 3822 } 3823 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3824 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3825 #if defined(PETSC_USE_COMPLEX) 3826 PetscCall(PetscFree(rwork)); 3827 #endif 3828 if (pcbddc->dbg_flag) { 3829 PetscInt maxneigs_r; 3830 PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3831 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3832 } 3833 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3834 PetscFunctionReturn(PETSC_SUCCESS); 3835 } 3836 3837 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3838 { 3839 Mat coarse_submat; 3840 3841 PetscFunctionBegin; 3842 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3843 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3844 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3845 3846 /* Setup local neumann solver ksp_R */ 3847 /* PCBDDCSetUpLocalScatters should be called first! */ 3848 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3849 3850 /* 3851 Setup local correction and local part of coarse basis. 3852 Gives back the dense local part of the coarse matrix in column major ordering 3853 */ 3854 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat)); 3855 3856 /* Compute total number of coarse nodes and setup coarse solver */ 3857 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat)); 3858 PetscCall(MatDestroy(&coarse_submat)); 3859 PetscFunctionReturn(PETSC_SUCCESS); 3860 } 3861 3862 PetscErrorCode PCBDDCResetCustomization(PC pc) 3863 { 3864 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3865 3866 PetscFunctionBegin; 3867 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3868 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3869 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3870 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3871 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3872 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3873 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3874 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3875 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3876 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3877 PetscFunctionReturn(PETSC_SUCCESS); 3878 } 3879 3880 PetscErrorCode PCBDDCResetTopography(PC pc) 3881 { 3882 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3883 PetscInt i; 3884 3885 PetscFunctionBegin; 3886 PetscCall(MatDestroy(&pcbddc->nedcG)); 3887 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3888 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3889 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3890 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3891 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3892 PetscCall(VecDestroy(&pcbddc->work_change)); 3893 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3894 PetscCall(MatDestroy(&pcbddc->divudotp)); 3895 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3896 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3897 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3898 pcbddc->n_local_subs = 0; 3899 PetscCall(PetscFree(pcbddc->local_subs)); 3900 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3901 pcbddc->graphanalyzed = PETSC_FALSE; 3902 pcbddc->recompute_topography = PETSC_TRUE; 3903 pcbddc->corner_selected = PETSC_FALSE; 3904 PetscFunctionReturn(PETSC_SUCCESS); 3905 } 3906 3907 PetscErrorCode PCBDDCResetSolvers(PC pc) 3908 { 3909 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3910 3911 PetscFunctionBegin; 3912 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3913 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3914 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3915 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3916 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3917 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3918 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3919 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3920 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3921 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3922 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3923 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3924 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3925 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3926 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3927 PetscCall(KSPReset(pcbddc->ksp_D)); 3928 PetscCall(KSPReset(pcbddc->ksp_R)); 3929 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3930 PetscCall(MatDestroy(&pcbddc->local_mat)); 3931 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3932 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 3933 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3934 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3935 PetscCall(MatDestroy(&pcbddc->benign_change)); 3936 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3937 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 3938 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3939 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3940 if (pcbddc->benign_zerodiag_subs) { 3941 PetscInt i; 3942 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3943 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3944 } 3945 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3946 PetscFunctionReturn(PETSC_SUCCESS); 3947 } 3948 3949 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3950 { 3951 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3952 PC_IS *pcis = (PC_IS *)pc->data; 3953 VecType impVecType; 3954 PetscInt n_constraints, n_R, old_size; 3955 3956 PetscFunctionBegin; 3957 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3958 n_R = pcis->n - pcbddc->n_vertices; 3959 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 3960 /* local work vectors (try to avoid unneeded work)*/ 3961 /* R nodes */ 3962 old_size = -1; 3963 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); 3964 if (n_R != old_size) { 3965 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3966 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3967 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 3968 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 3969 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 3970 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 3971 } 3972 /* local primal dofs */ 3973 old_size = -1; 3974 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); 3975 if (pcbddc->local_primal_size != old_size) { 3976 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3977 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 3978 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 3979 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 3980 } 3981 /* local explicit constraints */ 3982 old_size = -1; 3983 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); 3984 if (n_constraints && n_constraints != old_size) { 3985 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3986 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 3987 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 3988 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 3989 } 3990 PetscFunctionReturn(PETSC_SUCCESS); 3991 } 3992 3993 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode) 3994 { 3995 PetscBool flg; 3996 const PetscScalar *a; 3997 3998 PetscFunctionBegin; 3999 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg)); 4000 if (flg) { 4001 PetscCall(MatDenseGetArrayRead(S, &a)); 4002 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE)); 4003 PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode)); 4004 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE)); 4005 PetscCall(MatDenseRestoreArrayRead(S, &a)); 4006 } else { 4007 const PetscInt *ii, *jj; 4008 PetscInt n; 4009 PetscInt buf[8192], *bufc = NULL; 4010 PetscBool freeb = PETSC_FALSE; 4011 Mat Sm = S; 4012 4013 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg)); 4014 if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm)); 4015 else PetscCall(PetscObjectReference((PetscObject)S)); 4016 PetscCall(MatSeqAIJGetArrayRead(Sm, &a)); 4017 PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4018 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure"); 4019 if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) { 4020 bufc = buf; 4021 } else { 4022 PetscCall(PetscMalloc1(nc, &bufc)); 4023 freeb = PETSC_TRUE; 4024 } 4025 4026 for (PetscInt i = 0; i < n; i++) { 4027 const PetscInt nci = ii[i + 1] - ii[i]; 4028 4029 for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]]; 4030 PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode)); 4031 } 4032 PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4033 PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a)); 4034 PetscCall(MatDestroy(&Sm)); 4035 if (freeb) PetscCall(PetscFree(bufc)); 4036 } 4037 PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY)); 4038 PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY)); 4039 PetscFunctionReturn(PETSC_SUCCESS); 4040 } 4041 4042 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat) 4043 { 4044 Mat_SeqAIJ *aij; 4045 PetscInt *ii, *jj; 4046 PetscScalar *aa; 4047 PetscInt nnz = 0, m, nc; 4048 const PetscScalar *a; 4049 const PetscScalar zero = 0.0; 4050 4051 PetscFunctionBegin; 4052 PetscCall(MatGetLocalSize(D, &m, &nc)); 4053 PetscCall(MatDenseGetArrayRead(D, &a)); 4054 PetscCall(PetscMalloc1(m + 1, &ii)); 4055 PetscCall(PetscMalloc1(m * nc, &jj)); 4056 PetscCall(PetscMalloc1(m * nc, &aa)); 4057 ii[0] = 0; 4058 for (PetscInt k = 0; k < m; k++) { 4059 for (PetscInt s = 0; s < nc; s++) { 4060 const PetscInt c = s + k * nc; 4061 const PetscScalar v = a[k + s * m]; 4062 4063 if (PetscUnlikely(j[c] < 0 || v == zero)) continue; 4064 jj[nnz] = j[c]; 4065 aa[nnz] = a[k + s * m]; 4066 nnz++; 4067 } 4068 ii[k + 1] = nnz; 4069 } 4070 4071 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat)); 4072 PetscCall(MatDenseRestoreArrayRead(D, &a)); 4073 4074 aij = (Mat_SeqAIJ *)(*mat)->data; 4075 aij->free_a = PETSC_TRUE; 4076 aij->free_ij = PETSC_TRUE; 4077 PetscFunctionReturn(PETSC_SUCCESS); 4078 } 4079 4080 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */ 4081 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B) 4082 { 4083 PetscInt n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL; 4084 const PetscBool allowzeropivot = PETSC_FALSE; 4085 PetscBool zeropivotdetected = PETSC_FALSE; 4086 const PetscReal shift = 0.0; 4087 PetscInt ipvt[5], *ii, *jj, *indi, *indj; 4088 PetscScalar work[25], *v_work = NULL, *aa, *diag; 4089 PetscLogDouble flops = 0.0; 4090 4091 PetscFunctionBegin; 4092 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices"); 4093 for (PetscInt i = 0; i < nblocks; i++) { 4094 ncnt += bsizes[i]; 4095 ncnt2 += PetscSqr(bsizes[i]); 4096 } 4097 PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n); 4098 for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]); 4099 if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots)); 4100 4101 PetscCall(PetscMalloc1(n + 1, &ii)); 4102 PetscCall(PetscMalloc1(ncnt2, &jj)); 4103 PetscCall(PetscCalloc1(ncnt2, &aa)); 4104 4105 ncnt = 0; 4106 ii[0] = 0; 4107 indi = ii; 4108 indj = jj; 4109 diag = aa; 4110 for (PetscInt i = 0; i < nblocks; i++) { 4111 const PetscInt bs = bsizes[i]; 4112 4113 for (PetscInt k = 0; k < bs; k++) { 4114 indi[k + 1] = indi[k] + bs; 4115 for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j; 4116 } 4117 PetscCall(MatGetValues(A, bs, indj, bs, indj, diag)); 4118 switch (bs) { 4119 case 1: 4120 *diag = 1.0 / (*diag); 4121 break; 4122 case 2: 4123 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 4124 break; 4125 case 3: 4126 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 4127 break; 4128 case 4: 4129 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 4130 break; 4131 case 5: 4132 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 4133 break; 4134 case 6: 4135 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 4136 break; 4137 case 7: 4138 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 4139 break; 4140 default: 4141 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4142 } 4143 ncnt += bs; 4144 flops += 2.0 * PetscPowInt(bs, 3) / 3.0; 4145 diag += bs * bs; 4146 indj += bs * bs; 4147 indi += bs; 4148 } 4149 PetscCall(PetscLogFlops(flops)); 4150 PetscCall(PetscFree2(v_work, v_pivots)); 4151 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B)); 4152 { 4153 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data; 4154 aij->free_a = PETSC_TRUE; 4155 aij->free_ij = PETSC_TRUE; 4156 } 4157 PetscFunctionReturn(PETSC_SUCCESS); 4158 } 4159 4160 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B) 4161 { 4162 const PetscScalar *rarr; 4163 PetscScalar *larr; 4164 PetscSF vsf; 4165 PetscInt n, rld, lld; 4166 4167 PetscFunctionBegin; 4168 PetscCall(MatGetSize(A, NULL, &n)); 4169 PetscCall(MatDenseGetLDA(A, &rld)); 4170 PetscCall(MatDenseGetLDA(B, &lld)); 4171 PetscCall(MatDenseGetArrayRead(A, &rarr)); 4172 PetscCall(MatDenseGetArrayWrite(B, &larr)); 4173 PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf)); 4174 PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4175 PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4176 PetscCall(MatDenseRestoreArrayRead(A, &rarr)); 4177 PetscCall(MatDenseRestoreArrayWrite(B, &larr)); 4178 PetscCall(PetscSFDestroy(&vsf)); 4179 PetscFunctionReturn(PETSC_SUCCESS); 4180 } 4181 4182 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat) 4183 { 4184 PC_IS *pcis = (PC_IS *)pc->data; 4185 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4186 PCBDDCGraph graph = pcbddc->mat_graph; 4187 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4188 /* submatrices of local problem */ 4189 Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL; 4190 /* submatrices of local coarse problem */ 4191 Mat S_CV = NULL, S_VC = NULL, S_CC = NULL; 4192 /* working matrices */ 4193 Mat C_CR; 4194 4195 /* additional working stuff */ 4196 PC pc_R; 4197 IS is_R, is_V, is_C; 4198 const PetscInt *idx_V, *idx_C; 4199 Mat F, Brhs = NULL; 4200 Vec dummy_vec; 4201 PetscBool isLU, isCHOL, need_benign_correction, sparserhs; 4202 PetscInt *idx_V_B; 4203 PetscInt lda_rhs, n_vertices, n_constraints, *p0_lidx_I; 4204 PetscInt n_eff_vertices, n_eff_constraints; 4205 PetscInt i, n_R, n_D, n_B; 4206 PetscScalar one = 1.0, m_one = -1.0; 4207 4208 /* Multi-element support */ 4209 PetscBool multi_element = graph->multi_element; 4210 PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL; 4211 PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL; 4212 IS is_C_perm = NULL; 4213 PetscInt n_C_bss = 0, *C_bss = NULL; 4214 Mat coarse_phi_multi; 4215 4216 PetscFunctionBegin; 4217 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 4218 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4219 4220 /* Set Non-overlapping dimensions */ 4221 n_vertices = pcbddc->n_vertices; 4222 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 4223 n_B = pcis->n_B; 4224 n_D = pcis->n - n_B; 4225 n_R = pcis->n - n_vertices; 4226 4227 /* vertices in boundary numbering */ 4228 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 4229 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 4230 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 4231 4232 /* these two cases still need to be optimized */ 4233 if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE; 4234 4235 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 4236 if (multi_element) { 4237 PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 4238 4239 PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat)); 4240 PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size)); 4241 PetscCall(MatSetType(*coarse_submat, MATSEQAIJ)); 4242 PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 4243 PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 4244 4245 /* group vertices and constraints by subdomain id */ 4246 const PetscInt *vidxs = pcbddc->primal_indices_local_idxs; 4247 const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices; 4248 PetscInt *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz; 4249 PetscInt n_el = PetscMax(graph->n_local_subs, 1); 4250 4251 PetscCall(PetscCalloc1(2 * n_el, &count_eff)); 4252 PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V)); 4253 PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C)); 4254 for (PetscInt i = 0; i < n_vertices; i++) { 4255 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4256 4257 V_to_eff_V[i] = count_eff[s]; 4258 count_eff[s] += 1; 4259 } 4260 for (PetscInt i = 0; i < n_constraints; i++) { 4261 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1; 4262 4263 C_to_eff_C[i] = count_eff[s]; 4264 count_eff[s] += 1; 4265 } 4266 4267 /* preallocation */ 4268 PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz)); 4269 for (PetscInt i = 0; i < n_vertices; i++) { 4270 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4271 4272 nnz[i] = count_eff[s] + count_eff[s + 1]; 4273 } 4274 for (PetscInt i = 0; i < n_constraints; i++) { 4275 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub; 4276 4277 nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1]; 4278 } 4279 PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz)); 4280 PetscCall(PetscFree(nnz)); 4281 4282 n_eff_vertices = 0; 4283 n_eff_constraints = 0; 4284 for (PetscInt i = 0; i < n_el; i++) { 4285 n_eff_vertices = PetscMax(n_eff_vertices, count_eff[2 * i]); 4286 n_eff_constraints = PetscMax(n_eff_constraints, count_eff[2 * i + 1]); 4287 count_eff[2 * i] = 0; 4288 count_eff[2 * i + 1] = 0; 4289 } 4290 4291 const PetscInt *idx; 4292 PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C)); 4293 4294 for (PetscInt i = 0; i < n_vertices; i++) { 4295 const PetscInt e = graph->nodes[vidxs[i]].local_sub; 4296 const PetscInt s = 2 * e; 4297 4298 V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i; 4299 count_eff[s] += 1; 4300 } 4301 for (PetscInt i = 0; i < n_constraints; i++) { 4302 const PetscInt e = graph->nodes[cidxs[i]].local_sub; 4303 const PetscInt s = 2 * e + 1; 4304 4305 C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i; 4306 count_eff[s] += 1; 4307 } 4308 4309 PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J)); 4310 PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J)); 4311 PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J)); 4312 PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J)); 4313 for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1; 4314 for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1; 4315 for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1; 4316 for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1; 4317 4318 PetscCall(ISGetIndices(pcbddc->is_R_local, &idx)); 4319 for (PetscInt i = 0; i < n_R; i++) { 4320 const PetscInt e = graph->nodes[idx[i]].local_sub; 4321 const PetscInt s = 2 * e; 4322 PetscInt j; 4323 4324 for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4325 for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4326 } 4327 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx)); 4328 PetscCall(ISGetIndices(pcis->is_B_local, &idx)); 4329 for (PetscInt i = 0; i < n_B; i++) { 4330 const PetscInt e = graph->nodes[idx[i]].local_sub; 4331 const PetscInt s = 2 * e; 4332 PetscInt j; 4333 4334 for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4335 for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4336 } 4337 PetscCall(ISRestoreIndices(pcis->is_B_local, &idx)); 4338 4339 /* permutation and blocksizes for block invert of S_CC */ 4340 PetscInt *idxp; 4341 4342 PetscCall(PetscMalloc1(n_constraints, &idxp)); 4343 PetscCall(PetscMalloc1(n_el, &C_bss)); 4344 n_C_bss = 0; 4345 for (PetscInt e = 0, cnt = 0; e < n_el; e++) { 4346 const PetscInt nc = count_eff[2 * e + 1]; 4347 4348 if (nc) C_bss[n_C_bss++] = nc; 4349 for (PetscInt c = 0; c < nc; c++) { idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c]; } 4350 cnt += nc; 4351 } 4352 4353 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm)); 4354 4355 PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C)); 4356 PetscCall(PetscFree(count_eff)); 4357 } else { 4358 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat)); 4359 n_eff_constraints = n_constraints; 4360 n_eff_vertices = n_vertices; 4361 } 4362 4363 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 4364 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 4365 PetscCall(PCSetUp(pc_R)); 4366 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 4367 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 4368 lda_rhs = n_R; 4369 need_benign_correction = PETSC_FALSE; 4370 if (isLU || isCHOL) { 4371 PetscCall(PCFactorGetMatrix(pc_R, &F)); 4372 } else if (sub_schurs && sub_schurs->reuse_solver) { 4373 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4374 MatFactorType type; 4375 4376 F = reuse_solver->F; 4377 PetscCall(MatGetFactorType(F, &type)); 4378 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 4379 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 4380 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 4381 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 4382 } else F = NULL; 4383 4384 /* determine if we can use a sparse right-hand side */ 4385 sparserhs = PETSC_FALSE; 4386 if (F && !multi_element) { 4387 MatSolverType solver; 4388 4389 PetscCall(MatFactorGetSolverType(F, &solver)); 4390 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 4391 } 4392 4393 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 4394 dummy_vec = NULL; 4395 if (need_benign_correction && lda_rhs != n_R && F) { 4396 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 4397 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 4398 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 4399 } 4400 4401 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 4402 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4403 4404 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R)); 4405 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V)); 4406 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C)); 4407 PetscCall(ISGetIndices(is_V, &idx_V)); 4408 PetscCall(ISGetIndices(is_C, &idx_C)); 4409 4410 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4411 if (n_constraints) { 4412 Mat C_B; 4413 4414 /* Extract constraints on R nodes: C_{CR} */ 4415 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 4416 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4417 4418 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4419 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4420 if (!sparserhs) { 4421 PetscScalar *marr; 4422 4423 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs)); 4424 PetscCall(MatDenseGetArrayWrite(Brhs, &marr)); 4425 for (i = 0; i < n_constraints; i++) { 4426 const PetscScalar *row_cmat_values; 4427 const PetscInt *row_cmat_indices; 4428 PetscInt size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i; 4429 4430 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4431 for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j]; 4432 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4433 } 4434 PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr)); 4435 } else { 4436 Mat tC_CR; 4437 4438 PetscCall(MatScale(C_CR, -1.0)); 4439 if (lda_rhs != n_R) { 4440 PetscScalar *aa; 4441 PetscInt r, *ii, *jj; 4442 PetscBool done; 4443 4444 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4445 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4446 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 4447 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 4448 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4449 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4450 } else { 4451 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4452 tC_CR = C_CR; 4453 } 4454 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 4455 PetscCall(MatDestroy(&tC_CR)); 4456 } 4457 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R)); 4458 if (F) { 4459 if (need_benign_correction) { 4460 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4461 4462 /* rhs is already zero on interior dofs, no need to change the rhs */ 4463 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 4464 } 4465 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 4466 if (need_benign_correction) { 4467 PetscScalar *marr; 4468 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4469 4470 /* XXX multi_element? */ 4471 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4472 if (lda_rhs != n_R) { 4473 for (i = 0; i < n_eff_constraints; i++) { 4474 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4475 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4476 PetscCall(VecResetArray(dummy_vec)); 4477 } 4478 } else { 4479 for (i = 0; i < n_eff_constraints; i++) { 4480 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4481 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4482 PetscCall(VecResetArray(pcbddc->vec1_R)); 4483 } 4484 } 4485 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4486 } 4487 } else { 4488 const PetscScalar *barr; 4489 PetscScalar *marr; 4490 4491 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4492 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4493 for (i = 0; i < n_eff_constraints; i++) { 4494 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4495 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4496 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4497 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4498 PetscCall(VecResetArray(pcbddc->vec1_R)); 4499 PetscCall(VecResetArray(pcbddc->vec2_R)); 4500 } 4501 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4502 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4503 } 4504 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 4505 PetscCall(MatDestroy(&Brhs)); 4506 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4507 if (!pcbddc->switch_static) { 4508 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2)); 4509 for (i = 0; i < n_eff_constraints; i++) { 4510 Vec r, b; 4511 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r)); 4512 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b)); 4513 PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4514 PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4515 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b)); 4516 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r)); 4517 } 4518 if (multi_element) { 4519 Mat T; 4520 4521 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4522 PetscCall(MatDestroy(&local_auxmat2_R)); 4523 local_auxmat2_R = T; 4524 PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T)); 4525 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4526 pcbddc->local_auxmat2 = T; 4527 } 4528 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4529 } else { 4530 if (multi_element) { 4531 Mat T; 4532 4533 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4534 PetscCall(MatDestroy(&local_auxmat2_R)); 4535 local_auxmat2_R = T; 4536 } 4537 if (lda_rhs != n_R) { 4538 PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 4539 } else { 4540 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4541 pcbddc->local_auxmat2 = local_auxmat2_R; 4542 } 4543 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4544 } 4545 PetscCall(MatScale(S_CC, m_one)); 4546 if (multi_element) { 4547 Mat T, T2; 4548 IS isp, ispi; 4549 4550 isp = is_C_perm; 4551 4552 PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi)); 4553 PetscCall(MatPermute(S_CC, isp, isp, &T)); 4554 PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2)); 4555 PetscCall(MatDestroy(&T)); 4556 PetscCall(MatDestroy(&S_CC)); 4557 PetscCall(MatPermute(T2, ispi, ispi, &S_CC)); 4558 PetscCall(MatDestroy(&T2)); 4559 PetscCall(ISDestroy(&ispi)); 4560 } else { 4561 if (isCHOL) { 4562 PetscCall(MatCholeskyFactor(S_CC, NULL, NULL)); 4563 } else { 4564 PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL)); 4565 } 4566 PetscCall(MatSeqDenseInvertFactors_Private(S_CC)); 4567 } 4568 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4569 PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1)); 4570 PetscCall(MatDestroy(&C_B)); 4571 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES)); 4572 } 4573 4574 /* Get submatrices from subdomain matrix */ 4575 if (n_vertices) { 4576 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4577 PetscBool oldpin; 4578 #endif 4579 IS is_aux; 4580 4581 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4582 IS tis; 4583 4584 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 4585 PetscCall(ISSort(tis)); 4586 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 4587 PetscCall(ISDestroy(&tis)); 4588 } else { 4589 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 4590 } 4591 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4592 oldpin = pcbddc->local_mat->boundtocpu; 4593 #endif 4594 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 4595 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 4596 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 4597 /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4598 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4599 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 4600 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4601 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 4602 #endif 4603 PetscCall(ISDestroy(&is_aux)); 4604 } 4605 PetscCall(ISDestroy(&is_C_perm)); 4606 PetscCall(PetscFree(C_bss)); 4607 4608 p0_lidx_I = NULL; 4609 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4610 const PetscInt *idxs; 4611 4612 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4613 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4614 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); 4615 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4616 } 4617 4618 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4619 4620 /* Matrices of coarse basis functions (local) */ 4621 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4622 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4623 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4624 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4625 if (!multi_element) { 4626 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B)); 4627 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D)); 4628 coarse_phi_multi = NULL; 4629 } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */ 4630 IS is_rows[2] = {pcbddc->is_R_local, NULL}; 4631 IS is_cols[2] = {is_V, is_C}; 4632 4633 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1])); 4634 PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi)); 4635 PetscCall(ISDestroy(&is_rows[1])); 4636 } 4637 4638 /* vertices */ 4639 if (n_vertices) { 4640 PetscBool restoreavr = PETSC_FALSE; 4641 Mat A_RRmA_RV = NULL; 4642 4643 PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4644 PetscCall(MatDestroy(&A_VV)); 4645 4646 if (n_R) { 4647 Mat A_RV_bcorr = NULL, S_VV; 4648 4649 PetscCall(MatScale(A_RV, m_one)); 4650 if (need_benign_correction) { 4651 ISLocalToGlobalMapping RtoN; 4652 IS is_p0; 4653 PetscInt *idxs_p0, n; 4654 4655 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4656 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4657 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4658 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4659 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4660 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4661 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4662 PetscCall(ISDestroy(&is_p0)); 4663 } 4664 4665 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV)); 4666 if (!sparserhs || need_benign_correction) { 4667 if (lda_rhs == n_R && !multi_element) { 4668 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4669 } else { 4670 Mat T; 4671 PetscScalar *av, *array; 4672 const PetscInt *xadj, *adjncy; 4673 PetscInt n; 4674 PetscBool flg_row; 4675 4676 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T)); 4677 PetscCall(MatDenseGetArrayWrite(T, &array)); 4678 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4679 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4680 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4681 for (i = 0; i < n; i++) { 4682 PetscInt j; 4683 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j]; 4684 } 4685 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4686 PetscCall(MatDenseRestoreArrayWrite(T, &array)); 4687 PetscCall(MatDestroy(&A_RV)); 4688 A_RV = T; 4689 } 4690 if (need_benign_correction) { 4691 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4692 PetscScalar *marr; 4693 4694 /* XXX multi_element */ 4695 PetscCall(MatDenseGetArray(A_RV, &marr)); 4696 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4697 4698 | 0 0 0 | (V) 4699 L = | 0 0 -1 | (P-p0) 4700 | 0 0 -1 | (p0) 4701 4702 */ 4703 for (i = 0; i < reuse_solver->benign_n; i++) { 4704 const PetscScalar *vals; 4705 const PetscInt *idxs, *idxs_zero; 4706 PetscInt n, j, nz; 4707 4708 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4709 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4710 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4711 for (j = 0; j < n; j++) { 4712 PetscScalar val = vals[j]; 4713 PetscInt k, col = idxs[j]; 4714 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4715 } 4716 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4717 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4718 } 4719 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4720 } 4721 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4722 Brhs = A_RV; 4723 } else { 4724 Mat tA_RVT, A_RVT; 4725 4726 if (!pcbddc->symmetric_primal) { 4727 /* A_RV already scaled by -1 */ 4728 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4729 } else { 4730 restoreavr = PETSC_TRUE; 4731 PetscCall(MatScale(A_VR, -1.0)); 4732 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4733 A_RVT = A_VR; 4734 } 4735 if (lda_rhs != n_R) { 4736 PetscScalar *aa; 4737 PetscInt r, *ii, *jj; 4738 PetscBool done; 4739 4740 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4741 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4742 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4743 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4744 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4745 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4746 } else { 4747 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4748 tA_RVT = A_RVT; 4749 } 4750 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4751 PetscCall(MatDestroy(&tA_RVT)); 4752 PetscCall(MatDestroy(&A_RVT)); 4753 } 4754 if (F) { 4755 /* need to correct the rhs */ 4756 if (need_benign_correction) { 4757 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4758 PetscScalar *marr; 4759 4760 PetscCall(MatDenseGetArray(Brhs, &marr)); 4761 if (lda_rhs != n_R) { 4762 for (i = 0; i < n_eff_vertices; i++) { 4763 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4764 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4765 PetscCall(VecResetArray(dummy_vec)); 4766 } 4767 } else { 4768 for (i = 0; i < n_eff_vertices; i++) { 4769 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4770 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4771 PetscCall(VecResetArray(pcbddc->vec1_R)); 4772 } 4773 } 4774 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4775 } 4776 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4777 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4778 /* need to correct the solution */ 4779 if (need_benign_correction) { 4780 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4781 PetscScalar *marr; 4782 4783 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4784 if (lda_rhs != n_R) { 4785 for (i = 0; i < n_eff_vertices; i++) { 4786 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4787 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4788 PetscCall(VecResetArray(dummy_vec)); 4789 } 4790 } else { 4791 for (i = 0; i < n_eff_vertices; i++) { 4792 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4793 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4794 PetscCall(VecResetArray(pcbddc->vec1_R)); 4795 } 4796 } 4797 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4798 } 4799 } else { 4800 const PetscScalar *barr; 4801 PetscScalar *marr; 4802 4803 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4804 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4805 for (i = 0; i < n_eff_vertices; i++) { 4806 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4807 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4808 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4809 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4810 PetscCall(VecResetArray(pcbddc->vec1_R)); 4811 PetscCall(VecResetArray(pcbddc->vec2_R)); 4812 } 4813 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4814 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4815 } 4816 PetscCall(MatDestroy(&A_RV)); 4817 PetscCall(MatDestroy(&Brhs)); 4818 /* S_VV and S_CV */ 4819 if (n_constraints) { 4820 Mat B; 4821 4822 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B)); 4823 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B)); 4824 4825 /* S_CV = pcbddc->local_auxmat1 * B */ 4826 if (multi_element) { 4827 Mat T; 4828 4829 PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T)); 4830 PetscCall(MatDestroy(&B)); 4831 B = T; 4832 } 4833 PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV)); 4834 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4835 PetscCall(MatProductSetFromOptions(S_CV)); 4836 PetscCall(MatProductSymbolic(S_CV)); 4837 PetscCall(MatProductNumeric(S_CV)); 4838 PetscCall(MatProductClear(S_CV)); 4839 PetscCall(MatDestroy(&B)); 4840 4841 /* B = local_auxmat2_R * S_CV */ 4842 PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B)); 4843 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4844 PetscCall(MatProductSetFromOptions(B)); 4845 PetscCall(MatProductSymbolic(B)); 4846 PetscCall(MatProductNumeric(B)); 4847 4848 PetscCall(MatScale(S_CV, m_one)); 4849 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES)); 4850 4851 if (multi_element) { 4852 Mat T; 4853 4854 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4855 PetscCall(MatDestroy(&A_RRmA_RV)); 4856 A_RRmA_RV = T; 4857 } 4858 PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */ 4859 PetscCall(MatDestroy(&B)); 4860 } else if (multi_element) { 4861 Mat T; 4862 4863 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4864 PetscCall(MatDestroy(&A_RRmA_RV)); 4865 A_RRmA_RV = T; 4866 } 4867 4868 if (lda_rhs != n_R) { 4869 Mat T; 4870 4871 PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T)); 4872 PetscCall(MatDestroy(&A_RRmA_RV)); 4873 A_RRmA_RV = T; 4874 } 4875 4876 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4877 if (need_benign_correction) { /* XXX SPARSE */ 4878 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4879 PetscScalar *sums; 4880 const PetscScalar *marr; 4881 4882 PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr)); 4883 PetscCall(PetscMalloc1(n_vertices, &sums)); 4884 for (i = 0; i < reuse_solver->benign_n; i++) { 4885 const PetscScalar *vals; 4886 const PetscInt *idxs, *idxs_zero; 4887 PetscInt n, j, nz; 4888 4889 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4890 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4891 for (j = 0; j < n_vertices; j++) { 4892 sums[j] = 0.; 4893 for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R]; 4894 } 4895 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4896 for (j = 0; j < n; j++) { 4897 PetscScalar val = vals[j]; 4898 for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES)); 4899 } 4900 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4901 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4902 } 4903 PetscCall(PetscFree(sums)); 4904 PetscCall(MatDestroy(&A_RV_bcorr)); 4905 PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr)); 4906 } 4907 4908 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV)); 4909 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4910 PetscCall(MatDestroy(&S_VV)); 4911 } 4912 4913 /* coarse basis functions */ 4914 if (coarse_phi_multi) { 4915 Mat Vid; 4916 4917 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid)); 4918 PetscCall(MatShift_Basic(Vid, 1.0)); 4919 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV)); 4920 PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid)); 4921 PetscCall(MatDestroy(&Vid)); 4922 } else { 4923 if (A_RRmA_RV) { 4924 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B)); 4925 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4926 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D)); 4927 if (pcbddc->benign_n) { 4928 for (i = 0; i < n_vertices; i++) { PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); } 4929 } 4930 } 4931 } 4932 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES)); 4933 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 4934 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 4935 } 4936 PetscCall(MatDestroy(&A_RRmA_RV)); 4937 } 4938 PetscCall(MatDestroy(&A_RV)); 4939 PetscCall(VecDestroy(&dummy_vec)); 4940 4941 if (n_constraints) { 4942 Mat B, B2; 4943 4944 PetscCall(MatScale(S_CC, m_one)); 4945 PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B)); 4946 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4947 PetscCall(MatProductSetFromOptions(B)); 4948 PetscCall(MatProductSymbolic(B)); 4949 PetscCall(MatProductNumeric(B)); 4950 4951 if (n_vertices) { 4952 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4953 PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC)); 4954 } else { 4955 if (lda_rhs != n_R) { 4956 Mat tB; 4957 4958 PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB)); 4959 PetscCall(MatDestroy(&B)); 4960 B = tB; 4961 } 4962 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC)); 4963 } 4964 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES)); 4965 } 4966 4967 /* coarse basis functions */ 4968 if (coarse_phi_multi) { 4969 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B)); 4970 } else { 4971 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 4972 PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2)); 4973 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2)); 4974 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4975 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 4976 PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2)); 4977 if (pcbddc->benign_n) { 4978 for (i = 0; i < n_constraints; i++) { PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); } 4979 } 4980 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2)); 4981 } 4982 } 4983 PetscCall(MatDestroy(&B)); 4984 } 4985 4986 /* assemble sparse coarse basis functions */ 4987 if (coarse_phi_multi) { 4988 Mat T; 4989 4990 PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T)); 4991 PetscCall(MatDestroy(&coarse_phi_multi)); 4992 PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B)); 4993 if (pcbddc->switch_static || pcbddc->dbg_flag) { PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D)); } 4994 PetscCall(MatDestroy(&T)); 4995 } 4996 PetscCall(MatDestroy(&local_auxmat2_R)); 4997 PetscCall(PetscFree(p0_lidx_I)); 4998 4999 /* coarse matrix entries relative to B_0 */ 5000 if (pcbddc->benign_n) { 5001 Mat B0_B, B0_BPHI; 5002 IS is_dummy; 5003 const PetscScalar *data; 5004 PetscInt j; 5005 5006 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5007 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5008 PetscCall(ISDestroy(&is_dummy)); 5009 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5010 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5011 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 5012 for (j = 0; j < pcbddc->benign_n; j++) { 5013 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5014 for (i = 0; i < pcbddc->local_primal_size; i++) { 5015 PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5016 PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5017 } 5018 } 5019 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 5020 PetscCall(MatDestroy(&B0_B)); 5021 PetscCall(MatDestroy(&B0_BPHI)); 5022 } 5023 5024 /* compute other basis functions for non-symmetric problems */ 5025 if (!pcbddc->symmetric_primal) { 5026 Mat B_V = NULL, B_C = NULL; 5027 PetscScalar *marray, *work; 5028 5029 /* TODO multi_element MatDenseScatter */ 5030 if (n_constraints) { 5031 Mat S_CCT, C_CRT; 5032 5033 PetscCall(MatScale(S_CC, m_one)); 5034 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 5035 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 5036 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C)); 5037 PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C)); 5038 PetscCall(MatDestroy(&S_CCT)); 5039 if (n_vertices) { 5040 Mat S_VCT; 5041 5042 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 5043 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V)); 5044 PetscCall(MatDestroy(&S_VCT)); 5045 PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V)); 5046 } 5047 PetscCall(MatDestroy(&C_CRT)); 5048 } else { 5049 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 5050 } 5051 if (n_vertices && n_R) { 5052 PetscScalar *av, *marray; 5053 const PetscInt *xadj, *adjncy; 5054 PetscInt n; 5055 PetscBool flg_row; 5056 5057 /* B_V = B_V - A_VR^T */ 5058 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 5059 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5060 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 5061 PetscCall(MatDenseGetArray(B_V, &marray)); 5062 for (i = 0; i < n; i++) { 5063 PetscInt j; 5064 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 5065 } 5066 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5067 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5068 PetscCall(MatDestroy(&A_VR)); 5069 } 5070 5071 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 5072 PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work)); 5073 if (n_vertices) { 5074 PetscCall(MatDenseGetArray(B_V, &marray)); 5075 for (i = 0; i < n_vertices; i++) { 5076 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 5077 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5078 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5079 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5080 PetscCall(VecResetArray(pcbddc->vec1_R)); 5081 PetscCall(VecResetArray(pcbddc->vec2_R)); 5082 } 5083 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5084 } 5085 if (B_C) { 5086 PetscCall(MatDenseGetArray(B_C, &marray)); 5087 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 5088 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 5089 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5090 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5091 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5092 PetscCall(VecResetArray(pcbddc->vec1_R)); 5093 PetscCall(VecResetArray(pcbddc->vec2_R)); 5094 } 5095 PetscCall(MatDenseRestoreArray(B_C, &marray)); 5096 } 5097 /* coarse basis functions */ 5098 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B)); 5099 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D)); 5100 for (i = 0; i < pcbddc->local_primal_size; i++) { 5101 Vec v; 5102 5103 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 5104 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 5105 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5106 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5107 if (i < n_vertices) { 5108 PetscScalar one = 1.0; 5109 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 5110 PetscCall(VecAssemblyBegin(v)); 5111 PetscCall(VecAssemblyEnd(v)); 5112 } 5113 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 5114 5115 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5116 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 5117 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5118 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5119 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 5120 } 5121 PetscCall(VecResetArray(pcbddc->vec1_R)); 5122 } 5123 PetscCall(MatDestroy(&B_V)); 5124 PetscCall(MatDestroy(&B_C)); 5125 PetscCall(PetscFree(work)); 5126 } else { 5127 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 5128 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 5129 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 5130 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 5131 } 5132 PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5133 PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5134 5135 /* free memory */ 5136 PetscCall(PetscFree(V_to_eff_V)); 5137 PetscCall(PetscFree(C_to_eff_C)); 5138 PetscCall(PetscFree(R_eff_V_J)); 5139 PetscCall(PetscFree(R_eff_C_J)); 5140 PetscCall(PetscFree(B_eff_V_J)); 5141 PetscCall(PetscFree(B_eff_C_J)); 5142 PetscCall(ISDestroy(&is_R)); 5143 PetscCall(ISRestoreIndices(is_V, &idx_V)); 5144 PetscCall(ISRestoreIndices(is_C, &idx_C)); 5145 PetscCall(ISDestroy(&is_V)); 5146 PetscCall(ISDestroy(&is_C)); 5147 PetscCall(PetscFree(idx_V_B)); 5148 PetscCall(MatDestroy(&S_CV)); 5149 PetscCall(MatDestroy(&S_VC)); 5150 PetscCall(MatDestroy(&S_CC)); 5151 if (n_vertices) PetscCall(MatDestroy(&A_VR)); 5152 if (n_constraints) PetscCall(MatDestroy(&C_CR)); 5153 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 5154 5155 /* Checking coarse_sub_mat and coarse basis functions */ 5156 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5157 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5158 if (pcbddc->dbg_flag) { 5159 Mat AUXMAT, TM1, TM2, TM3, TM4; 5160 Mat coarse_phi_D, coarse_phi_B; 5161 Mat coarse_psi_D, coarse_psi_B; 5162 Mat A_II, A_BB, A_IB, A_BI; 5163 Mat C_B, CPHI; 5164 IS is_dummy; 5165 Vec mones; 5166 MatType checkmattype = MATSEQAIJ; 5167 PetscReal real_value; 5168 5169 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5170 Mat A; 5171 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 5172 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 5173 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 5174 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 5175 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 5176 PetscCall(MatDestroy(&A)); 5177 } else { 5178 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 5179 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 5180 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 5181 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 5182 } 5183 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 5184 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 5185 if (!pcbddc->symmetric_primal) { 5186 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 5187 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 5188 } 5189 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5190 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 5191 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5192 if (!pcbddc->symmetric_primal) { 5193 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5194 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5195 PetscCall(MatDestroy(&AUXMAT)); 5196 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5197 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5198 PetscCall(MatDestroy(&AUXMAT)); 5199 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5200 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5201 PetscCall(MatDestroy(&AUXMAT)); 5202 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5203 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5204 PetscCall(MatDestroy(&AUXMAT)); 5205 } else { 5206 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5207 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5208 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5209 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5210 PetscCall(MatDestroy(&AUXMAT)); 5211 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5212 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5213 PetscCall(MatDestroy(&AUXMAT)); 5214 } 5215 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 5216 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 5217 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 5218 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 5219 if (pcbddc->benign_n) { 5220 Mat B0_B, B0_BPHI; 5221 const PetscScalar *data2; 5222 PetscScalar *data; 5223 PetscInt j; 5224 5225 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5226 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5227 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5228 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5229 PetscCall(MatDenseGetArray(TM1, &data)); 5230 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 5231 for (j = 0; j < pcbddc->benign_n; j++) { 5232 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5233 for (i = 0; i < pcbddc->local_primal_size; i++) { 5234 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 5235 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 5236 } 5237 } 5238 PetscCall(MatDenseRestoreArray(TM1, &data)); 5239 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 5240 PetscCall(MatDestroy(&B0_B)); 5241 PetscCall(ISDestroy(&is_dummy)); 5242 PetscCall(MatDestroy(&B0_BPHI)); 5243 } 5244 PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN)); 5245 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 5246 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5247 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5248 5249 /* check constraints */ 5250 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 5251 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 5252 if (!pcbddc->benign_n) { /* TODO: add benign case */ 5253 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5254 } else { 5255 PetscScalar *data; 5256 Mat tmat; 5257 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 5258 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 5259 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 5260 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5261 PetscCall(MatDestroy(&tmat)); 5262 } 5263 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 5264 PetscCall(VecSet(mones, -1.0)); 5265 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5266 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5267 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5268 if (!pcbddc->symmetric_primal) { 5269 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 5270 PetscCall(VecSet(mones, -1.0)); 5271 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5272 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5273 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5274 } 5275 PetscCall(MatDestroy(&C_B)); 5276 PetscCall(MatDestroy(&CPHI)); 5277 PetscCall(ISDestroy(&is_dummy)); 5278 PetscCall(VecDestroy(&mones)); 5279 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5280 PetscCall(MatDestroy(&A_II)); 5281 PetscCall(MatDestroy(&A_BB)); 5282 PetscCall(MatDestroy(&A_IB)); 5283 PetscCall(MatDestroy(&A_BI)); 5284 PetscCall(MatDestroy(&TM1)); 5285 PetscCall(MatDestroy(&TM2)); 5286 PetscCall(MatDestroy(&TM3)); 5287 PetscCall(MatDestroy(&TM4)); 5288 PetscCall(MatDestroy(&coarse_phi_D)); 5289 PetscCall(MatDestroy(&coarse_phi_B)); 5290 if (!pcbddc->symmetric_primal) { 5291 PetscCall(MatDestroy(&coarse_psi_D)); 5292 PetscCall(MatDestroy(&coarse_psi_B)); 5293 } 5294 } 5295 5296 #if 0 5297 { 5298 PetscViewer viewer; 5299 char filename[256]; 5300 5301 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level)); 5302 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 5303 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 5304 PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat")); 5305 PetscCall(MatView(*coarse_submat,viewer)); 5306 if (pcbddc->coarse_phi_B) { 5307 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 5308 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 5309 } 5310 if (pcbddc->coarse_phi_D) { 5311 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 5312 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 5313 } 5314 if (pcbddc->coarse_psi_B) { 5315 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 5316 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 5317 } 5318 if (pcbddc->coarse_psi_D) { 5319 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 5320 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 5321 } 5322 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 5323 PetscCall(MatView(pcbddc->local_mat,viewer)); 5324 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 5325 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 5326 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 5327 PetscCall(ISView(pcis->is_I_local,viewer)); 5328 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 5329 PetscCall(ISView(pcis->is_B_local,viewer)); 5330 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 5331 PetscCall(ISView(pcbddc->is_R_local,viewer)); 5332 PetscCall(PetscViewerDestroy(&viewer)); 5333 } 5334 #endif 5335 5336 /* device support */ 5337 { 5338 PetscBool iscuda, iship, iskokkos; 5339 MatType mtype = NULL; 5340 5341 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, "")); 5342 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, "")); 5343 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, "")); 5344 if (iskokkos) { 5345 if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE; 5346 else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE; 5347 } 5348 if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP); 5349 else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP; 5350 else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA; 5351 if (mtype) { 5352 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); 5353 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); 5354 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); 5355 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); 5356 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); 5357 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); 5358 } 5359 } 5360 PetscFunctionReturn(PETSC_SUCCESS); 5361 } 5362 5363 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) 5364 { 5365 Mat *work_mat; 5366 IS isrow_s, iscol_s; 5367 PetscBool rsorted, csorted; 5368 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 5369 5370 PetscFunctionBegin; 5371 PetscCall(ISSorted(isrow, &rsorted)); 5372 PetscCall(ISSorted(iscol, &csorted)); 5373 PetscCall(ISGetLocalSize(isrow, &rsize)); 5374 PetscCall(ISGetLocalSize(iscol, &csize)); 5375 5376 if (!rsorted) { 5377 const PetscInt *idxs; 5378 PetscInt *idxs_sorted, i; 5379 5380 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 5381 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 5382 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i; 5383 PetscCall(ISGetIndices(isrow, &idxs)); 5384 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 5385 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]]; 5386 PetscCall(ISRestoreIndices(isrow, &idxs)); 5387 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 5388 } else { 5389 PetscCall(PetscObjectReference((PetscObject)isrow)); 5390 isrow_s = isrow; 5391 } 5392 5393 if (!csorted) { 5394 if (isrow == iscol) { 5395 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 5396 iscol_s = isrow_s; 5397 } else { 5398 const PetscInt *idxs; 5399 PetscInt *idxs_sorted, i; 5400 5401 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 5402 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 5403 for (i = 0; i < csize; i++) idxs_perm_c[i] = i; 5404 PetscCall(ISGetIndices(iscol, &idxs)); 5405 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 5406 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]]; 5407 PetscCall(ISRestoreIndices(iscol, &idxs)); 5408 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 5409 } 5410 } else { 5411 PetscCall(PetscObjectReference((PetscObject)iscol)); 5412 iscol_s = iscol; 5413 } 5414 5415 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 5416 5417 if (!rsorted || !csorted) { 5418 Mat new_mat; 5419 IS is_perm_r, is_perm_c; 5420 5421 if (!rsorted) { 5422 PetscInt *idxs_r, i; 5423 PetscCall(PetscMalloc1(rsize, &idxs_r)); 5424 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i; 5425 PetscCall(PetscFree(idxs_perm_r)); 5426 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 5427 } else { 5428 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 5429 } 5430 PetscCall(ISSetPermutation(is_perm_r)); 5431 5432 if (!csorted) { 5433 if (isrow_s == iscol_s) { 5434 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5435 is_perm_c = is_perm_r; 5436 } else { 5437 PetscInt *idxs_c, i; 5438 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 5439 PetscCall(PetscMalloc1(csize, &idxs_c)); 5440 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i; 5441 PetscCall(PetscFree(idxs_perm_c)); 5442 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 5443 } 5444 } else { 5445 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 5446 } 5447 PetscCall(ISSetPermutation(is_perm_c)); 5448 5449 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 5450 PetscCall(MatDestroy(&work_mat[0])); 5451 work_mat[0] = new_mat; 5452 PetscCall(ISDestroy(&is_perm_r)); 5453 PetscCall(ISDestroy(&is_perm_c)); 5454 } 5455 5456 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5457 *B = work_mat[0]; 5458 PetscCall(MatDestroyMatrices(1, &work_mat)); 5459 PetscCall(ISDestroy(&isrow_s)); 5460 PetscCall(ISDestroy(&iscol_s)); 5461 PetscFunctionReturn(PETSC_SUCCESS); 5462 } 5463 5464 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5465 { 5466 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5467 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5468 Mat new_mat, lA; 5469 IS is_local, is_global; 5470 PetscInt local_size; 5471 PetscBool isseqaij, issym, isset; 5472 5473 PetscFunctionBegin; 5474 PetscCall(MatDestroy(&pcbddc->local_mat)); 5475 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 5476 if (pcbddc->mat_graph->multi_element) { 5477 Mat *mats, *bdiags; 5478 IS *gsubs; 5479 PetscInt nsubs = pcbddc->n_local_subs; 5480 5481 PetscCall(PetscCalloc1(nsubs * nsubs, &mats)); 5482 PetscCall(PetscMalloc1(nsubs, &gsubs)); 5483 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i])); 5484 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags)); 5485 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i])); 5486 PetscCall(PetscFree(gsubs)); 5487 5488 for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i]; 5489 PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat)); 5490 PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat)); 5491 PetscCall(MatDestroySubMatrices(nsubs, &bdiags)); 5492 PetscCall(PetscFree(mats)); 5493 } else { 5494 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 5495 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 5496 PetscCall(ISDestroy(&is_local)); 5497 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 5498 PetscCall(ISDestroy(&is_global)); 5499 } 5500 if (pcbddc->dbg_flag) { 5501 Vec x, x_change; 5502 PetscReal error; 5503 5504 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 5505 PetscCall(VecSetRandom(x, NULL)); 5506 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 5507 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5508 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5509 PetscCall(MatMult(new_mat, matis->x, matis->y)); 5510 if (!pcbddc->change_interior) { 5511 const PetscScalar *x, *y, *v; 5512 PetscReal lerror = 0.; 5513 PetscInt i; 5514 5515 PetscCall(VecGetArrayRead(matis->x, &x)); 5516 PetscCall(VecGetArrayRead(matis->y, &y)); 5517 PetscCall(VecGetArrayRead(matis->counter, &v)); 5518 for (i = 0; i < local_size; i++) 5519 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 5520 PetscCall(VecRestoreArrayRead(matis->x, &x)); 5521 PetscCall(VecRestoreArrayRead(matis->y, &y)); 5522 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 5523 PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 5524 if (error > PETSC_SMALL) { 5525 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5526 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 5527 } else { 5528 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 5529 } 5530 } 5531 } 5532 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5533 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5534 PetscCall(VecAXPY(x, -1.0, x_change)); 5535 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 5536 if (error > PETSC_SMALL) { 5537 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5538 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 5539 } else { 5540 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 5541 } 5542 } 5543 PetscCall(VecDestroy(&x)); 5544 PetscCall(VecDestroy(&x_change)); 5545 } 5546 5547 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5548 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 5549 5550 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5551 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 5552 if (isseqaij) { 5553 PetscCall(MatDestroy(&pcbddc->local_mat)); 5554 PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 5555 if (lA) { 5556 Mat work; 5557 PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 5558 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5559 PetscCall(MatDestroy(&work)); 5560 } 5561 } else { 5562 Mat work_mat; 5563 5564 PetscCall(MatDestroy(&pcbddc->local_mat)); 5565 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5566 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 5567 PetscCall(MatDestroy(&work_mat)); 5568 if (lA) { 5569 Mat work; 5570 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5571 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 5572 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5573 PetscCall(MatDestroy(&work)); 5574 } 5575 } 5576 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 5577 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 5578 PetscCall(MatDestroy(&new_mat)); 5579 PetscFunctionReturn(PETSC_SUCCESS); 5580 } 5581 5582 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5583 { 5584 PC_IS *pcis = (PC_IS *)pc->data; 5585 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5586 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5587 PetscInt *idx_R_local = NULL; 5588 PetscInt n_vertices, i, j, n_R, n_D, n_B; 5589 PetscInt vbs, bs; 5590 PetscBT bitmask = NULL; 5591 5592 PetscFunctionBegin; 5593 /* 5594 No need to setup local scatters if 5595 - primal space is unchanged 5596 AND 5597 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5598 AND 5599 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5600 */ 5601 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS); 5602 /* destroy old objects */ 5603 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5604 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5605 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5606 /* Set Non-overlapping dimensions */ 5607 n_B = pcis->n_B; 5608 n_D = pcis->n - n_B; 5609 n_vertices = pcbddc->n_vertices; 5610 5611 /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5612 5613 /* create auxiliary bitmask and allocate workspace */ 5614 if (!sub_schurs || !sub_schurs->reuse_solver) { 5615 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 5616 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 5617 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); 5618 5619 for (i = 0, n_R = 0; i < pcis->n; i++) { 5620 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i; 5621 } 5622 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5623 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5624 5625 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5626 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 5627 } 5628 5629 /* Block code */ 5630 vbs = 1; 5631 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 5632 if (bs > 1 && !(n_vertices % bs)) { 5633 PetscBool is_blocked = PETSC_TRUE; 5634 PetscInt *vary; 5635 if (!sub_schurs || !sub_schurs->reuse_solver) { 5636 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 5637 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 5638 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5639 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5640 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5641 for (i = 0; i < pcis->n / bs; i++) { 5642 if (vary[i] != 0 && vary[i] != bs) { 5643 is_blocked = PETSC_FALSE; 5644 break; 5645 } 5646 } 5647 PetscCall(PetscFree(vary)); 5648 } else { 5649 /* Verify directly the R set */ 5650 for (i = 0; i < n_R / bs; i++) { 5651 PetscInt j, node = idx_R_local[bs * i]; 5652 for (j = 1; j < bs; j++) { 5653 if (node != idx_R_local[bs * i + j] - j) { 5654 is_blocked = PETSC_FALSE; 5655 break; 5656 } 5657 } 5658 } 5659 } 5660 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5661 vbs = bs; 5662 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs; 5663 } 5664 } 5665 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5666 if (sub_schurs && sub_schurs->reuse_solver) { 5667 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5668 5669 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5670 PetscCall(ISDestroy(&reuse_solver->is_R)); 5671 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5672 reuse_solver->is_R = pcbddc->is_R_local; 5673 } else { 5674 PetscCall(PetscFree(idx_R_local)); 5675 } 5676 5677 /* print some info if requested */ 5678 if (pcbddc->dbg_flag) { 5679 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5680 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5681 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5682 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5683 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5684 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5685 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5686 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5687 } 5688 5689 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5690 if (!sub_schurs || !sub_schurs->reuse_solver) { 5691 IS is_aux1, is_aux2; 5692 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5693 5694 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5695 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5696 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5697 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5698 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i])); 5699 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5700 for (i = 0, j = 0; i < n_R; i++) { 5701 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5702 } 5703 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5704 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5705 for (i = 0, j = 0; i < n_B; i++) { 5706 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i; 5707 } 5708 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5709 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5710 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5711 PetscCall(ISDestroy(&is_aux1)); 5712 PetscCall(ISDestroy(&is_aux2)); 5713 5714 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5715 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5716 for (i = 0, j = 0; i < n_R; i++) { 5717 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5718 } 5719 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5720 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5721 PetscCall(ISDestroy(&is_aux1)); 5722 } 5723 PetscCall(PetscBTDestroy(&bitmask)); 5724 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5725 } else { 5726 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5727 IS tis; 5728 PetscInt schur_size; 5729 5730 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5731 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5732 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5733 PetscCall(ISDestroy(&tis)); 5734 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5735 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5736 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5737 PetscCall(ISDestroy(&tis)); 5738 } 5739 } 5740 PetscFunctionReturn(PETSC_SUCCESS); 5741 } 5742 5743 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5744 { 5745 MatNullSpace NullSpace; 5746 Mat dmat; 5747 const Vec *nullvecs; 5748 Vec v, v2, *nullvecs2; 5749 VecScatter sct = NULL; 5750 PetscContainer c; 5751 PetscScalar *ddata; 5752 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5753 PetscBool nnsp_has_cnst; 5754 5755 PetscFunctionBegin; 5756 if (!is && !B) { /* MATIS */ 5757 Mat_IS *matis = (Mat_IS *)A->data; 5758 5759 if (!B) PetscCall(MatISGetLocalMat(A, &B)); 5760 sct = matis->cctx; 5761 PetscCall(PetscObjectReference((PetscObject)sct)); 5762 } else { 5763 PetscCall(MatGetNullSpace(B, &NullSpace)); 5764 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace)); 5765 if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5766 } 5767 PetscCall(MatGetNullSpace(A, &NullSpace)); 5768 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace)); 5769 if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5770 5771 PetscCall(MatCreateVecs(A, &v, NULL)); 5772 PetscCall(MatCreateVecs(B, &v2, NULL)); 5773 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); 5774 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs)); 5775 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5776 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5777 PetscCall(VecGetBlockSize(v2, &bs)); 5778 PetscCall(VecGetSize(v2, &N)); 5779 PetscCall(VecGetLocalSize(v2, &n)); 5780 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5781 for (k = 0; k < nnsp_size; k++) { 5782 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5783 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5784 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5785 } 5786 if (nnsp_has_cnst) { 5787 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5788 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5789 } 5790 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5791 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5792 5793 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5794 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B), &c)); 5795 PetscCall(PetscContainerSetPointer(c, ddata)); 5796 PetscCall(PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault)); 5797 PetscCall(PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c)); 5798 PetscCall(PetscContainerDestroy(&c)); 5799 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5800 PetscCall(MatDestroy(&dmat)); 5801 5802 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k])); 5803 PetscCall(PetscFree(nullvecs2)); 5804 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5805 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5806 PetscCall(VecDestroy(&v)); 5807 PetscCall(VecDestroy(&v2)); 5808 PetscCall(VecScatterDestroy(&sct)); 5809 PetscFunctionReturn(PETSC_SUCCESS); 5810 } 5811 5812 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5813 { 5814 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5815 PC_IS *pcis = (PC_IS *)pc->data; 5816 PC pc_temp; 5817 Mat A_RR; 5818 MatNullSpace nnsp; 5819 MatReuse reuse; 5820 PetscScalar m_one = -1.0; 5821 PetscReal value; 5822 PetscInt n_D, n_R; 5823 PetscBool issbaij, opts, isset, issym; 5824 void (*f)(void) = NULL; 5825 char dir_prefix[256], neu_prefix[256], str_level[16]; 5826 size_t len; 5827 5828 PetscFunctionBegin; 5829 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5830 /* approximate solver, propagate NearNullSpace if needed */ 5831 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5832 MatNullSpace gnnsp1, gnnsp2; 5833 PetscBool lhas, ghas; 5834 5835 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5836 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5837 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5838 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5839 PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5840 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); 5841 } 5842 5843 /* compute prefixes */ 5844 PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix))); 5845 PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix))); 5846 if (!pcbddc->current_level) { 5847 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5848 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5849 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5850 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5851 } else { 5852 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)pcbddc->current_level)); 5853 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 5854 len -= 15; /* remove "pc_bddc_coarse_" */ 5855 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 5856 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 5857 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5858 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 5859 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 5860 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5861 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5862 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 5863 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 5864 } 5865 5866 /* DIRICHLET PROBLEM */ 5867 if (dirichlet) { 5868 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5869 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5870 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 5871 if (pcbddc->dbg_flag) { 5872 Mat A_IIn; 5873 5874 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 5875 PetscCall(MatDestroy(&pcis->A_II)); 5876 pcis->A_II = A_IIn; 5877 } 5878 } 5879 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5880 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 5881 5882 /* Matrix for Dirichlet problem is pcis->A_II */ 5883 n_D = pcis->n - pcis->n_B; 5884 opts = PETSC_FALSE; 5885 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5886 opts = PETSC_TRUE; 5887 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 5888 PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel)); 5889 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 5890 /* default */ 5891 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 5892 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 5893 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 5894 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5895 if (issbaij) { 5896 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5897 } else { 5898 PetscCall(PCSetType(pc_temp, PCLU)); 5899 } 5900 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 5901 } 5902 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 5903 PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view")); 5904 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 5905 /* Allow user's customization */ 5906 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5907 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5908 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5909 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 5910 } 5911 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5912 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5913 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5914 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5915 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5916 const PetscInt *idxs; 5917 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5918 5919 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 5920 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 5921 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5922 for (i = 0; i < nl; i++) { 5923 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5924 } 5925 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 5926 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5927 PetscCall(PetscFree(scoords)); 5928 } 5929 if (sub_schurs && sub_schurs->reuse_solver) { 5930 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5931 5932 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 5933 } 5934 5935 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5936 if (!n_D) { 5937 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5938 PetscCall(PCSetType(pc_temp, PCNONE)); 5939 } 5940 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5941 /* set ksp_D into pcis data */ 5942 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5943 PetscCall(KSPDestroy(&pcis->ksp_D)); 5944 pcis->ksp_D = pcbddc->ksp_D; 5945 } 5946 5947 /* NEUMANN PROBLEM */ 5948 A_RR = NULL; 5949 if (neumann) { 5950 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5951 PetscInt ibs, mbs; 5952 PetscBool issbaij, reuse_neumann_solver, isset, issym; 5953 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5954 5955 reuse_neumann_solver = PETSC_FALSE; 5956 if (sub_schurs && sub_schurs->reuse_solver) { 5957 IS iP; 5958 5959 reuse_neumann_solver = PETSC_TRUE; 5960 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 5961 if (iP) reuse_neumann_solver = PETSC_FALSE; 5962 } 5963 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5964 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 5965 if (pcbddc->ksp_R) { /* already created ksp */ 5966 PetscInt nn_R; 5967 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 5968 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5969 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 5970 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5971 PetscCall(KSPReset(pcbddc->ksp_R)); 5972 PetscCall(MatDestroy(&A_RR)); 5973 reuse = MAT_INITIAL_MATRIX; 5974 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5975 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5976 PetscCall(MatDestroy(&A_RR)); 5977 reuse = MAT_INITIAL_MATRIX; 5978 } else { /* safe to reuse the matrix */ 5979 reuse = MAT_REUSE_MATRIX; 5980 } 5981 } 5982 /* last check */ 5983 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5984 PetscCall(MatDestroy(&A_RR)); 5985 reuse = MAT_INITIAL_MATRIX; 5986 } 5987 } else { /* first time, so we need to create the matrix */ 5988 reuse = MAT_INITIAL_MATRIX; 5989 } 5990 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5991 TODO: Get Rid of these conversions */ 5992 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 5993 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 5994 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 5995 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5996 if (matis->A == pcbddc->local_mat) { 5997 PetscCall(MatDestroy(&pcbddc->local_mat)); 5998 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5999 } else { 6000 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 6001 } 6002 } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */ 6003 if (matis->A == pcbddc->local_mat) { 6004 PetscCall(MatDestroy(&pcbddc->local_mat)); 6005 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 6006 } else { 6007 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 6008 } 6009 } 6010 /* extract A_RR */ 6011 if (reuse_neumann_solver) { 6012 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6013 6014 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 6015 PetscCall(MatDestroy(&A_RR)); 6016 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 6017 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 6018 } else { 6019 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 6020 } 6021 } else { 6022 PetscCall(MatDestroy(&A_RR)); 6023 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 6024 PetscCall(PetscObjectReference((PetscObject)A_RR)); 6025 } 6026 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 6027 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 6028 } 6029 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 6030 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 6031 opts = PETSC_FALSE; 6032 if (!pcbddc->ksp_R) { /* create object if not present */ 6033 opts = PETSC_TRUE; 6034 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 6035 PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel)); 6036 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 6037 /* default */ 6038 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 6039 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 6040 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6041 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 6042 if (issbaij) { 6043 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 6044 } else { 6045 PetscCall(PCSetType(pc_temp, PCLU)); 6046 } 6047 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 6048 } 6049 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 6050 PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view")); 6051 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 6052 if (opts) { /* Allow user's customization once */ 6053 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 6054 } 6055 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6056 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 6057 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 6058 } 6059 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6060 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6061 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 6062 if (f && pcbddc->mat_graph->cloc && !nnsp) { 6063 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 6064 const PetscInt *idxs; 6065 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 6066 6067 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 6068 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 6069 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 6070 for (i = 0; i < nl; i++) { 6071 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 6072 } 6073 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 6074 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 6075 PetscCall(PetscFree(scoords)); 6076 } 6077 6078 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 6079 if (!n_R) { 6080 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6081 PetscCall(PCSetType(pc_temp, PCNONE)); 6082 } 6083 /* Reuse solver if it is present */ 6084 if (reuse_neumann_solver) { 6085 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6086 6087 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 6088 } 6089 PetscCall(KSPSetUp(pcbddc->ksp_R)); 6090 } 6091 6092 if (pcbddc->dbg_flag) { 6093 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6094 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6095 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 6096 } 6097 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 6098 6099 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 6100 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); 6101 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); 6102 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); 6103 /* check Dirichlet and Neumann solvers */ 6104 if (pcbddc->dbg_flag) { 6105 if (dirichlet) { /* Dirichlet */ 6106 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 6107 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 6108 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 6109 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 6110 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 6111 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 6112 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value)); 6113 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6114 } 6115 if (neumann) { /* Neumann */ 6116 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 6117 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 6118 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 6119 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 6120 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 6121 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 6122 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value)); 6123 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6124 } 6125 } 6126 /* free Neumann problem's matrix */ 6127 PetscCall(MatDestroy(&A_RR)); 6128 PetscFunctionReturn(PETSC_SUCCESS); 6129 } 6130 6131 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 6132 { 6133 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6134 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6135 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 6136 6137 PetscFunctionBegin; 6138 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.)); 6139 if (!pcbddc->switch_static) { 6140 if (applytranspose && pcbddc->local_auxmat1) { 6141 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 6142 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6143 } 6144 if (!reuse_solver) { 6145 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6146 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6147 } else { 6148 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6149 6150 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6151 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6152 } 6153 } else { 6154 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6155 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6156 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6157 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6158 if (applytranspose && pcbddc->local_auxmat1) { 6159 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 6160 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6161 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6162 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6163 } 6164 } 6165 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6166 if (!reuse_solver || pcbddc->switch_static) { 6167 if (applytranspose) { 6168 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6169 } else { 6170 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6171 } 6172 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 6173 } else { 6174 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6175 6176 if (applytranspose) { 6177 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6178 } else { 6179 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6180 } 6181 } 6182 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6183 PetscCall(VecSet(inout_B, 0.)); 6184 if (!pcbddc->switch_static) { 6185 if (!reuse_solver) { 6186 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6187 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6188 } else { 6189 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6190 6191 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6192 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6193 } 6194 if (!applytranspose && pcbddc->local_auxmat1) { 6195 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6196 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 6197 } 6198 } else { 6199 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6200 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6201 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6202 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6203 if (!applytranspose && pcbddc->local_auxmat1) { 6204 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6205 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 6206 } 6207 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6208 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6209 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6210 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6211 } 6212 PetscFunctionReturn(PETSC_SUCCESS); 6213 } 6214 6215 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 6216 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 6217 { 6218 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6219 PC_IS *pcis = (PC_IS *)pc->data; 6220 const PetscScalar zero = 0.0; 6221 6222 PetscFunctionBegin; 6223 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 6224 if (!pcbddc->benign_apply_coarse_only) { 6225 if (applytranspose) { 6226 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 6227 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6228 } else { 6229 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 6230 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6231 } 6232 } else { 6233 PetscCall(VecSet(pcbddc->vec1_P, zero)); 6234 } 6235 6236 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 6237 if (pcbddc->benign_n) { 6238 PetscScalar *array; 6239 PetscInt j; 6240 6241 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6242 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 6243 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6244 } 6245 6246 /* start communications from local primal nodes to rhs of coarse solver */ 6247 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 6248 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 6249 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 6250 6251 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 6252 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6253 if (pcbddc->coarse_ksp) { 6254 Mat coarse_mat; 6255 Vec rhs, sol; 6256 MatNullSpace nullsp; 6257 PetscBool isbddc = PETSC_FALSE; 6258 6259 if (pcbddc->benign_have_null) { 6260 PC coarse_pc; 6261 6262 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6263 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 6264 /* we need to propagate to coarser levels the need for a possible benign correction */ 6265 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 6266 PC_BDDC *coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6267 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 6268 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 6269 } 6270 } 6271 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 6272 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 6273 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 6274 if (applytranspose) { 6275 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 6276 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 6277 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6278 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 6279 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6280 } else { 6281 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 6282 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 6283 PC coarse_pc; 6284 6285 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 6286 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6287 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 6288 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 6289 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 6290 } else { 6291 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 6292 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6293 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6294 } 6295 } 6296 /* we don't need the benign correction at coarser levels anymore */ 6297 if (pcbddc->benign_have_null && isbddc) { 6298 PC coarse_pc; 6299 PC_BDDC *coarsepcbddc; 6300 6301 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6302 coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6303 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 6304 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 6305 } 6306 } 6307 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6308 6309 /* Local solution on R nodes */ 6310 if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); 6311 /* communications from coarse sol to local primal nodes */ 6312 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 6313 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 6314 6315 /* Sum contributions from the two levels */ 6316 if (!pcbddc->benign_apply_coarse_only) { 6317 if (applytranspose) { 6318 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6319 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6320 } else { 6321 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6322 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6323 } 6324 /* store p0 */ 6325 if (pcbddc->benign_n) { 6326 PetscScalar *array; 6327 PetscInt j; 6328 6329 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6330 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 6331 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6332 } 6333 } else { /* expand the coarse solution */ 6334 if (applytranspose) { 6335 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 6336 } else { 6337 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 6338 } 6339 } 6340 PetscFunctionReturn(PETSC_SUCCESS); 6341 } 6342 6343 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) 6344 { 6345 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6346 Vec from, to; 6347 const PetscScalar *array; 6348 6349 PetscFunctionBegin; 6350 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6351 from = pcbddc->coarse_vec; 6352 to = pcbddc->vec1_P; 6353 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6354 Vec tvec; 6355 6356 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6357 PetscCall(VecResetArray(tvec)); 6358 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 6359 PetscCall(VecGetArrayRead(tvec, &array)); 6360 PetscCall(VecPlaceArray(from, array)); 6361 PetscCall(VecRestoreArrayRead(tvec, &array)); 6362 } 6363 } else { /* from local to global -> put data in coarse right-hand side */ 6364 from = pcbddc->vec1_P; 6365 to = pcbddc->coarse_vec; 6366 } 6367 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6368 PetscFunctionReturn(PETSC_SUCCESS); 6369 } 6370 6371 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6372 { 6373 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6374 Vec from, to; 6375 const PetscScalar *array; 6376 6377 PetscFunctionBegin; 6378 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6379 from = pcbddc->coarse_vec; 6380 to = pcbddc->vec1_P; 6381 } else { /* from local to global -> put data in coarse right-hand side */ 6382 from = pcbddc->vec1_P; 6383 to = pcbddc->coarse_vec; 6384 } 6385 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6386 if (smode == SCATTER_FORWARD) { 6387 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6388 Vec tvec; 6389 6390 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6391 PetscCall(VecGetArrayRead(to, &array)); 6392 PetscCall(VecPlaceArray(tvec, array)); 6393 PetscCall(VecRestoreArrayRead(to, &array)); 6394 } 6395 } else { 6396 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6397 PetscCall(VecResetArray(from)); 6398 } 6399 } 6400 PetscFunctionReturn(PETSC_SUCCESS); 6401 } 6402 6403 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6404 { 6405 PC_IS *pcis = (PC_IS *)pc->data; 6406 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6407 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6408 /* one and zero */ 6409 PetscScalar one = 1.0, zero = 0.0; 6410 /* space to store constraints and their local indices */ 6411 PetscScalar *constraints_data; 6412 PetscInt *constraints_idxs, *constraints_idxs_B; 6413 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 6414 PetscInt *constraints_n; 6415 /* iterators */ 6416 PetscInt i, j, k, total_counts, total_counts_cc, cum; 6417 /* BLAS integers */ 6418 PetscBLASInt lwork, lierr; 6419 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 6420 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 6421 /* reuse */ 6422 PetscInt olocal_primal_size, olocal_primal_size_cc; 6423 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 6424 /* change of basis */ 6425 PetscBool qr_needed; 6426 PetscBT change_basis, qr_needed_idx; 6427 /* auxiliary stuff */ 6428 PetscInt *nnz, *is_indices; 6429 PetscInt ncc; 6430 /* some quantities */ 6431 PetscInt n_vertices, total_primal_vertices, valid_constraints; 6432 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 6433 PetscReal tol; /* tolerance for retaining eigenmodes */ 6434 6435 PetscFunctionBegin; 6436 tol = PetscSqrtReal(PETSC_SMALL); 6437 /* Destroy Mat objects computed previously */ 6438 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6439 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6440 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6441 /* save info on constraints from previous setup (if any) */ 6442 olocal_primal_size = pcbddc->local_primal_size; 6443 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6444 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 6445 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 6446 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 6447 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 6448 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6449 6450 if (!pcbddc->adaptive_selection) { 6451 IS ISForVertices, *ISForFaces, *ISForEdges; 6452 MatNullSpace nearnullsp; 6453 const Vec *nearnullvecs; 6454 Vec *localnearnullsp; 6455 PetscScalar *array; 6456 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 6457 PetscBool nnsp_has_cnst; 6458 /* LAPACK working arrays for SVD or POD */ 6459 PetscBool skip_lapack, boolforchange; 6460 PetscScalar *work; 6461 PetscReal *singular_vals; 6462 #if defined(PETSC_USE_COMPLEX) 6463 PetscReal *rwork; 6464 #endif 6465 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 6466 PetscBLASInt dummy_int = 1; 6467 PetscScalar dummy_scalar = 1.; 6468 PetscBool use_pod = PETSC_FALSE; 6469 6470 /* MKL SVD with same input gives different results on different processes! */ 6471 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6472 use_pod = PETSC_TRUE; 6473 #endif 6474 /* Get index sets for faces, edges and vertices from graph */ 6475 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 6476 o_nf = n_ISForFaces; 6477 o_ne = n_ISForEdges; 6478 n_vertices = 0; 6479 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 6480 /* print some info */ 6481 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6482 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 6483 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 6484 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6485 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6486 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 6487 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 6488 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 6489 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6490 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6491 } 6492 6493 if (!pcbddc->use_vertices) n_vertices = 0; 6494 if (!pcbddc->use_edges) n_ISForEdges = 0; 6495 if (!pcbddc->use_faces) n_ISForFaces = 0; 6496 6497 /* check if near null space is attached to global mat */ 6498 if (pcbddc->use_nnsp) { 6499 PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 6500 } else nearnullsp = NULL; 6501 6502 if (nearnullsp) { 6503 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 6504 /* remove any stored info */ 6505 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6506 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6507 /* store information for BDDC solver reuse */ 6508 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6509 pcbddc->onearnullspace = nearnullsp; 6510 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 6511 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); 6512 } else { /* if near null space is not provided BDDC uses constants by default */ 6513 nnsp_size = 0; 6514 nnsp_has_cnst = PETSC_TRUE; 6515 } 6516 /* get max number of constraints on a single cc */ 6517 max_constraints = nnsp_size; 6518 if (nnsp_has_cnst) max_constraints++; 6519 6520 /* 6521 Evaluate maximum storage size needed by the procedure 6522 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6523 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6524 There can be multiple constraints per connected component 6525 */ 6526 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 6527 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 6528 6529 total_counts = n_ISForFaces + n_ISForEdges; 6530 total_counts *= max_constraints; 6531 total_counts += n_vertices; 6532 PetscCall(PetscBTCreate(total_counts, &change_basis)); 6533 6534 total_counts = 0; 6535 max_size_of_constraint = 0; 6536 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 6537 IS used_is; 6538 if (i < n_ISForEdges) { 6539 used_is = ISForEdges[i]; 6540 } else { 6541 used_is = ISForFaces[i - n_ISForEdges]; 6542 } 6543 PetscCall(ISGetSize(used_is, &j)); 6544 total_counts += j; 6545 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 6546 } 6547 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 6548 6549 /* get local part of global near null space vectors */ 6550 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 6551 for (k = 0; k < nnsp_size; k++) { 6552 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 6553 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6554 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6555 } 6556 6557 /* whether or not to skip lapack calls */ 6558 skip_lapack = PETSC_TRUE; 6559 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6560 6561 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6562 if (!skip_lapack) { 6563 PetscScalar temp_work; 6564 6565 if (use_pod) { 6566 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6567 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 6568 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 6569 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 6570 #if defined(PETSC_USE_COMPLEX) 6571 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 6572 #endif 6573 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6574 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6575 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 6576 lwork = -1; 6577 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6578 #if !defined(PETSC_USE_COMPLEX) 6579 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 6580 #else 6581 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 6582 #endif 6583 PetscCall(PetscFPTrapPop()); 6584 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %d", (int)lierr); 6585 } else { 6586 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6587 /* SVD */ 6588 PetscInt max_n, min_n; 6589 max_n = max_size_of_constraint; 6590 min_n = max_constraints; 6591 if (max_size_of_constraint < max_constraints) { 6592 min_n = max_size_of_constraint; 6593 max_n = max_constraints; 6594 } 6595 PetscCall(PetscMalloc1(min_n, &singular_vals)); 6596 #if defined(PETSC_USE_COMPLEX) 6597 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 6598 #endif 6599 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6600 lwork = -1; 6601 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 6602 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 6603 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 6604 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6605 #if !defined(PETSC_USE_COMPLEX) 6606 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 6607 #else 6608 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 6609 #endif 6610 PetscCall(PetscFPTrapPop()); 6611 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %d", (int)lierr); 6612 #else 6613 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6614 #endif /* on missing GESVD */ 6615 } 6616 /* Allocate optimal workspace */ 6617 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 6618 PetscCall(PetscMalloc1(lwork, &work)); 6619 } 6620 /* Now we can loop on constraining sets */ 6621 total_counts = 0; 6622 constraints_idxs_ptr[0] = 0; 6623 constraints_data_ptr[0] = 0; 6624 /* vertices */ 6625 if (n_vertices) { 6626 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 6627 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 6628 for (i = 0; i < n_vertices; i++) { 6629 constraints_n[total_counts] = 1; 6630 constraints_data[total_counts] = 1.0; 6631 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 6632 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 6633 total_counts++; 6634 } 6635 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 6636 } 6637 6638 /* edges and faces */ 6639 total_counts_cc = total_counts; 6640 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 6641 IS used_is; 6642 PetscBool idxs_copied = PETSC_FALSE; 6643 6644 if (ncc < n_ISForEdges) { 6645 used_is = ISForEdges[ncc]; 6646 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6647 } else { 6648 used_is = ISForFaces[ncc - n_ISForEdges]; 6649 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6650 } 6651 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6652 6653 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6654 if (!size_of_constraint) continue; 6655 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6656 if (nnsp_has_cnst) { 6657 PetscScalar quad_value; 6658 6659 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6660 idxs_copied = PETSC_TRUE; 6661 6662 if (!pcbddc->use_nnsp_true) { 6663 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6664 } else { 6665 quad_value = 1.0; 6666 } 6667 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; 6668 temp_constraints++; 6669 total_counts++; 6670 } 6671 for (k = 0; k < nnsp_size; k++) { 6672 PetscReal real_value; 6673 PetscScalar *ptr_to_data; 6674 6675 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6676 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6677 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]]; 6678 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6679 /* check if array is null on the connected component */ 6680 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6681 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6682 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6683 temp_constraints++; 6684 total_counts++; 6685 if (!idxs_copied) { 6686 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6687 idxs_copied = PETSC_TRUE; 6688 } 6689 } 6690 } 6691 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6692 valid_constraints = temp_constraints; 6693 if (!pcbddc->use_nnsp_true && temp_constraints) { 6694 if (temp_constraints == 1) { /* just normalize the constraint */ 6695 PetscScalar norm, *ptr_to_data; 6696 6697 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6698 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6699 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6700 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6701 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6702 } else { /* perform SVD */ 6703 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6704 6705 if (use_pod) { 6706 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6707 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6708 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6709 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6710 from that computed using LAPACKgesvd 6711 -> This is due to a different computation of eigenvectors in LAPACKheev 6712 -> The quality of the POD-computed basis will be the same */ 6713 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6714 /* Store upper triangular part of correlation matrix */ 6715 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6716 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6717 for (j = 0; j < temp_constraints; j++) { 6718 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); 6719 } 6720 /* compute eigenvalues and eigenvectors of correlation matrix */ 6721 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6722 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6723 #if !defined(PETSC_USE_COMPLEX) 6724 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6725 #else 6726 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6727 #endif 6728 PetscCall(PetscFPTrapPop()); 6729 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %d", (int)lierr); 6730 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6731 j = 0; 6732 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6733 total_counts = total_counts - j; 6734 valid_constraints = temp_constraints - j; 6735 /* scale and copy POD basis into used quadrature memory */ 6736 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6737 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6738 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6739 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6740 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6741 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6742 if (j < temp_constraints) { 6743 PetscInt ii; 6744 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6745 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6746 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6747 PetscCall(PetscFPTrapPop()); 6748 for (k = 0; k < temp_constraints - j; k++) { 6749 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; 6750 } 6751 } 6752 } else { 6753 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6754 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6755 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6756 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6757 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6758 #if !defined(PETSC_USE_COMPLEX) 6759 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6760 #else 6761 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6762 #endif 6763 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 6764 PetscCall(PetscFPTrapPop()); 6765 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6766 k = temp_constraints; 6767 if (k > size_of_constraint) k = size_of_constraint; 6768 j = 0; 6769 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6770 valid_constraints = k - j; 6771 total_counts = total_counts - temp_constraints + valid_constraints; 6772 #else 6773 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6774 #endif /* on missing GESVD */ 6775 } 6776 } 6777 } 6778 /* update pointers information */ 6779 if (valid_constraints) { 6780 constraints_n[total_counts_cc] = valid_constraints; 6781 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6782 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6783 /* set change_of_basis flag */ 6784 if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc)); 6785 total_counts_cc++; 6786 } 6787 } 6788 /* free workspace */ 6789 if (!skip_lapack) { 6790 PetscCall(PetscFree(work)); 6791 #if defined(PETSC_USE_COMPLEX) 6792 PetscCall(PetscFree(rwork)); 6793 #endif 6794 PetscCall(PetscFree(singular_vals)); 6795 PetscCall(PetscFree(correlation_mat)); 6796 PetscCall(PetscFree(temp_basis)); 6797 } 6798 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k])); 6799 PetscCall(PetscFree(localnearnullsp)); 6800 /* free index sets of faces, edges and vertices */ 6801 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6802 } else { 6803 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6804 6805 total_counts = 0; 6806 n_vertices = 0; 6807 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 6808 max_constraints = 0; 6809 total_counts_cc = 0; 6810 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6811 total_counts += pcbddc->adaptive_constraints_n[i]; 6812 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6813 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6814 } 6815 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6816 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6817 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6818 constraints_data = pcbddc->adaptive_constraints_data; 6819 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6820 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6821 total_counts_cc = 0; 6822 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6823 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6824 } 6825 6826 max_size_of_constraint = 0; 6827 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6828 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6829 /* Change of basis */ 6830 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6831 if (pcbddc->use_change_of_basis) { 6832 for (i = 0; i < sub_schurs->n_subs; i++) { 6833 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices)); 6834 } 6835 } 6836 } 6837 pcbddc->local_primal_size = total_counts; 6838 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6839 6840 /* map constraints_idxs in boundary numbering */ 6841 if (pcbddc->use_change_of_basis) { 6842 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6843 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6844 } 6845 6846 /* Create constraint matrix */ 6847 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6848 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6849 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6850 6851 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6852 /* determine if a QR strategy is needed for change of basis */ 6853 qr_needed = pcbddc->use_qr_single; 6854 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 6855 total_primal_vertices = 0; 6856 pcbddc->local_primal_size_cc = 0; 6857 for (i = 0; i < total_counts_cc; i++) { 6858 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6859 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6860 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6861 pcbddc->local_primal_size_cc += 1; 6862 } else if (PetscBTLookup(change_basis, i)) { 6863 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6864 pcbddc->local_primal_size_cc += constraints_n[i]; 6865 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6866 PetscCall(PetscBTSet(qr_needed_idx, i)); 6867 qr_needed = PETSC_TRUE; 6868 } 6869 } else { 6870 pcbddc->local_primal_size_cc += 1; 6871 } 6872 } 6873 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6874 pcbddc->n_vertices = total_primal_vertices; 6875 /* permute indices in order to have a sorted set of vertices */ 6876 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 6877 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 6878 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 6879 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 6880 6881 /* nonzero structure of constraint matrix */ 6882 /* and get reference dof for local constraints */ 6883 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 6884 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 6885 6886 j = total_primal_vertices; 6887 total_counts = total_primal_vertices; 6888 cum = total_primal_vertices; 6889 for (i = n_vertices; i < total_counts_cc; i++) { 6890 if (!PetscBTLookup(change_basis, i)) { 6891 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6892 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6893 cum++; 6894 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6895 for (k = 0; k < constraints_n[i]; k++) { 6896 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6897 nnz[j + k] = size_of_constraint; 6898 } 6899 j += constraints_n[i]; 6900 } 6901 } 6902 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 6903 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6904 PetscCall(PetscFree(nnz)); 6905 6906 /* set values in constraint matrix */ 6907 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); 6908 total_counts = total_primal_vertices; 6909 for (i = n_vertices; i < total_counts_cc; i++) { 6910 if (!PetscBTLookup(change_basis, i)) { 6911 PetscInt *cols; 6912 6913 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6914 cols = constraints_idxs + constraints_idxs_ptr[i]; 6915 for (k = 0; k < constraints_n[i]; k++) { 6916 PetscInt row = total_counts + k; 6917 PetscScalar *vals; 6918 6919 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 6920 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 6921 } 6922 total_counts += constraints_n[i]; 6923 } 6924 } 6925 /* assembling */ 6926 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6927 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6928 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 6929 6930 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6931 if (pcbddc->use_change_of_basis) { 6932 /* dual and primal dofs on a single cc */ 6933 PetscInt dual_dofs, primal_dofs; 6934 /* working stuff for GEQRF */ 6935 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 6936 PetscBLASInt lqr_work; 6937 /* working stuff for UNGQR */ 6938 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 6939 PetscBLASInt lgqr_work; 6940 /* working stuff for TRTRS */ 6941 PetscScalar *trs_rhs = NULL; 6942 PetscBLASInt Blas_NRHS; 6943 /* pointers for values insertion into change of basis matrix */ 6944 PetscInt *start_rows, *start_cols; 6945 PetscScalar *start_vals; 6946 /* working stuff for values insertion */ 6947 PetscBT is_primal; 6948 PetscInt *aux_primal_numbering_B; 6949 /* matrix sizes */ 6950 PetscInt global_size, local_size; 6951 /* temporary change of basis */ 6952 Mat localChangeOfBasisMatrix; 6953 /* extra space for debugging */ 6954 PetscScalar *dbg_work = NULL; 6955 6956 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 6957 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 6958 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 6959 /* nonzeros for local mat */ 6960 PetscCall(PetscMalloc1(pcis->n, &nnz)); 6961 if (!pcbddc->benign_change || pcbddc->fake_change) { 6962 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 6963 } else { 6964 const PetscInt *ii; 6965 PetscInt n; 6966 PetscBool flg_row; 6967 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6968 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 6969 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6970 } 6971 for (i = n_vertices; i < total_counts_cc; i++) { 6972 if (PetscBTLookup(change_basis, i)) { 6973 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6974 if (PetscBTLookup(qr_needed_idx, i)) { 6975 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 6976 } else { 6977 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6978 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 6979 } 6980 } 6981 } 6982 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 6983 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6984 PetscCall(PetscFree(nnz)); 6985 /* Set interior change in the matrix */ 6986 if (!pcbddc->benign_change || pcbddc->fake_change) { 6987 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); 6988 } else { 6989 const PetscInt *ii, *jj; 6990 PetscScalar *aa; 6991 PetscInt n; 6992 PetscBool flg_row; 6993 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6994 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 6995 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); 6996 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 6997 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6998 } 6999 7000 if (pcbddc->dbg_flag) { 7001 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 7002 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 7003 } 7004 7005 /* Now we loop on the constraints which need a change of basis */ 7006 /* 7007 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 7008 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 7009 7010 Basic blocks of change of basis matrix T computed: 7011 7012 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 7013 7014 | 1 0 ... 0 s_1/S | 7015 | 0 1 ... 0 s_2/S | 7016 | ... | 7017 | 0 ... 1 s_{n-1}/S | 7018 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 7019 7020 with S = \sum_{i=1}^n s_i^2 7021 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 7022 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 7023 7024 - QR decomposition of constraints otherwise 7025 */ 7026 if (qr_needed && max_size_of_constraint) { 7027 /* space to store Q */ 7028 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 7029 /* array to store scaling factors for reflectors */ 7030 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 7031 /* first we issue queries for optimal work */ 7032 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7033 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 7034 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7035 lqr_work = -1; 7036 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 7037 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %d", (int)lierr); 7038 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 7039 PetscCall(PetscMalloc1(lqr_work, &qr_work)); 7040 lgqr_work = -1; 7041 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7042 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 7043 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 7044 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7045 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 7046 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 7047 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %d", (int)lierr); 7048 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 7049 PetscCall(PetscMalloc1(lgqr_work, &gqr_work)); 7050 /* array to store rhs and solution of triangular solver */ 7051 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 7052 /* allocating workspace for check */ 7053 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); 7054 } 7055 /* array to store whether a node is primal or not */ 7056 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 7057 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 7058 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 7059 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 7060 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); 7061 PetscCall(PetscFree(aux_primal_numbering_B)); 7062 7063 /* loop on constraints and see whether or not they need a change of basis and compute it */ 7064 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 7065 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 7066 if (PetscBTLookup(change_basis, total_counts)) { 7067 /* get constraint info */ 7068 primal_dofs = constraints_n[total_counts]; 7069 dual_dofs = size_of_constraint - primal_dofs; 7070 7071 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); 7072 7073 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 7074 7075 /* copy quadrature constraints for change of basis check */ 7076 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7077 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 7078 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7079 7080 /* compute QR decomposition of constraints */ 7081 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7082 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7083 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7084 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7085 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 7086 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %d", (int)lierr); 7087 PetscCall(PetscFPTrapPop()); 7088 7089 /* explicitly compute R^-T */ 7090 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 7091 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 7092 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7093 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 7094 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7095 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7096 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7097 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 7098 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %d", (int)lierr); 7099 PetscCall(PetscFPTrapPop()); 7100 7101 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 7102 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7103 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7104 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7105 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7106 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7107 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 7108 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %d", (int)lierr); 7109 PetscCall(PetscFPTrapPop()); 7110 7111 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 7112 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 7113 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 7114 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7115 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7116 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7117 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7118 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7119 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 7120 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7121 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 7122 PetscCall(PetscFPTrapPop()); 7123 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7124 7125 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 7126 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 7127 /* insert cols for primal dofs */ 7128 for (j = 0; j < primal_dofs; j++) { 7129 start_vals = &qr_basis[j * size_of_constraint]; 7130 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7131 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7132 } 7133 /* insert cols for dual dofs */ 7134 for (j = 0, k = 0; j < dual_dofs; k++) { 7135 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 7136 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 7137 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7138 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7139 j++; 7140 } 7141 } 7142 7143 /* check change of basis */ 7144 if (pcbddc->dbg_flag) { 7145 PetscInt ii, jj; 7146 PetscBool valid_qr = PETSC_TRUE; 7147 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 7148 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7149 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 7150 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7151 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 7152 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 7153 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7154 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 7155 PetscCall(PetscFPTrapPop()); 7156 for (jj = 0; jj < size_of_constraint; jj++) { 7157 for (ii = 0; ii < primal_dofs; ii++) { 7158 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 7159 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 7160 } 7161 } 7162 if (!valid_qr) { 7163 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 7164 for (jj = 0; jj < size_of_constraint; jj++) { 7165 for (ii = 0; ii < primal_dofs; ii++) { 7166 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 7167 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7168 } 7169 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 7170 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7171 } 7172 } 7173 } 7174 } else { 7175 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 7176 } 7177 } 7178 } else { /* simple transformation block */ 7179 PetscInt row, col; 7180 PetscScalar val, norm; 7181 7182 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7183 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 7184 for (j = 0; j < size_of_constraint; j++) { 7185 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 7186 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7187 if (!PetscBTLookup(is_primal, row_B)) { 7188 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 7189 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 7190 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 7191 } else { 7192 for (k = 0; k < size_of_constraint; k++) { 7193 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7194 if (row != col) { 7195 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 7196 } else { 7197 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 7198 } 7199 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 7200 } 7201 } 7202 } 7203 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); 7204 } 7205 } else { 7206 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); 7207 } 7208 } 7209 7210 /* free workspace */ 7211 if (qr_needed) { 7212 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 7213 PetscCall(PetscFree(trs_rhs)); 7214 PetscCall(PetscFree(qr_tau)); 7215 PetscCall(PetscFree(qr_work)); 7216 PetscCall(PetscFree(gqr_work)); 7217 PetscCall(PetscFree(qr_basis)); 7218 } 7219 PetscCall(PetscBTDestroy(&is_primal)); 7220 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7221 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7222 7223 /* assembling of global change of variable */ 7224 if (!pcbddc->fake_change) { 7225 Mat tmat; 7226 PetscInt bs; 7227 7228 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 7229 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 7230 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 7231 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 7232 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 7233 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 7234 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix)); 7235 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ)); 7236 PetscCall(MatGetBlockSize(pc->pmat, &bs)); 7237 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs)); 7238 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size)); 7239 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE)); 7240 PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 7241 PetscCall(MatDestroy(&tmat)); 7242 PetscCall(VecSet(pcis->vec1_global, 0.0)); 7243 PetscCall(VecSet(pcis->vec1_N, 1.0)); 7244 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7245 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7246 PetscCall(VecReciprocal(pcis->vec1_global)); 7247 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 7248 7249 /* check */ 7250 if (pcbddc->dbg_flag) { 7251 PetscReal error; 7252 Vec x, x_change; 7253 7254 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 7255 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 7256 PetscCall(VecSetRandom(x, NULL)); 7257 PetscCall(VecCopy(x, pcis->vec1_global)); 7258 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7259 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7260 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 7261 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7262 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7263 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 7264 PetscCall(VecAXPY(x, -1.0, x_change)); 7265 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 7266 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 7267 PetscCall(VecDestroy(&x)); 7268 PetscCall(VecDestroy(&x_change)); 7269 } 7270 /* adapt sub_schurs computed (if any) */ 7271 if (pcbddc->use_deluxe_scaling) { 7272 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 7273 7274 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 7275 if (sub_schurs && sub_schurs->S_Ej_all) { 7276 Mat S_new, tmat; 7277 IS is_all_N, is_V_Sall = NULL; 7278 7279 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 7280 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 7281 if (pcbddc->deluxe_zerorows) { 7282 ISLocalToGlobalMapping NtoSall; 7283 IS is_V; 7284 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 7285 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 7286 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 7287 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 7288 PetscCall(ISDestroy(&is_V)); 7289 } 7290 PetscCall(ISDestroy(&is_all_N)); 7291 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7292 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 7293 PetscCall(PetscObjectReference((PetscObject)S_new)); 7294 if (pcbddc->deluxe_zerorows) { 7295 const PetscScalar *array; 7296 const PetscInt *idxs_V, *idxs_all; 7297 PetscInt i, n_V; 7298 7299 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7300 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 7301 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 7302 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 7303 PetscCall(VecGetArrayRead(pcis->D, &array)); 7304 for (i = 0; i < n_V; i++) { 7305 PetscScalar val; 7306 PetscInt idx; 7307 7308 idx = idxs_V[i]; 7309 val = array[idxs_all[idxs_V[i]]]; 7310 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 7311 } 7312 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 7313 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 7314 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 7315 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 7316 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 7317 } 7318 sub_schurs->S_Ej_all = S_new; 7319 PetscCall(MatDestroy(&S_new)); 7320 if (sub_schurs->sum_S_Ej_all) { 7321 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7322 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 7323 PetscCall(PetscObjectReference((PetscObject)S_new)); 7324 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7325 sub_schurs->sum_S_Ej_all = S_new; 7326 PetscCall(MatDestroy(&S_new)); 7327 } 7328 PetscCall(ISDestroy(&is_V_Sall)); 7329 PetscCall(MatDestroy(&tmat)); 7330 } 7331 /* destroy any change of basis context in sub_schurs */ 7332 if (sub_schurs && sub_schurs->change) { 7333 PetscInt i; 7334 7335 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i])); 7336 PetscCall(PetscFree(sub_schurs->change)); 7337 } 7338 } 7339 if (pcbddc->switch_static) { /* need to save the local change */ 7340 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7341 } else { 7342 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 7343 } 7344 /* determine if any process has changed the pressures locally */ 7345 pcbddc->change_interior = pcbddc->benign_have_null; 7346 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7347 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7348 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7349 pcbddc->use_qr_single = qr_needed; 7350 } 7351 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7352 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7353 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7354 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7355 } else { 7356 Mat benign_global = NULL; 7357 if (pcbddc->benign_have_null) { 7358 Mat M; 7359 7360 pcbddc->change_interior = PETSC_TRUE; 7361 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 7362 PetscCall(VecReciprocal(pcis->vec1_N)); 7363 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 7364 if (pcbddc->benign_change) { 7365 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 7366 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 7367 } else { 7368 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 7369 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 7370 } 7371 PetscCall(MatISSetLocalMat(benign_global, M)); 7372 PetscCall(MatDestroy(&M)); 7373 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 7374 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 7375 } 7376 if (pcbddc->user_ChangeOfBasisMatrix) { 7377 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix)); 7378 PetscCall(MatDestroy(&benign_global)); 7379 } else if (pcbddc->benign_have_null) { 7380 pcbddc->ChangeOfBasisMatrix = benign_global; 7381 } 7382 } 7383 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7384 IS is_global; 7385 const PetscInt *gidxs; 7386 7387 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 7388 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 7389 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 7390 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 7391 PetscCall(ISDestroy(&is_global)); 7392 } 7393 } 7394 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); 7395 7396 if (!pcbddc->fake_change) { 7397 /* add pressure dofs to set of primal nodes for numbering purposes */ 7398 for (i = 0; i < pcbddc->benign_n; i++) { 7399 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7400 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7401 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7402 pcbddc->local_primal_size_cc++; 7403 pcbddc->local_primal_size++; 7404 } 7405 7406 /* check if a new primal space has been introduced (also take into account benign trick) */ 7407 pcbddc->new_primal_space_local = PETSC_TRUE; 7408 if (olocal_primal_size == pcbddc->local_primal_size) { 7409 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7410 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7411 if (!pcbddc->new_primal_space_local) { 7412 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7413 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7414 } 7415 } 7416 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7417 PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 7418 } 7419 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 7420 7421 /* flush dbg viewer */ 7422 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7423 7424 /* free workspace */ 7425 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7426 PetscCall(PetscBTDestroy(&change_basis)); 7427 if (!pcbddc->adaptive_selection) { 7428 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 7429 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 7430 } else { 7431 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 7432 PetscCall(PetscFree(constraints_n)); 7433 PetscCall(PetscFree(constraints_idxs_B)); 7434 } 7435 PetscFunctionReturn(PETSC_SUCCESS); 7436 } 7437 7438 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7439 { 7440 ISLocalToGlobalMapping map; 7441 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7442 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 7443 PetscInt i, N; 7444 PetscBool rcsr = PETSC_FALSE; 7445 7446 PetscFunctionBegin; 7447 if (pcbddc->recompute_topography) { 7448 pcbddc->graphanalyzed = PETSC_FALSE; 7449 /* Reset previously computed graph */ 7450 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7451 /* Init local Graph struct */ 7452 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 7453 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 7454 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 7455 7456 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); 7457 /* Check validity of the csr graph passed in by the user */ 7458 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 7459 pcbddc->mat_graph->nvtxs); 7460 7461 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7462 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7463 PetscInt *xadj, *adjncy; 7464 PetscInt nvtxs; 7465 PetscBool flg_row; 7466 Mat A; 7467 7468 PetscCall(PetscObjectReference((PetscObject)matis->A)); 7469 A = matis->A; 7470 for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) { 7471 Mat AtA; 7472 7473 PetscCall(MatProductCreate(A, A, NULL, &AtA)); 7474 PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_")); 7475 PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB)); 7476 PetscCall(MatProductSetFromOptions(AtA)); 7477 PetscCall(MatProductSymbolic(AtA)); 7478 PetscCall(MatProductClear(AtA)); 7479 /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */ 7480 AtA->assembled = PETSC_TRUE; 7481 PetscCall(MatDestroy(&A)); 7482 A = AtA; 7483 } 7484 PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7485 if (flg_row) { 7486 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 7487 pcbddc->computed_rowadj = PETSC_TRUE; 7488 PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7489 rcsr = PETSC_TRUE; 7490 } 7491 PetscCall(MatDestroy(&A)); 7492 } 7493 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7494 7495 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7496 PetscReal *lcoords; 7497 PetscInt n; 7498 MPI_Datatype dimrealtype; 7499 7500 /* TODO: support for blocked */ 7501 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 7502 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7503 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 7504 PetscCallMPI(MPI_Type_contiguous((PetscMPIInt)pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype)); 7505 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7506 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7507 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7508 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7509 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7510 7511 pcbddc->mat_graph->coords = lcoords; 7512 pcbddc->mat_graph->cloc = PETSC_TRUE; 7513 pcbddc->mat_graph->cnloc = n; 7514 } 7515 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 7516 pcbddc->mat_graph->nvtxs); 7517 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7518 7519 /* attach info on disconnected subdomains if present */ 7520 if (pcbddc->n_local_subs) { 7521 PetscInt *local_subs, n, totn; 7522 7523 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7524 PetscCall(PetscMalloc1(n, &local_subs)); 7525 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 7526 for (i = 0; i < pcbddc->n_local_subs; i++) { 7527 const PetscInt *idxs; 7528 PetscInt nl, j; 7529 7530 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 7531 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 7532 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 7533 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 7534 } 7535 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 7536 pcbddc->mat_graph->n_local_subs = totn + 1; 7537 pcbddc->mat_graph->local_subs = local_subs; 7538 } 7539 7540 /* Setup of Graph */ 7541 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 7542 } 7543 7544 if (!pcbddc->graphanalyzed) { 7545 /* Graph's connected components analysis */ 7546 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7547 pcbddc->graphanalyzed = PETSC_TRUE; 7548 pcbddc->corner_selected = pcbddc->corner_selection; 7549 } 7550 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7551 PetscFunctionReturn(PETSC_SUCCESS); 7552 } 7553 7554 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7555 { 7556 PetscInt i, j, n; 7557 PetscScalar *alphas; 7558 PetscReal norm, *onorms; 7559 7560 PetscFunctionBegin; 7561 n = *nio; 7562 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 7563 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 7564 PetscCall(VecNormalize(vecs[0], &norm)); 7565 if (norm < PETSC_SMALL) { 7566 onorms[0] = 0.0; 7567 PetscCall(VecSet(vecs[0], 0.0)); 7568 } else { 7569 onorms[0] = norm; 7570 } 7571 7572 for (i = 1; i < n; i++) { 7573 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 7574 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 7575 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 7576 PetscCall(VecNormalize(vecs[i], &norm)); 7577 if (norm < PETSC_SMALL) { 7578 onorms[i] = 0.0; 7579 PetscCall(VecSet(vecs[i], 0.0)); 7580 } else { 7581 onorms[i] = norm; 7582 } 7583 } 7584 /* push nonzero vectors at the beginning */ 7585 for (i = 0; i < n; i++) { 7586 if (onorms[i] == 0.0) { 7587 for (j = i + 1; j < n; j++) { 7588 if (onorms[j] != 0.0) { 7589 PetscCall(VecCopy(vecs[j], vecs[i])); 7590 onorms[j] = 0.0; 7591 } 7592 } 7593 } 7594 } 7595 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7596 PetscCall(PetscFree2(alphas, onorms)); 7597 PetscFunctionReturn(PETSC_SUCCESS); 7598 } 7599 7600 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) 7601 { 7602 ISLocalToGlobalMapping mapping; 7603 Mat A; 7604 PetscInt n_neighs, *neighs, *n_shared, **shared; 7605 PetscMPIInt size, rank, color; 7606 PetscInt *xadj, *adjncy; 7607 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 7608 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 7609 PetscInt void_procs, *procs_candidates = NULL; 7610 PetscInt xadj_count, *count; 7611 PetscBool ismatis, use_vwgt = PETSC_FALSE; 7612 PetscSubcomm psubcomm; 7613 MPI_Comm subcomm; 7614 7615 PetscFunctionBegin; 7616 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7617 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7618 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7619 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 7620 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 7621 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 7622 7623 if (have_void) *have_void = PETSC_FALSE; 7624 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 7625 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7626 PetscCall(MatISGetLocalMat(mat, &A)); 7627 PetscCall(MatGetLocalSize(A, &n, NULL)); 7628 im_active = !!n; 7629 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 7630 void_procs = size - active_procs; 7631 /* get ranks of non-active processes in mat communicator */ 7632 if (void_procs) { 7633 PetscInt ncand; 7634 7635 if (have_void) *have_void = PETSC_TRUE; 7636 PetscCall(PetscMalloc1(size, &procs_candidates)); 7637 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 7638 for (i = 0, ncand = 0; i < size; i++) { 7639 if (!procs_candidates[i]) procs_candidates[ncand++] = i; 7640 } 7641 /* force n_subdomains to be not greater that the number of non-active processes */ 7642 *n_subdomains = PetscMin(void_procs, *n_subdomains); 7643 } 7644 7645 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7646 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7647 PetscCall(MatGetSize(mat, &N, NULL)); 7648 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7649 PetscInt issize, isidx, dest; 7650 if (*n_subdomains == 1) dest = 0; 7651 else dest = rank; 7652 if (im_active) { 7653 issize = 1; 7654 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7655 isidx = procs_candidates[dest]; 7656 } else { 7657 isidx = dest; 7658 } 7659 } else { 7660 issize = 0; 7661 isidx = -1; 7662 } 7663 if (*n_subdomains != 1) *n_subdomains = active_procs; 7664 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 7665 PetscCall(PetscFree(procs_candidates)); 7666 PetscFunctionReturn(PETSC_SUCCESS); 7667 } 7668 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL)); 7669 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL)); 7670 threshold = PetscMax(threshold, 2); 7671 7672 /* Get info on mapping */ 7673 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7674 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7675 7676 /* build local CSR graph of subdomains' connectivity */ 7677 PetscCall(PetscMalloc1(2, &xadj)); 7678 xadj[0] = 0; 7679 xadj[1] = PetscMax(n_neighs - 1, 0); 7680 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7681 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7682 PetscCall(PetscCalloc1(n, &count)); 7683 for (i = 1; i < n_neighs; i++) 7684 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7685 7686 xadj_count = 0; 7687 for (i = 1; i < n_neighs; i++) { 7688 for (j = 0; j < n_shared[i]; j++) { 7689 if (count[shared[i][j]] < threshold) { 7690 adjncy[xadj_count] = neighs[i]; 7691 adjncy_wgt[xadj_count] = n_shared[i]; 7692 xadj_count++; 7693 break; 7694 } 7695 } 7696 } 7697 xadj[1] = xadj_count; 7698 PetscCall(PetscFree(count)); 7699 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7700 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7701 7702 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7703 7704 /* Restrict work on active processes only */ 7705 PetscCall(PetscMPIIntCast(im_active, &color)); 7706 if (void_procs) { 7707 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7708 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7709 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7710 subcomm = PetscSubcommChild(psubcomm); 7711 } else { 7712 psubcomm = NULL; 7713 subcomm = PetscObjectComm((PetscObject)mat); 7714 } 7715 7716 v_wgt = NULL; 7717 if (!color) { 7718 PetscCall(PetscFree(xadj)); 7719 PetscCall(PetscFree(adjncy)); 7720 PetscCall(PetscFree(adjncy_wgt)); 7721 } else { 7722 Mat subdomain_adj; 7723 IS new_ranks, new_ranks_contig; 7724 MatPartitioning partitioner; 7725 PetscInt rstart, rend; 7726 PetscMPIInt irstart = 0, irend = 0; 7727 PetscInt *is_indices, *oldranks; 7728 PetscMPIInt size; 7729 PetscBool aggregate; 7730 7731 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7732 if (void_procs) { 7733 PetscInt prank = rank; 7734 PetscCall(PetscMalloc1(size, &oldranks)); 7735 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7736 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); 7737 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7738 } else { 7739 oldranks = NULL; 7740 } 7741 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7742 if (aggregate) { /* TODO: all this part could be made more efficient */ 7743 PetscInt lrows, row, ncols, *cols; 7744 PetscMPIInt nrank; 7745 PetscScalar *vals; 7746 7747 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7748 lrows = 0; 7749 if (nrank < redprocs) { 7750 lrows = size / redprocs; 7751 if (nrank < size % redprocs) lrows++; 7752 } 7753 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7754 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7755 irstart = (PetscMPIInt)rstart; /* from construction these are always less than size */ 7756 irend = (PetscMPIInt)rend; 7757 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7758 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7759 row = nrank; 7760 ncols = xadj[1] - xadj[0]; 7761 cols = adjncy; 7762 PetscCall(PetscMalloc1(ncols, &vals)); 7763 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7764 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7765 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7766 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7767 PetscCall(PetscFree(xadj)); 7768 PetscCall(PetscFree(adjncy)); 7769 PetscCall(PetscFree(adjncy_wgt)); 7770 PetscCall(PetscFree(vals)); 7771 if (use_vwgt) { 7772 Vec v; 7773 const PetscScalar *array; 7774 PetscInt nl; 7775 7776 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7777 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7778 PetscCall(VecAssemblyBegin(v)); 7779 PetscCall(VecAssemblyEnd(v)); 7780 PetscCall(VecGetLocalSize(v, &nl)); 7781 PetscCall(VecGetArrayRead(v, &array)); 7782 PetscCall(PetscMalloc1(nl, &v_wgt)); 7783 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7784 PetscCall(VecRestoreArrayRead(v, &array)); 7785 PetscCall(VecDestroy(&v)); 7786 } 7787 } else { 7788 PetscCall(MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7789 if (use_vwgt) { 7790 PetscCall(PetscMalloc1(1, &v_wgt)); 7791 v_wgt[0] = n; 7792 } 7793 } 7794 /* PetscCall(MatView(subdomain_adj,0)); */ 7795 7796 /* Partition */ 7797 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7798 #if defined(PETSC_HAVE_PTSCOTCH) 7799 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7800 #elif defined(PETSC_HAVE_PARMETIS) 7801 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7802 #else 7803 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7804 #endif 7805 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7806 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7807 *n_subdomains = PetscMin((PetscInt)size, *n_subdomains); 7808 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7809 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7810 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7811 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7812 7813 /* renumber new_ranks to avoid "holes" in new set of processors */ 7814 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7815 PetscCall(ISDestroy(&new_ranks)); 7816 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7817 if (!aggregate) { 7818 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7819 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7820 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7821 } else if (oldranks) { 7822 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7823 } else { 7824 ranks_send_to_idx[0] = is_indices[0]; 7825 } 7826 } else { 7827 PetscInt idx = 0; 7828 PetscMPIInt tag; 7829 MPI_Request *reqs; 7830 7831 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7832 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7833 for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); 7834 PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7835 PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE)); 7836 PetscCall(PetscFree(reqs)); 7837 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7838 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7839 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7840 } else if (oldranks) { 7841 ranks_send_to_idx[0] = oldranks[idx]; 7842 } else { 7843 ranks_send_to_idx[0] = idx; 7844 } 7845 } 7846 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7847 /* clean up */ 7848 PetscCall(PetscFree(oldranks)); 7849 PetscCall(ISDestroy(&new_ranks_contig)); 7850 PetscCall(MatDestroy(&subdomain_adj)); 7851 PetscCall(MatPartitioningDestroy(&partitioner)); 7852 } 7853 PetscCall(PetscSubcommDestroy(&psubcomm)); 7854 PetscCall(PetscFree(procs_candidates)); 7855 7856 /* assemble parallel IS for sends */ 7857 i = 1; 7858 if (!color) i = 0; 7859 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 7860 PetscFunctionReturn(PETSC_SUCCESS); 7861 } 7862 7863 typedef enum { 7864 MATDENSE_PRIVATE = 0, 7865 MATAIJ_PRIVATE, 7866 MATBAIJ_PRIVATE, 7867 MATSBAIJ_PRIVATE 7868 } MatTypePrivate; 7869 7870 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7871 { 7872 Mat local_mat; 7873 IS is_sends_internal; 7874 PetscInt rows, cols, new_local_rows; 7875 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 7876 PetscBool ismatis, isdense, newisdense, destroy_mat; 7877 ISLocalToGlobalMapping l2gmap; 7878 PetscInt *l2gmap_indices; 7879 const PetscInt *is_indices; 7880 MatType new_local_type; 7881 /* buffers */ 7882 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 7883 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 7884 PetscInt *recv_buffer_idxs_local; 7885 PetscScalar *ptr_vals, *recv_buffer_vals; 7886 const PetscScalar *send_buffer_vals; 7887 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 7888 /* MPI */ 7889 MPI_Comm comm, comm_n; 7890 PetscSubcomm subcomm; 7891 PetscMPIInt n_sends, n_recvs, size; 7892 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 7893 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 7894 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 7895 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 7896 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 7897 7898 PetscFunctionBegin; 7899 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7900 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7901 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7902 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 7903 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 7904 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 7905 PetscValidLogicalCollectiveBool(mat, reuse, 6); 7906 PetscValidLogicalCollectiveInt(mat, nis, 8); 7907 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 7908 if (nvecs) { 7909 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 7910 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 7911 } 7912 /* further checks */ 7913 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7914 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7915 /* XXX hack for multi_element */ 7916 if (!isdense) PetscCall(MatConvert(local_mat, MATDENSE, MAT_INPLACE_MATRIX, &local_mat)); 7917 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7918 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7919 7920 PetscCall(MatGetSize(local_mat, &rows, &cols)); 7921 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 7922 if (reuse && *mat_n) { 7923 PetscInt mrows, mcols, mnrows, mncols; 7924 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 7925 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 7926 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 7927 PetscCall(MatGetSize(mat, &mrows, &mcols)); 7928 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 7929 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 7930 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 7931 } 7932 PetscCall(MatGetBlockSize(local_mat, &bs)); 7933 PetscValidLogicalCollectiveInt(mat, bs, 1); 7934 7935 /* prepare IS for sending if not provided */ 7936 if (!is_sends) { 7937 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 7938 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 7939 } else { 7940 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7941 is_sends_internal = is_sends; 7942 } 7943 7944 /* get comm */ 7945 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 7946 7947 /* compute number of sends */ 7948 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 7949 PetscCall(PetscMPIIntCast(i, &n_sends)); 7950 7951 /* compute number of receives */ 7952 PetscCallMPI(MPI_Comm_size(comm, &size)); 7953 PetscCall(PetscMalloc1(size, &iflags)); 7954 PetscCall(PetscArrayzero(iflags, size)); 7955 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 7956 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 7957 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 7958 PetscCall(PetscFree(iflags)); 7959 7960 /* restrict comm if requested */ 7961 subcomm = NULL; 7962 destroy_mat = PETSC_FALSE; 7963 if (restrict_comm) { 7964 PetscMPIInt color, subcommsize; 7965 7966 color = 0; 7967 if (restrict_full) { 7968 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7969 } else { 7970 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7971 } 7972 PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 7973 subcommsize = size - subcommsize; 7974 /* check if reuse has been requested */ 7975 if (reuse) { 7976 if (*mat_n) { 7977 PetscMPIInt subcommsize2; 7978 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 7979 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 7980 comm_n = PetscObjectComm((PetscObject)*mat_n); 7981 } else { 7982 comm_n = PETSC_COMM_SELF; 7983 } 7984 } else { /* MAT_INITIAL_MATRIX */ 7985 PetscMPIInt rank; 7986 7987 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7988 PetscCall(PetscSubcommCreate(comm, &subcomm)); 7989 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 7990 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 7991 comm_n = PetscSubcommChild(subcomm); 7992 } 7993 /* flag to destroy *mat_n if not significative */ 7994 if (color) destroy_mat = PETSC_TRUE; 7995 } else { 7996 comm_n = comm; 7997 } 7998 7999 /* prepare send/receive buffers */ 8000 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 8001 PetscCall(PetscArrayzero(ilengths_idxs, size)); 8002 PetscCall(PetscMalloc1(size, &ilengths_vals)); 8003 PetscCall(PetscArrayzero(ilengths_vals, size)); 8004 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); 8005 8006 /* Get data from local matrices */ 8007 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 8008 /* TODO: See below some guidelines on how to prepare the local buffers */ 8009 /* 8010 send_buffer_vals should contain the raw values of the local matrix 8011 send_buffer_idxs should contain: 8012 - MatType_PRIVATE type 8013 - PetscInt size_of_l2gmap 8014 - PetscInt global_row_indices[size_of_l2gmap] 8015 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 8016 */ 8017 { 8018 ISLocalToGlobalMapping mapping; 8019 8020 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 8021 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 8022 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 8023 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 8024 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 8025 send_buffer_idxs[1] = i; 8026 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 8027 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 8028 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 8029 PetscCall(PetscMPIIntCast(i, &len)); 8030 for (i = 0; i < n_sends; i++) { 8031 ilengths_vals[is_indices[i]] = len * len; 8032 ilengths_idxs[is_indices[i]] = len + 2; 8033 } 8034 } 8035 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 8036 /* additional is (if any) */ 8037 if (nis) { 8038 PetscMPIInt psum; 8039 PetscInt j; 8040 for (j = 0, psum = 0; j < nis; j++) { 8041 PetscInt plen; 8042 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8043 PetscCall(PetscMPIIntCast(plen, &len)); 8044 psum += len + 1; /* indices + length */ 8045 } 8046 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 8047 for (j = 0, psum = 0; j < nis; j++) { 8048 PetscInt plen; 8049 const PetscInt *is_array_idxs; 8050 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8051 send_buffer_idxs_is[psum] = plen; 8052 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 8053 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 8054 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 8055 psum += plen + 1; /* indices + length */ 8056 } 8057 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum; 8058 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 8059 } 8060 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8061 8062 buf_size_idxs = 0; 8063 buf_size_vals = 0; 8064 buf_size_idxs_is = 0; 8065 buf_size_vecs = 0; 8066 for (i = 0; i < n_recvs; i++) { 8067 buf_size_idxs += (PetscInt)olengths_idxs[i]; 8068 buf_size_vals += (PetscInt)olengths_vals[i]; 8069 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 8070 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 8071 } 8072 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 8073 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 8074 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 8075 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 8076 8077 /* get new tags for clean communications */ 8078 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 8079 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 8080 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 8081 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 8082 8083 /* allocate for requests */ 8084 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 8085 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 8086 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 8087 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 8088 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 8089 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 8090 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 8091 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 8092 8093 /* communications */ 8094 ptr_idxs = recv_buffer_idxs; 8095 ptr_vals = recv_buffer_vals; 8096 ptr_idxs_is = recv_buffer_idxs_is; 8097 ptr_vecs = recv_buffer_vecs; 8098 for (i = 0; i < n_recvs; i++) { 8099 source_dest = onodes[i]; 8100 PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i])); 8101 PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i])); 8102 ptr_idxs += olengths_idxs[i]; 8103 ptr_vals += olengths_vals[i]; 8104 if (nis) { 8105 source_dest = onodes_is[i]; 8106 PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i])); 8107 ptr_idxs_is += olengths_idxs_is[i]; 8108 } 8109 if (nvecs) { 8110 source_dest = onodes[i]; 8111 PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i])); 8112 ptr_vecs += olengths_idxs[i] - 2; 8113 } 8114 } 8115 for (i = 0; i < n_sends; i++) { 8116 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 8117 PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 8118 PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 8119 if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); 8120 if (nvecs) { 8121 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8122 PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 8123 } 8124 } 8125 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 8126 PetscCall(ISDestroy(&is_sends_internal)); 8127 8128 /* assemble new l2g map */ 8129 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 8130 ptr_idxs = recv_buffer_idxs; 8131 new_local_rows = 0; 8132 for (i = 0; i < n_recvs; i++) { 8133 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8134 ptr_idxs += olengths_idxs[i]; 8135 } 8136 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 8137 ptr_idxs = recv_buffer_idxs; 8138 new_local_rows = 0; 8139 for (i = 0; i < n_recvs; i++) { 8140 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 8141 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8142 ptr_idxs += olengths_idxs[i]; 8143 } 8144 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 8145 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 8146 PetscCall(PetscFree(l2gmap_indices)); 8147 8148 /* infer new local matrix type from received local matrices type */ 8149 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 8150 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 8151 if (n_recvs) { 8152 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 8153 ptr_idxs = recv_buffer_idxs; 8154 for (i = 0; i < n_recvs; i++) { 8155 if ((PetscInt)new_local_type_private != *ptr_idxs) { 8156 new_local_type_private = MATAIJ_PRIVATE; 8157 break; 8158 } 8159 ptr_idxs += olengths_idxs[i]; 8160 } 8161 switch (new_local_type_private) { 8162 case MATDENSE_PRIVATE: 8163 new_local_type = MATSEQAIJ; 8164 bs = 1; 8165 break; 8166 case MATAIJ_PRIVATE: 8167 new_local_type = MATSEQAIJ; 8168 bs = 1; 8169 break; 8170 case MATBAIJ_PRIVATE: 8171 new_local_type = MATSEQBAIJ; 8172 break; 8173 case MATSBAIJ_PRIVATE: 8174 new_local_type = MATSEQSBAIJ; 8175 break; 8176 default: 8177 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 8178 } 8179 } else { /* by default, new_local_type is seqaij */ 8180 new_local_type = MATSEQAIJ; 8181 bs = 1; 8182 } 8183 8184 /* create MATIS object if needed */ 8185 if (!reuse) { 8186 PetscCall(MatGetSize(mat, &rows, &cols)); 8187 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8188 } else { 8189 /* it also destroys the local matrices */ 8190 if (*mat_n) { 8191 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 8192 } else { /* this is a fake object */ 8193 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8194 } 8195 } 8196 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 8197 PetscCall(MatSetType(local_mat, new_local_type)); 8198 8199 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 8200 8201 /* Global to local map of received indices */ 8202 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 8203 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 8204 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 8205 8206 /* restore attributes -> type of incoming data and its size */ 8207 buf_size_idxs = 0; 8208 for (i = 0; i < n_recvs; i++) { 8209 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 8210 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 8211 buf_size_idxs += (PetscInt)olengths_idxs[i]; 8212 } 8213 PetscCall(PetscFree(recv_buffer_idxs)); 8214 8215 /* set preallocation */ 8216 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 8217 if (!newisdense) { 8218 PetscInt *new_local_nnz = NULL; 8219 8220 ptr_idxs = recv_buffer_idxs_local; 8221 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); 8222 for (i = 0; i < n_recvs; i++) { 8223 PetscInt j; 8224 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 8225 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); 8226 } else { 8227 /* TODO */ 8228 } 8229 ptr_idxs += olengths_idxs[i]; 8230 } 8231 if (new_local_nnz) { 8232 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 8233 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 8234 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 8235 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8236 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 8237 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8238 } else { 8239 PetscCall(MatSetUp(local_mat)); 8240 } 8241 PetscCall(PetscFree(new_local_nnz)); 8242 } else { 8243 PetscCall(MatSetUp(local_mat)); 8244 } 8245 8246 /* set values */ 8247 ptr_vals = recv_buffer_vals; 8248 ptr_idxs = recv_buffer_idxs_local; 8249 for (i = 0; i < n_recvs; i++) { 8250 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 8251 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 8252 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 8253 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 8254 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 8255 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 8256 } else { 8257 /* TODO */ 8258 } 8259 ptr_idxs += olengths_idxs[i]; 8260 ptr_vals += olengths_vals[i]; 8261 } 8262 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 8263 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 8264 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 8265 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 8266 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 8267 PetscCall(PetscFree(recv_buffer_vals)); 8268 8269 #if 0 8270 if (!restrict_comm) { /* check */ 8271 Vec lvec,rvec; 8272 PetscReal infty_error; 8273 8274 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 8275 PetscCall(VecSetRandom(rvec,NULL)); 8276 PetscCall(MatMult(mat,rvec,lvec)); 8277 PetscCall(VecScale(lvec,-1.0)); 8278 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 8279 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 8280 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 8281 PetscCall(VecDestroy(&rvec)); 8282 PetscCall(VecDestroy(&lvec)); 8283 } 8284 #endif 8285 8286 /* assemble new additional is (if any) */ 8287 if (nis) { 8288 PetscInt **temp_idxs, *count_is, j, psum; 8289 8290 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 8291 PetscCall(PetscCalloc1(nis, &count_is)); 8292 ptr_idxs = recv_buffer_idxs_is; 8293 psum = 0; 8294 for (i = 0; i < n_recvs; i++) { 8295 for (j = 0; j < nis; j++) { 8296 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8297 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8298 psum += plen; 8299 ptr_idxs += plen + 1; /* shift pointer to received data */ 8300 } 8301 } 8302 PetscCall(PetscMalloc1(nis, &temp_idxs)); 8303 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 8304 for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]); 8305 PetscCall(PetscArrayzero(count_is, nis)); 8306 ptr_idxs = recv_buffer_idxs_is; 8307 for (i = 0; i < n_recvs; i++) { 8308 for (j = 0; j < nis; j++) { 8309 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8310 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 8311 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8312 ptr_idxs += plen + 1; /* shift pointer to received data */ 8313 } 8314 } 8315 for (i = 0; i < nis; i++) { 8316 PetscCall(ISDestroy(&isarray[i])); 8317 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 8318 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 8319 } 8320 PetscCall(PetscFree(count_is)); 8321 PetscCall(PetscFree(temp_idxs[0])); 8322 PetscCall(PetscFree(temp_idxs)); 8323 } 8324 /* free workspace */ 8325 PetscCall(PetscFree(recv_buffer_idxs_is)); 8326 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 8327 PetscCall(PetscFree(send_buffer_idxs)); 8328 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 8329 if (isdense) { 8330 PetscCall(MatISGetLocalMat(mat, &local_mat)); 8331 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 8332 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8333 } else { 8334 /* PetscCall(PetscFree(send_buffer_vals)); */ 8335 } 8336 if (nis) { 8337 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 8338 PetscCall(PetscFree(send_buffer_idxs_is)); 8339 } 8340 8341 if (nvecs) { 8342 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 8343 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 8344 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8345 PetscCall(VecDestroy(&nnsp_vec[0])); 8346 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 8347 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 8348 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 8349 /* set values */ 8350 ptr_vals = recv_buffer_vecs; 8351 ptr_idxs = recv_buffer_idxs_local; 8352 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8353 for (i = 0; i < n_recvs; i++) { 8354 PetscInt j; 8355 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); 8356 ptr_idxs += olengths_idxs[i]; 8357 ptr_vals += olengths_idxs[i] - 2; 8358 } 8359 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8360 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8361 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8362 } 8363 8364 PetscCall(PetscFree(recv_buffer_vecs)); 8365 PetscCall(PetscFree(recv_buffer_idxs_local)); 8366 PetscCall(PetscFree(recv_req_idxs)); 8367 PetscCall(PetscFree(recv_req_vals)); 8368 PetscCall(PetscFree(recv_req_vecs)); 8369 PetscCall(PetscFree(recv_req_idxs_is)); 8370 PetscCall(PetscFree(send_req_idxs)); 8371 PetscCall(PetscFree(send_req_vals)); 8372 PetscCall(PetscFree(send_req_vecs)); 8373 PetscCall(PetscFree(send_req_idxs_is)); 8374 PetscCall(PetscFree(ilengths_vals)); 8375 PetscCall(PetscFree(ilengths_idxs)); 8376 PetscCall(PetscFree(olengths_vals)); 8377 PetscCall(PetscFree(olengths_idxs)); 8378 PetscCall(PetscFree(onodes)); 8379 if (nis) { 8380 PetscCall(PetscFree(ilengths_idxs_is)); 8381 PetscCall(PetscFree(olengths_idxs_is)); 8382 PetscCall(PetscFree(onodes_is)); 8383 } 8384 PetscCall(PetscSubcommDestroy(&subcomm)); 8385 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 8386 PetscCall(MatDestroy(mat_n)); 8387 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i])); 8388 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8389 PetscCall(VecDestroy(&nnsp_vec[0])); 8390 } 8391 *mat_n = NULL; 8392 } 8393 PetscFunctionReturn(PETSC_SUCCESS); 8394 } 8395 8396 /* temporary hack into ksp private data structure */ 8397 #include <petsc/private/kspimpl.h> 8398 8399 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat) 8400 { 8401 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8402 PC_IS *pcis = (PC_IS *)pc->data; 8403 PCBDDCGraph graph = pcbddc->mat_graph; 8404 Mat coarse_mat, coarse_mat_is; 8405 Mat coarsedivudotp = NULL; 8406 Mat coarseG, t_coarse_mat_is; 8407 MatNullSpace CoarseNullSpace = NULL; 8408 ISLocalToGlobalMapping coarse_islg; 8409 IS coarse_is, *isarray, corners; 8410 PetscInt i, im_active = -1, active_procs = -1; 8411 PetscInt nis, nisdofs, nisneu, nisvert; 8412 PetscInt coarse_eqs_per_proc, coarsening_ratio; 8413 PC pc_temp; 8414 PCType coarse_pc_type; 8415 KSPType coarse_ksp_type; 8416 PetscBool multilevel_requested, multilevel_allowed; 8417 PetscBool coarse_reuse, multi_element = graph->multi_element; 8418 PetscInt ncoarse, nedcfield; 8419 PetscBool compute_vecs = PETSC_FALSE; 8420 PetscScalar *array; 8421 MatReuse coarse_mat_reuse; 8422 PetscBool restr, full_restr, have_void; 8423 PetscMPIInt size; 8424 8425 PetscFunctionBegin; 8426 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8427 /* Assign global numbering to coarse dofs */ 8428 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8429 PetscInt ocoarse_size; 8430 compute_vecs = PETSC_TRUE; 8431 8432 pcbddc->new_primal_space = PETSC_TRUE; 8433 ocoarse_size = pcbddc->coarse_size; 8434 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8435 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 8436 /* see if we can avoid some work */ 8437 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8438 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8439 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8440 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8441 coarse_reuse = PETSC_FALSE; 8442 } else { /* we can safely reuse already computed coarse matrix */ 8443 coarse_reuse = PETSC_TRUE; 8444 } 8445 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8446 coarse_reuse = PETSC_FALSE; 8447 } 8448 /* reset any subassembling information */ 8449 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8450 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8451 coarse_reuse = PETSC_TRUE; 8452 } 8453 if (coarse_reuse && pcbddc->coarse_ksp) { 8454 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 8455 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8456 coarse_mat_reuse = MAT_REUSE_MATRIX; 8457 } else { 8458 coarse_mat = NULL; 8459 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8460 } 8461 8462 /* creates temporary l2gmap and IS for coarse indexes */ 8463 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 8464 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 8465 8466 /* creates temporary MATIS object for coarse matrix */ 8467 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is)); 8468 PetscCall(MatSetType(t_coarse_mat_is, MATIS)); 8469 PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size)); 8470 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_TRUE)); 8471 PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg)); 8472 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat)); 8473 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8474 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8475 PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view")); 8476 8477 /* count "active" (i.e. with positive local size) and "void" processes */ 8478 im_active = !!pcis->n; 8479 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8480 8481 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8482 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8483 /* full_restr : just use the receivers from the subassembling pattern */ 8484 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 8485 coarse_mat_is = NULL; 8486 multilevel_allowed = PETSC_FALSE; 8487 multilevel_requested = PETSC_FALSE; 8488 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 8489 if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1); 8490 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8491 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8492 coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio; 8493 if (multilevel_requested) { 8494 ncoarse = active_procs / coarsening_ratio; 8495 restr = PETSC_FALSE; 8496 full_restr = PETSC_FALSE; 8497 } else { 8498 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 8499 restr = PETSC_TRUE; 8500 full_restr = PETSC_TRUE; 8501 } 8502 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8503 ncoarse = PetscMax(1, ncoarse); 8504 if (!pcbddc->coarse_subassembling) { 8505 if (coarsening_ratio > 1) { 8506 if (multilevel_requested) { 8507 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8508 } else { 8509 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8510 } 8511 } else { 8512 PetscMPIInt rank; 8513 8514 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 8515 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8516 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 8517 } 8518 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8519 PetscInt psum; 8520 if (pcbddc->coarse_ksp) psum = 1; 8521 else psum = 0; 8522 PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8523 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8524 } 8525 /* determine if we can go multilevel */ 8526 if (multilevel_requested) { 8527 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8528 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8529 } 8530 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8531 8532 /* dump subassembling pattern */ 8533 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); 8534 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8535 nedcfield = -1; 8536 corners = NULL; 8537 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8538 PetscInt *tidxs, *tidxs2, nout, tsize, i; 8539 const PetscInt *idxs; 8540 ISLocalToGlobalMapping tmap; 8541 8542 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8543 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 8544 /* allocate space for temporary storage */ 8545 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 8546 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 8547 /* allocate for IS array */ 8548 nisdofs = pcbddc->n_ISForDofsLocal; 8549 if (pcbddc->nedclocal) { 8550 if (pcbddc->nedfield > -1) { 8551 nedcfield = pcbddc->nedfield; 8552 } else { 8553 nedcfield = 0; 8554 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 8555 nisdofs = 1; 8556 } 8557 } 8558 nisneu = !!pcbddc->NeumannBoundariesLocal; 8559 nisvert = 0; /* nisvert is not used */ 8560 nis = nisdofs + nisneu + nisvert; 8561 PetscCall(PetscMalloc1(nis, &isarray)); 8562 /* dofs splitting */ 8563 for (i = 0; i < nisdofs; i++) { 8564 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8565 if (nedcfield != i) { 8566 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 8567 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8568 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8569 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8570 } else { 8571 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 8572 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 8573 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8574 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8575 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 8576 } 8577 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8578 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 8579 /* PetscCall(ISView(isarray[i],0)); */ 8580 } 8581 /* neumann boundaries */ 8582 if (pcbddc->NeumannBoundariesLocal) { 8583 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8584 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 8585 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8586 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8587 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8588 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8589 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 8590 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8591 } 8592 /* coordinates */ 8593 if (pcbddc->corner_selected) { 8594 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8595 PetscCall(ISGetLocalSize(corners, &tsize)); 8596 PetscCall(ISGetIndices(corners, &idxs)); 8597 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8598 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8599 PetscCall(ISRestoreIndices(corners, &idxs)); 8600 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8601 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8602 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 8603 } 8604 PetscCall(PetscFree(tidxs)); 8605 PetscCall(PetscFree(tidxs2)); 8606 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8607 } else { 8608 nis = 0; 8609 nisdofs = 0; 8610 nisneu = 0; 8611 nisvert = 0; 8612 isarray = NULL; 8613 } 8614 /* destroy no longer needed map */ 8615 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8616 8617 /* subassemble */ 8618 if (multilevel_allowed) { 8619 Vec vp[1]; 8620 PetscInt nvecs = 0; 8621 PetscBool reuse; 8622 8623 vp[0] = NULL; 8624 /* XXX HDIV also */ 8625 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8626 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 8627 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 8628 PetscCall(VecSetType(vp[0], VECSTANDARD)); 8629 nvecs = 1; 8630 8631 if (pcbddc->divudotp) { 8632 Mat B, loc_divudotp; 8633 Vec v, p; 8634 IS dummy; 8635 PetscInt np; 8636 8637 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 8638 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 8639 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 8640 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 8641 PetscCall(MatCreateVecs(B, &v, &p)); 8642 PetscCall(VecSet(p, 1.)); 8643 PetscCall(MatMultTranspose(B, p, v)); 8644 PetscCall(VecDestroy(&p)); 8645 PetscCall(MatDestroy(&B)); 8646 PetscCall(VecGetArray(vp[0], &array)); 8647 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 8648 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 8649 PetscCall(VecResetArray(pcbddc->vec1_P)); 8650 PetscCall(VecRestoreArray(vp[0], &array)); 8651 PetscCall(ISDestroy(&dummy)); 8652 PetscCall(VecDestroy(&v)); 8653 } 8654 } 8655 if (coarse_mat) reuse = PETSC_TRUE; 8656 else reuse = PETSC_FALSE; 8657 if (multi_element) { 8658 /* XXX divudotp */ 8659 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_FALSE)); 8660 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8661 coarse_mat_is = t_coarse_mat_is; 8662 } else { 8663 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8664 if (reuse) { 8665 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 8666 } else { 8667 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 8668 } 8669 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8670 PetscScalar *arraym; 8671 const PetscScalar *arrayv; 8672 PetscInt nl; 8673 PetscCall(VecGetLocalSize(vp[0], &nl)); 8674 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 8675 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 8676 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 8677 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 8678 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 8679 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 8680 PetscCall(VecDestroy(&vp[0])); 8681 } else { 8682 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 8683 } 8684 } 8685 } else { 8686 if (ncoarse != size) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 8687 else { 8688 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8689 coarse_mat_is = t_coarse_mat_is; 8690 } 8691 } 8692 if (coarse_mat_is || coarse_mat) { 8693 if (!multilevel_allowed) { 8694 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 8695 } else { 8696 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8697 if (coarse_mat_is) { 8698 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8699 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8700 coarse_mat = coarse_mat_is; 8701 } 8702 } 8703 } 8704 PetscCall(MatDestroy(&t_coarse_mat_is)); 8705 PetscCall(MatDestroy(&coarse_mat_is)); 8706 8707 /* create local to global scatters for coarse problem */ 8708 if (compute_vecs) { 8709 PetscInt lrows; 8710 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8711 if (coarse_mat) { 8712 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8713 } else { 8714 lrows = 0; 8715 } 8716 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8717 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8718 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8719 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8720 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8721 } 8722 PetscCall(ISDestroy(&coarse_is)); 8723 8724 /* set defaults for coarse KSP and PC */ 8725 if (multilevel_allowed) { 8726 coarse_ksp_type = KSPRICHARDSON; 8727 coarse_pc_type = PCBDDC; 8728 } else { 8729 coarse_ksp_type = KSPPREONLY; 8730 coarse_pc_type = PCREDUNDANT; 8731 } 8732 8733 /* print some info if requested */ 8734 if (pcbddc->dbg_flag) { 8735 if (!multilevel_allowed) { 8736 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8737 if (multilevel_requested) { 8738 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio)); 8739 } else if (pcbddc->max_levels) { 8740 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8741 } 8742 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8743 } 8744 } 8745 8746 /* communicate coarse discrete gradient */ 8747 coarseG = NULL; 8748 if (pcbddc->nedcG && multilevel_allowed) { 8749 MPI_Comm ccomm; 8750 if (coarse_mat) { 8751 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8752 } else { 8753 ccomm = MPI_COMM_NULL; 8754 } 8755 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8756 } 8757 8758 /* create the coarse KSP object only once with defaults */ 8759 if (coarse_mat) { 8760 PetscBool isredundant, isbddc, force, valid; 8761 PetscViewer dbg_viewer = NULL; 8762 PetscBool isset, issym, isher, isspd; 8763 8764 if (pcbddc->dbg_flag) { 8765 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8766 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8767 } 8768 if (!pcbddc->coarse_ksp) { 8769 char prefix[256], str_level[16]; 8770 size_t len; 8771 8772 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8773 PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel)); 8774 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8775 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8776 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1)); 8777 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8778 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8779 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8780 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8781 /* TODO is this logic correct? should check for coarse_mat type */ 8782 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8783 /* prefix */ 8784 PetscCall(PetscStrncpy(prefix, "", sizeof(prefix))); 8785 PetscCall(PetscStrncpy(str_level, "", sizeof(str_level))); 8786 if (!pcbddc->current_level) { 8787 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8788 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8789 } else { 8790 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8791 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8792 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8793 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8794 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8795 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)pcbddc->current_level)); 8796 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8797 } 8798 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8799 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8800 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8801 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8802 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8803 /* allow user customization */ 8804 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8805 /* get some info after set from options */ 8806 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8807 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8808 force = PETSC_FALSE; 8809 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8810 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8811 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8812 if (multilevel_allowed && !force && !valid) { 8813 isbddc = PETSC_TRUE; 8814 PetscCall(PCSetType(pc_temp, PCBDDC)); 8815 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8816 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8817 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8818 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8819 PetscObjectOptionsBegin((PetscObject)pc_temp); 8820 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8821 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8822 PetscOptionsEnd(); 8823 pc_temp->setfromoptionscalled++; 8824 } 8825 } 8826 } 8827 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8828 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8829 if (nisdofs) { 8830 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8831 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i])); 8832 } 8833 if (nisneu) { 8834 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8835 PetscCall(ISDestroy(&isarray[nisdofs])); 8836 } 8837 if (nisvert) { 8838 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8839 PetscCall(ISDestroy(&isarray[nis - 1])); 8840 } 8841 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8842 8843 /* get some info after set from options */ 8844 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8845 8846 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8847 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8848 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8849 force = PETSC_FALSE; 8850 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8851 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8852 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC)); 8853 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 8854 if (isredundant) { 8855 KSP inner_ksp; 8856 PC inner_pc; 8857 8858 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 8859 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 8860 } 8861 8862 /* parameters which miss an API */ 8863 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8864 if (isbddc) { 8865 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 8866 8867 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8868 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8869 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8870 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8871 if (pcbddc_coarse->benign_saddle_point) { 8872 Mat coarsedivudotp_is; 8873 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 8874 IS row, col; 8875 const PetscInt *gidxs; 8876 PetscInt n, st, M, N; 8877 8878 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 8879 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 8880 st = st - n; 8881 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 8882 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 8883 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 8884 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 8885 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 8886 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 8887 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 8888 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 8889 PetscCall(ISGetSize(row, &M)); 8890 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 8891 PetscCall(ISDestroy(&row)); 8892 PetscCall(ISDestroy(&col)); 8893 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 8894 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 8895 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 8896 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 8897 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8898 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8899 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 8900 PetscCall(MatDestroy(&coarsedivudotp)); 8901 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 8902 PetscCall(MatDestroy(&coarsedivudotp_is)); 8903 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8904 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8905 } 8906 } 8907 8908 /* propagate symmetry info of coarse matrix */ 8909 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 8910 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 8911 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 8912 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 8913 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 8914 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 8915 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 8916 8917 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); 8918 /* set operators */ 8919 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 8920 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 8921 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8922 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 8923 } 8924 PetscCall(MatDestroy(&coarseG)); 8925 PetscCall(PetscFree(isarray)); 8926 #if 0 8927 { 8928 PetscViewer viewer; 8929 char filename[256]; 8930 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level)); 8931 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8932 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8933 PetscCall(MatView(coarse_mat,viewer)); 8934 PetscCall(PetscViewerPopFormat(viewer)); 8935 PetscCall(PetscViewerDestroy(&viewer)); 8936 } 8937 #endif 8938 8939 if (corners) { 8940 Vec gv; 8941 IS is; 8942 const PetscInt *idxs; 8943 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 8944 PetscScalar *coords; 8945 8946 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 8947 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 8948 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 8949 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 8950 PetscCall(VecSetBlockSize(gv, cdim)); 8951 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 8952 PetscCall(VecSetType(gv, VECSTANDARD)); 8953 PetscCall(VecSetFromOptions(gv)); 8954 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8955 8956 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8957 PetscCall(ISGetLocalSize(is, &n)); 8958 PetscCall(ISGetIndices(is, &idxs)); 8959 PetscCall(PetscMalloc1(n * cdim, &coords)); 8960 for (i = 0; i < n; i++) { 8961 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; 8962 } 8963 PetscCall(ISRestoreIndices(is, &idxs)); 8964 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8965 8966 PetscCall(ISGetLocalSize(corners, &n)); 8967 PetscCall(ISGetIndices(corners, &idxs)); 8968 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 8969 PetscCall(ISRestoreIndices(corners, &idxs)); 8970 PetscCall(PetscFree(coords)); 8971 PetscCall(VecAssemblyBegin(gv)); 8972 PetscCall(VecAssemblyEnd(gv)); 8973 PetscCall(VecGetArray(gv, &coords)); 8974 if (pcbddc->coarse_ksp) { 8975 PC coarse_pc; 8976 PetscBool isbddc; 8977 8978 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 8979 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 8980 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8981 PetscReal *realcoords; 8982 8983 PetscCall(VecGetLocalSize(gv, &n)); 8984 #if defined(PETSC_USE_COMPLEX) 8985 PetscCall(PetscMalloc1(n, &realcoords)); 8986 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 8987 #else 8988 realcoords = coords; 8989 #endif 8990 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 8991 #if defined(PETSC_USE_COMPLEX) 8992 PetscCall(PetscFree(realcoords)); 8993 #endif 8994 } 8995 } 8996 PetscCall(VecRestoreArray(gv, &coords)); 8997 PetscCall(VecDestroy(&gv)); 8998 } 8999 PetscCall(ISDestroy(&corners)); 9000 9001 if (pcbddc->coarse_ksp) { 9002 Vec crhs, csol; 9003 9004 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 9005 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 9006 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL)); 9007 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs)); 9008 } 9009 PetscCall(MatDestroy(&coarsedivudotp)); 9010 9011 /* compute null space for coarse solver if the benign trick has been requested */ 9012 if (pcbddc->benign_null) { 9013 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 9014 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); 9015 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 9016 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 9017 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9018 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9019 if (coarse_mat) { 9020 Vec nullv; 9021 PetscScalar *array, *array2; 9022 PetscInt nl; 9023 9024 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 9025 PetscCall(VecGetLocalSize(nullv, &nl)); 9026 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9027 PetscCall(VecGetArray(nullv, &array2)); 9028 PetscCall(PetscArraycpy(array2, array, nl)); 9029 PetscCall(VecRestoreArray(nullv, &array2)); 9030 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9031 PetscCall(VecNormalize(nullv, NULL)); 9032 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 9033 PetscCall(VecDestroy(&nullv)); 9034 } 9035 } 9036 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 9037 9038 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9039 if (pcbddc->coarse_ksp) { 9040 PetscBool ispreonly; 9041 9042 if (CoarseNullSpace) { 9043 PetscBool isnull; 9044 9045 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 9046 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 9047 /* TODO: add local nullspaces (if any) */ 9048 } 9049 /* setup coarse ksp */ 9050 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 9051 /* Check coarse problem if in debug mode or if solving with an iterative method */ 9052 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 9053 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 9054 KSP check_ksp; 9055 KSPType check_ksp_type; 9056 PC check_pc; 9057 Vec check_vec, coarse_vec; 9058 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 9059 PetscInt its; 9060 PetscBool compute_eigs; 9061 PetscReal *eigs_r, *eigs_c; 9062 PetscInt neigs; 9063 const char *prefix; 9064 9065 /* Create ksp object suitable for estimation of extreme eigenvalues */ 9066 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 9067 PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel)); 9068 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 9069 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 9070 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 9071 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size)); 9072 /* prevent from setup unneeded object */ 9073 PetscCall(KSPGetPC(check_ksp, &check_pc)); 9074 PetscCall(PCSetType(check_pc, PCNONE)); 9075 if (ispreonly) { 9076 check_ksp_type = KSPPREONLY; 9077 compute_eigs = PETSC_FALSE; 9078 } else { 9079 check_ksp_type = KSPGMRES; 9080 compute_eigs = PETSC_TRUE; 9081 } 9082 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 9083 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 9084 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 9085 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 9086 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 9087 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 9088 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 9089 PetscCall(KSPSetFromOptions(check_ksp)); 9090 PetscCall(KSPSetUp(check_ksp)); 9091 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 9092 PetscCall(KSPSetPC(check_ksp, check_pc)); 9093 /* create random vec */ 9094 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 9095 PetscCall(VecSetRandom(check_vec, NULL)); 9096 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9097 /* solve coarse problem */ 9098 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 9099 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 9100 /* set eigenvalue estimation if preonly has not been requested */ 9101 if (compute_eigs) { 9102 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 9103 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 9104 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 9105 if (neigs) { 9106 lambda_max = eigs_r[neigs - 1]; 9107 lambda_min = eigs_r[0]; 9108 if (pcbddc->use_coarse_estimates) { 9109 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 9110 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 9111 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 9112 } 9113 } 9114 } 9115 } 9116 9117 /* check coarse problem residual error */ 9118 if (pcbddc->dbg_flag) { 9119 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 9120 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9121 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 9122 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 9123 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9124 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 9125 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 9126 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer)); 9127 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer)); 9128 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 9129 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 9130 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); 9131 if (compute_eigs) { 9132 PetscReal lambda_max_s, lambda_min_s; 9133 KSPConvergedReason reason; 9134 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 9135 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 9136 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 9137 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 9138 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 9139 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); 9140 } 9141 PetscCall(PetscViewerFlush(dbg_viewer)); 9142 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9143 } 9144 PetscCall(VecDestroy(&check_vec)); 9145 PetscCall(VecDestroy(&coarse_vec)); 9146 PetscCall(KSPDestroy(&check_ksp)); 9147 if (compute_eigs) { 9148 PetscCall(PetscFree(eigs_r)); 9149 PetscCall(PetscFree(eigs_c)); 9150 } 9151 } 9152 } 9153 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 9154 /* print additional info */ 9155 if (pcbddc->dbg_flag) { 9156 /* waits until all processes reaches this point */ 9157 PetscCall(PetscBarrier((PetscObject)pc)); 9158 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 9159 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9160 } 9161 9162 /* free memory */ 9163 PetscCall(MatDestroy(&coarse_mat)); 9164 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9165 PetscFunctionReturn(PETSC_SUCCESS); 9166 } 9167 9168 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) 9169 { 9170 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9171 PC_IS *pcis = (PC_IS *)pc->data; 9172 IS subset, subset_mult, subset_n; 9173 PetscInt local_size, coarse_size = 0; 9174 PetscInt *local_primal_indices = NULL; 9175 const PetscInt *t_local_primal_indices; 9176 9177 PetscFunctionBegin; 9178 /* Compute global number of coarse dofs */ 9179 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 9180 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 9181 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 9182 PetscCall(ISDestroy(&subset_n)); 9183 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 9184 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 9185 PetscCall(ISDestroy(&subset)); 9186 PetscCall(ISDestroy(&subset_mult)); 9187 PetscCall(ISGetLocalSize(subset_n, &local_size)); 9188 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 9189 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 9190 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 9191 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 9192 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 9193 PetscCall(ISDestroy(&subset_n)); 9194 9195 if (pcbddc->dbg_flag) { 9196 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9197 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 9198 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size)); 9199 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9200 } 9201 9202 /* get back data */ 9203 *coarse_size_n = coarse_size; 9204 *local_primal_indices_n = local_primal_indices; 9205 PetscFunctionReturn(PETSC_SUCCESS); 9206 } 9207 9208 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) 9209 { 9210 IS localis_t; 9211 PetscInt i, lsize, *idxs, n; 9212 PetscScalar *vals; 9213 9214 PetscFunctionBegin; 9215 /* get indices in local ordering exploiting local to global map */ 9216 PetscCall(ISGetLocalSize(globalis, &lsize)); 9217 PetscCall(PetscMalloc1(lsize, &vals)); 9218 for (i = 0; i < lsize; i++) vals[i] = 1.0; 9219 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 9220 PetscCall(VecSet(gwork, 0.0)); 9221 PetscCall(VecSet(lwork, 0.0)); 9222 if (idxs) { /* multilevel guard */ 9223 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 9224 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 9225 } 9226 PetscCall(VecAssemblyBegin(gwork)); 9227 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 9228 PetscCall(PetscFree(vals)); 9229 PetscCall(VecAssemblyEnd(gwork)); 9230 /* now compute set in local ordering */ 9231 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9232 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9233 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 9234 PetscCall(VecGetSize(lwork, &n)); 9235 for (i = 0, lsize = 0; i < n; i++) { 9236 if (PetscRealPart(vals[i]) > 0.5) lsize++; 9237 } 9238 PetscCall(PetscMalloc1(lsize, &idxs)); 9239 for (i = 0, lsize = 0; i < n; i++) { 9240 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i; 9241 } 9242 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 9243 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 9244 *localis = localis_t; 9245 PetscFunctionReturn(PETSC_SUCCESS); 9246 } 9247 9248 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 9249 { 9250 PC_IS *pcis = (PC_IS *)pc->data; 9251 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9252 PC_IS *pcisf; 9253 PC_BDDC *pcbddcf; 9254 PC pcf; 9255 9256 PetscFunctionBegin; 9257 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 9258 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 9259 PetscCall(PCSetType(pcf, PCBDDC)); 9260 9261 pcisf = (PC_IS *)pcf->data; 9262 pcbddcf = (PC_BDDC *)pcf->data; 9263 9264 pcisf->is_B_local = pcis->is_B_local; 9265 pcisf->vec1_N = pcis->vec1_N; 9266 pcisf->BtoNmap = pcis->BtoNmap; 9267 pcisf->n = pcis->n; 9268 pcisf->n_B = pcis->n_B; 9269 9270 PetscCall(PetscFree(pcbddcf->mat_graph)); 9271 PetscCall(PetscFree(pcbddcf->sub_schurs)); 9272 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 9273 pcbddcf->sub_schurs = schurs; 9274 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 9275 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 9276 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 9277 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 9278 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 9279 pcbddcf->use_faces = PETSC_TRUE; 9280 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 9281 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 9282 pcbddcf->use_qr_single = (PetscBool)!constraints; 9283 pcbddcf->fake_change = PETSC_TRUE; 9284 pcbddcf->dbg_flag = pcbddc->dbg_flag; 9285 9286 PetscCall(PCBDDCAdaptiveSelection(pcf)); 9287 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9288 9289 *change = pcbddcf->ConstraintMatrix; 9290 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 9291 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 9292 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 9293 9294 if (schurs) pcbddcf->sub_schurs = NULL; 9295 pcbddcf->ConstraintMatrix = NULL; 9296 pcbddcf->mat_graph = NULL; 9297 pcisf->is_B_local = NULL; 9298 pcisf->vec1_N = NULL; 9299 pcisf->BtoNmap = NULL; 9300 PetscCall(PCDestroy(&pcf)); 9301 PetscFunctionReturn(PETSC_SUCCESS); 9302 } 9303 9304 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9305 { 9306 PC_IS *pcis = (PC_IS *)pc->data; 9307 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9308 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 9309 Mat S_j; 9310 PetscInt *used_xadj, *used_adjncy; 9311 PetscBool free_used_adj; 9312 9313 PetscFunctionBegin; 9314 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9315 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9316 free_used_adj = PETSC_FALSE; 9317 if (pcbddc->sub_schurs_layers == -1) { 9318 used_xadj = NULL; 9319 used_adjncy = NULL; 9320 } else { 9321 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9322 used_xadj = pcbddc->mat_graph->xadj; 9323 used_adjncy = pcbddc->mat_graph->adjncy; 9324 } else if (pcbddc->computed_rowadj) { 9325 used_xadj = pcbddc->mat_graph->xadj; 9326 used_adjncy = pcbddc->mat_graph->adjncy; 9327 } else { 9328 PetscBool flg_row = PETSC_FALSE; 9329 const PetscInt *xadj, *adjncy; 9330 PetscInt nvtxs; 9331 9332 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9333 if (flg_row) { 9334 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 9335 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 9336 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 9337 free_used_adj = PETSC_TRUE; 9338 } else { 9339 pcbddc->sub_schurs_layers = -1; 9340 used_xadj = NULL; 9341 used_adjncy = NULL; 9342 } 9343 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9344 } 9345 } 9346 9347 /* setup sub_schurs data */ 9348 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 9349 if (!sub_schurs->schur_explicit) { 9350 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9351 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 9352 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 9353 } else { 9354 Mat change = NULL; 9355 Vec scaling = NULL; 9356 IS change_primal = NULL, iP; 9357 PetscInt benign_n; 9358 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9359 PetscBool need_change = PETSC_FALSE; 9360 PetscBool discrete_harmonic = PETSC_FALSE; 9361 9362 if (!pcbddc->use_vertices && reuse_solvers) { 9363 PetscInt n_vertices; 9364 9365 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 9366 reuse_solvers = (PetscBool)!n_vertices; 9367 } 9368 if (!pcbddc->benign_change_explicit) { 9369 benign_n = pcbddc->benign_n; 9370 } else { 9371 benign_n = 0; 9372 } 9373 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9374 We need a global reduction to avoid possible deadlocks. 9375 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9376 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9377 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9378 PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 9379 need_change = (PetscBool)(!need_change); 9380 } 9381 /* If the user defines additional constraints, we import them here */ 9382 if (need_change) { 9383 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 9384 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 9385 } 9386 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9387 9388 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 9389 if (iP) { 9390 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 9391 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 9392 PetscOptionsEnd(); 9393 } 9394 if (discrete_harmonic) { 9395 Mat A; 9396 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 9397 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 9398 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 9399 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 9400 pcbddc->benign_zerodiag_subs, change, change_primal)); 9401 PetscCall(MatDestroy(&A)); 9402 } else { 9403 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 9404 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 9405 } 9406 PetscCall(MatDestroy(&change)); 9407 PetscCall(ISDestroy(&change_primal)); 9408 } 9409 PetscCall(MatDestroy(&S_j)); 9410 9411 /* free adjacency */ 9412 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 9413 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9414 PetscFunctionReturn(PETSC_SUCCESS); 9415 } 9416 9417 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9418 { 9419 PC_IS *pcis = (PC_IS *)pc->data; 9420 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9421 PCBDDCGraph graph; 9422 9423 PetscFunctionBegin; 9424 /* attach interface graph for determining subsets */ 9425 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9426 IS verticesIS, verticescomm; 9427 PetscInt vsize, *idxs; 9428 9429 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9430 PetscCall(ISGetSize(verticesIS, &vsize)); 9431 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 9432 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 9433 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 9434 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9435 PetscCall(PCBDDCGraphCreate(&graph)); 9436 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 9437 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 9438 PetscCall(ISDestroy(&verticescomm)); 9439 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9440 } else { 9441 graph = pcbddc->mat_graph; 9442 } 9443 /* print some info */ 9444 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9445 IS vertices; 9446 PetscInt nv, nedges, nfaces; 9447 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 9448 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9449 PetscCall(ISGetSize(vertices, &nv)); 9450 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9451 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 9452 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 9453 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 9454 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 9455 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9456 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9457 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9458 } 9459 9460 /* sub_schurs init */ 9461 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9462 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 9463 9464 /* free graph struct */ 9465 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph)); 9466 PetscFunctionReturn(PETSC_SUCCESS); 9467 } 9468 9469 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer) 9470 { 9471 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 9472 PetscInt n = pc->pmat->rmap->n, ln, ni, st; 9473 const PetscInt *idxs; 9474 IS gis; 9475 9476 PetscFunctionBegin; 9477 if (!is) PetscFunctionReturn(PETSC_SUCCESS); 9478 PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL)); 9479 PetscCall(MatGetLocalSize(matis->A, NULL, &ln)); 9480 PetscCall(PetscArrayzero(matis->sf_leafdata, ln)); 9481 PetscCall(PetscArrayzero(matis->sf_rootdata, n)); 9482 PetscCall(ISGetLocalSize(is, &ni)); 9483 PetscCall(ISGetIndices(is, &idxs)); 9484 for (PetscInt i = 0; i < ni; i++) { 9485 if (idxs[i] < 0 || idxs[i] >= ln) continue; 9486 matis->sf_leafdata[idxs[i]] = 1; 9487 } 9488 PetscCall(ISRestoreIndices(is, &idxs)); 9489 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9490 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9491 ln = 0; 9492 for (PetscInt i = 0; i < n; i++) { 9493 if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st; 9494 } 9495 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis)); 9496 PetscCall(ISView(gis, viewer)); 9497 PetscCall(ISDestroy(&gis)); 9498 PetscFunctionReturn(PETSC_SUCCESS); 9499 } 9500 9501 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile) 9502 { 9503 PetscInt header[11]; 9504 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9505 PetscViewer viewer; 9506 MPI_Comm comm = PetscObjectComm((PetscObject)pc); 9507 9508 PetscFunctionBegin; 9509 PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer)); 9510 if (load) { 9511 IS is; 9512 Mat A; 9513 9514 PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT)); 9515 PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9516 PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9517 PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9518 PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9519 PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9520 PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9521 PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9522 PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9523 PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9524 PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9525 if (header[0]) { 9526 PetscCall(ISCreate(comm, &is)); 9527 PetscCall(ISLoad(is, viewer)); 9528 PetscCall(PCBDDCSetDirichletBoundaries(pc, is)); 9529 PetscCall(ISDestroy(&is)); 9530 } 9531 if (header[1]) { 9532 PetscCall(ISCreate(comm, &is)); 9533 PetscCall(ISLoad(is, viewer)); 9534 PetscCall(PCBDDCSetNeumannBoundaries(pc, is)); 9535 PetscCall(ISDestroy(&is)); 9536 } 9537 if (header[2]) { 9538 IS *isarray; 9539 9540 PetscCall(PetscMalloc1(header[2], &isarray)); 9541 for (PetscInt i = 0; i < header[2]; i++) { 9542 PetscCall(ISCreate(comm, &isarray[i])); 9543 PetscCall(ISLoad(isarray[i], viewer)); 9544 } 9545 PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray)); 9546 for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i])); 9547 PetscCall(PetscFree(isarray)); 9548 } 9549 if (header[3]) { 9550 PetscCall(ISCreate(comm, &is)); 9551 PetscCall(ISLoad(is, viewer)); 9552 PetscCall(PCBDDCSetPrimalVerticesIS(pc, is)); 9553 PetscCall(ISDestroy(&is)); 9554 } 9555 if (header[4]) { 9556 PetscCall(MatCreate(comm, &A)); 9557 PetscCall(MatSetType(A, MATAIJ)); 9558 PetscCall(MatLoad(A, viewer)); 9559 PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8])); 9560 PetscCall(MatDestroy(&A)); 9561 } 9562 if (header[9]) { 9563 PetscCall(MatCreate(comm, &A)); 9564 PetscCall(MatSetType(A, MATIS)); 9565 PetscCall(MatLoad(A, viewer)); 9566 PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL)); 9567 PetscCall(MatDestroy(&A)); 9568 } 9569 } else { 9570 header[0] = (PetscInt) !!pcbddc->DirichletBoundariesLocal; 9571 header[1] = (PetscInt) !!pcbddc->NeumannBoundariesLocal; 9572 header[2] = pcbddc->n_ISForDofsLocal; 9573 header[3] = (PetscInt) !!pcbddc->user_primal_vertices_local; 9574 header[4] = (PetscInt) !!pcbddc->discretegradient; 9575 header[5] = pcbddc->nedorder; 9576 header[6] = pcbddc->nedfield; 9577 header[7] = (PetscInt)pcbddc->nedglobal; 9578 header[8] = (PetscInt)pcbddc->conforming; 9579 header[9] = (PetscInt) !!pcbddc->divudotp; 9580 header[10] = (PetscInt)pcbddc->divudotp_trans; 9581 if (header[4]) header[3] = 0; 9582 9583 PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT)); 9584 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer)); 9585 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer)); 9586 for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer)); 9587 if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer)); 9588 if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer)); 9589 if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer)); 9590 } 9591 PetscCall(PetscViewerDestroy(&viewer)); 9592 PetscFunctionReturn(PETSC_SUCCESS); 9593 } 9594 9595 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9596 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9597 { 9598 Mat At; 9599 IS rows; 9600 PetscInt rst, ren; 9601 PetscLayout rmap; 9602 9603 PetscFunctionBegin; 9604 rst = ren = 0; 9605 if (ccomm != MPI_COMM_NULL) { 9606 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 9607 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 9608 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 9609 PetscCall(PetscLayoutSetUp(rmap)); 9610 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 9611 } 9612 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 9613 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 9614 PetscCall(ISDestroy(&rows)); 9615 9616 if (ccomm != MPI_COMM_NULL) { 9617 Mat_MPIAIJ *a, *b; 9618 IS from, to; 9619 Vec gvec; 9620 PetscInt lsize; 9621 9622 PetscCall(MatCreate(ccomm, B)); 9623 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 9624 PetscCall(MatSetType(*B, MATAIJ)); 9625 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 9626 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9627 a = (Mat_MPIAIJ *)At->data; 9628 b = (Mat_MPIAIJ *)(*B)->data; 9629 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 9630 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 9631 PetscCall(PetscObjectReference((PetscObject)a->A)); 9632 PetscCall(PetscObjectReference((PetscObject)a->B)); 9633 b->A = a->A; 9634 b->B = a->B; 9635 9636 b->donotstash = a->donotstash; 9637 b->roworiented = a->roworiented; 9638 b->rowindices = NULL; 9639 b->rowvalues = NULL; 9640 b->getrowactive = PETSC_FALSE; 9641 9642 (*B)->rmap = rmap; 9643 (*B)->factortype = A->factortype; 9644 (*B)->assembled = PETSC_TRUE; 9645 (*B)->insertmode = NOT_SET_VALUES; 9646 (*B)->preallocated = PETSC_TRUE; 9647 9648 if (a->colmap) { 9649 #if defined(PETSC_USE_CTABLE) 9650 PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap)); 9651 #else 9652 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 9653 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 9654 #endif 9655 } else b->colmap = NULL; 9656 if (a->garray) { 9657 PetscInt len; 9658 len = a->B->cmap->n; 9659 PetscCall(PetscMalloc1(len + 1, &b->garray)); 9660 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 9661 } else b->garray = NULL; 9662 9663 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9664 b->lvec = a->lvec; 9665 9666 /* cannot use VecScatterCopy */ 9667 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 9668 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 9669 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 9670 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 9671 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 9672 PetscCall(ISDestroy(&from)); 9673 PetscCall(ISDestroy(&to)); 9674 PetscCall(VecDestroy(&gvec)); 9675 } 9676 PetscCall(MatDestroy(&At)); 9677 PetscFunctionReturn(PETSC_SUCCESS); 9678 } 9679 9680 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */ 9681 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA) 9682 { 9683 PetscBool isaij; 9684 MPI_Comm comm; 9685 9686 PetscFunctionBegin; 9687 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9688 PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, "")); 9689 PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented"); 9690 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij)); 9691 if (isaij) { /* SeqAIJ supports repeated rows */ 9692 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA)); 9693 } else { 9694 Mat A_loc; 9695 Mat_SeqAIJ *da; 9696 PetscSF sf; 9697 PetscInt ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata; 9698 PetscScalar *daa; 9699 const PetscInt *idxs; 9700 const PetscSFNode *iremotes; 9701 PetscSFNode *remotes; 9702 9703 /* SF for incoming rows */ 9704 PetscCall(PetscSFCreate(comm, &sf)); 9705 PetscCall(ISGetLocalSize(rows, &ni)); 9706 PetscCall(ISGetIndices(rows, &idxs)); 9707 PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs)); 9708 PetscCall(ISRestoreIndices(rows, &idxs)); 9709 9710 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc)); 9711 da = (Mat_SeqAIJ *)A_loc->data; 9712 PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata)); 9713 for (PetscInt i = 0; i < m; i++) { 9714 rdata[2 * i + 0] = da->i[i + 1] - da->i[i]; 9715 rdata[2 * i + 1] = da->i[i]; 9716 } 9717 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9718 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9719 PetscCall(PetscMalloc1(ni + 1, &di)); 9720 di[0] = 0; 9721 for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0]; 9722 PetscCall(PetscMalloc1(di[ni], &dj)); 9723 PetscCall(PetscMalloc1(di[ni], &daa)); 9724 PetscCall(PetscMalloc1(di[ni], &remotes)); 9725 9726 PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes)); 9727 9728 /* SF graph for nonzeros */ 9729 c = 0; 9730 for (PetscInt i = 0; i < ni; i++) { 9731 const PetscMPIInt rank = (PetscMPIInt)iremotes[i].rank; 9732 const PetscInt rsize = ldata[2 * i]; 9733 for (PetscInt j = 0; j < rsize; j++) { 9734 remotes[c].rank = rank; 9735 remotes[c].index = ldata[2 * i + 1] + j; 9736 c++; 9737 } 9738 } 9739 PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]); 9740 PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER)); 9741 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9742 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9743 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9744 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9745 9746 PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA)); 9747 PetscCall(MatDestroy(&A_loc)); 9748 PetscCall(PetscSFDestroy(&sf)); 9749 PetscCall(PetscFree(di)); 9750 PetscCall(PetscFree(dj)); 9751 PetscCall(PetscFree(daa)); 9752 PetscCall(PetscFree(remotes)); 9753 PetscCall(PetscFree2(ldata, rdata)); 9754 } 9755 PetscFunctionReturn(PETSC_SUCCESS); 9756 } 9757