1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <petsc/private/kernels/blockinvert.h> 5 #include <../src/mat/impls/dense/seq/dense.h> 6 #include <petscdmplex.h> 7 #include <petscblaslapack.h> 8 #include <petsc/private/sfimpl.h> 9 #include <petsc/private/dmpleximpl.h> 10 #include <petscdmda.h> 11 12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 13 14 /* if range is true, it returns B s.t. span{B} = range(A) 15 if range is false, it returns B s.t. range(B) _|_ range(A) */ 16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 17 { 18 PetscScalar *uwork, *data, *U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM, bN, lwork, lierr, di = 1; 21 PetscInt ulw, i, nr, nc, n; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork2; 24 #endif 25 26 PetscFunctionBegin; 27 PetscCall(MatGetSize(A, &nr, &nc)); 28 if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 33 PetscCall(PetscMalloc1(ulw, &uwork)); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr, nc); 39 if (!rwork) { 40 PetscCall(PetscMalloc1(n, &sing)); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 PetscCall(PetscMalloc1(nr * nr, &U)); 47 PetscCall(PetscBLASIntCast(nr, &bM)); 48 PetscCall(PetscBLASIntCast(nc, &bN)); 49 PetscCall(PetscBLASIntCast(ulw, &lwork)); 50 PetscCall(MatDenseGetArray(A, &data)); 51 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 52 #if !defined(PETSC_USE_COMPLEX) 53 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 54 #else 55 PetscCall(PetscMalloc1(5 * n, &rwork2)); 56 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 57 PetscCall(PetscFree(rwork2)); 58 #endif 59 PetscCall(PetscFPTrapPop()); 60 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 61 PetscCall(MatDenseRestoreArray(A, &data)); 62 for (i = 0; i < n; i++) 63 if (sing[i] < PETSC_SMALL) break; 64 if (!rwork) PetscCall(PetscFree(sing)); 65 if (!work) PetscCall(PetscFree(uwork)); 66 /* create B */ 67 if (!range) { 68 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 69 PetscCall(MatDenseGetArray(*B, &data)); 70 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 71 } else { 72 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 73 PetscCall(MatDenseGetArray(*B, &data)); 74 PetscCall(PetscArraycpy(data, U, i * nr)); 75 } 76 PetscCall(MatDenseRestoreArray(*B, &data)); 77 PetscCall(PetscFree(U)); 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 /* TODO REMOVE */ 82 #if defined(PRINT_GDET) 83 static int inc = 0; 84 static int lev = 0; 85 #endif 86 87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 88 { 89 Mat GE, GEd; 90 PetscInt rsize, csize, esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 PetscCall(ISGetSize(edge, &esize)); 95 if (!esize) PetscFunctionReturn(PETSC_SUCCESS); 96 PetscCall(ISGetSize(extrow, &rsize)); 97 PetscCall(ISGetSize(extcol, &csize)); 98 99 /* gradients */ 100 ptr = work + 5 * esize; 101 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 102 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 103 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 104 PetscCall(MatDestroy(&GE)); 105 106 /* constants */ 107 ptr += rsize * csize; 108 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 109 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 110 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 111 PetscCall(MatDestroy(&GE)); 112 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 113 PetscCall(MatDestroy(&GEd)); 114 115 if (corners) { 116 Mat GEc; 117 const PetscScalar *vals; 118 PetscScalar v; 119 120 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 121 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 122 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 123 /* v = PetscAbsScalar(vals[0]); */ 124 v = 1.; 125 cvals[0] = vals[0] / v; 126 cvals[1] = vals[1] / v; 127 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 128 PetscCall(MatScale(*GKins, 1. / v)); 129 #if defined(PRINT_GDET) 130 { 131 PetscViewer viewer; 132 char filename[256]; 133 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++)); 134 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 135 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 136 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 137 PetscCall(MatView(GEc, viewer)); 138 PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK")); 139 PetscCall(MatView(*GKins, viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 141 PetscCall(MatView(GEd, viewer)); 142 PetscCall(PetscViewerDestroy(&viewer)); 143 } 144 #endif 145 PetscCall(MatDestroy(&GEd)); 146 PetscCall(MatDestroy(&GEc)); 147 } 148 PetscFunctionReturn(PETSC_SUCCESS); 149 } 150 151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *); 152 153 PetscErrorCode PCBDDCNedelecSupport(PC pc) 154 { 155 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 156 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 157 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 158 PetscSF sfv; 159 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 160 MPI_Comm comm; 161 IS lned, primals, allprimals, nedfieldlocal, elements_corners = NULL; 162 IS *eedges, *extrows, *extcols, *alleedges; 163 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 164 PetscScalar *vals, *work; 165 PetscReal *rwork; 166 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 167 PetscInt ne, nv, Lv, order, n, field; 168 PetscInt i, j, extmem, cum, maxsize, nee; 169 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 170 PetscInt *sfvleaves, *sfvroots; 171 PetscInt *corners, *cedges; 172 PetscInt *ecount, **eneighs, *vcount, **vneighs; 173 PetscInt *emarks; 174 PetscBool print, eerr, done, lrc[2], conforming, global, setprimal; 175 176 PetscFunctionBegin; 177 /* If the discrete gradient is defined for a subset of dofs and global is true, 178 it assumes G is given in global ordering for all the dofs. 179 Otherwise, the ordering is global for the Nedelec field */ 180 order = pcbddc->nedorder; 181 conforming = pcbddc->conforming; 182 field = pcbddc->nedfield; 183 global = pcbddc->nedglobal; 184 setprimal = PETSC_FALSE; 185 print = PETSC_FALSE; 186 187 /* Command line customization */ 188 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 189 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 190 /* print debug info and adaptive order TODO: to be removed */ 191 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 192 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 193 PetscOptionsEnd(); 194 195 /* Return if there are no edges in the decomposition */ 196 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 197 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 198 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 199 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 200 lrc[0] = PETSC_FALSE; 201 for (i = 0; i < n; i++) { 202 if (PetscRealPart(vals[i]) > 2.) { 203 lrc[0] = PETSC_TRUE; 204 break; 205 } 206 } 207 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 208 PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm)); 209 if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS); 210 211 /* Get Nedelec field */ 212 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 213 if (pcbddc->n_ISForDofsLocal && field >= 0) { 214 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 215 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 216 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 217 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 218 ne = n; 219 nedfieldlocal = NULL; 220 global = PETSC_TRUE; 221 } else if (field == PETSC_DECIDE) { 222 PetscInt rst, ren, *idx; 223 224 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 225 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 226 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 227 for (i = rst; i < ren; i++) { 228 PetscInt nc; 229 230 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 231 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 232 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 233 } 234 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 235 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 236 PetscCall(PetscMalloc1(n, &idx)); 237 for (i = 0, ne = 0; i < n; i++) 238 if (matis->sf_leafdata[i]) idx[ne++] = i; 239 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 240 } else { 241 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 242 } 243 244 /* Sanity checks */ 245 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 246 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 247 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 248 249 /* Just set primal dofs and return */ 250 if (setprimal) { 251 IS enedfieldlocal; 252 PetscInt *eidxs; 253 254 PetscCall(PetscMalloc1(ne, &eidxs)); 255 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 256 if (nedfieldlocal) { 257 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 258 for (i = 0, cum = 0; i < ne; i++) { 259 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i]; 260 } 261 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 262 } else { 263 for (i = 0, cum = 0; i < ne; i++) { 264 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i; 265 } 266 } 267 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 268 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 269 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 270 PetscCall(PetscFree(eidxs)); 271 PetscCall(ISDestroy(&nedfieldlocal)); 272 PetscCall(ISDestroy(&enedfieldlocal)); 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 /* Compute some l2g maps */ 277 if (nedfieldlocal) { 278 IS is; 279 280 /* need to map from the local Nedelec field to local numbering */ 281 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 282 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 283 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 284 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 285 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 286 if (global) { 287 PetscCall(PetscObjectReference((PetscObject)al2g)); 288 el2g = al2g; 289 } else { 290 IS gis; 291 292 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 293 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 294 PetscCall(ISDestroy(&gis)); 295 } 296 PetscCall(ISDestroy(&is)); 297 } else { 298 /* one ref for the destruction of al2g, one for el2g */ 299 PetscCall(PetscObjectReference((PetscObject)al2g)); 300 PetscCall(PetscObjectReference((PetscObject)al2g)); 301 el2g = al2g; 302 fl2g = NULL; 303 } 304 305 /* Start communication to drop connections for interior edges (for cc analysis only) */ 306 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 307 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 308 if (nedfieldlocal) { 309 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 310 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 311 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 312 } else { 313 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 314 } 315 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 316 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 317 318 /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting 319 Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */ 320 if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners)); 321 322 /* drop connections with interior edges to avoid unneeded communications and memory movements */ 323 PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view")); 324 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 325 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 326 if (global) { 327 PetscInt rst; 328 329 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 330 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 331 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst; 332 } 333 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 334 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 335 } else { 336 PetscInt *tbz; 337 338 PetscCall(PetscMalloc1(ne, &tbz)); 339 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 340 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 341 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 342 for (i = 0, cum = 0; i < ne; i++) 343 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 344 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 345 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 346 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 347 PetscCall(PetscFree(tbz)); 348 } 349 350 /* Extract subdomain relevant rows of G */ 351 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 352 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 353 PetscCall(MatAIJExtractRows(G, lned, &lGall)); 354 /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */ 355 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 356 PetscCall(ISDestroy(&lned)); 357 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 358 PetscCall(MatDestroy(&lGall)); 359 PetscCall(MatISGetLocalMat(lGis, &lG)); 360 if (matis->allow_repeated) { /* multi-element support */ 361 Mat *lGn, B; 362 IS *is_rows, *tcols, tmap, nmap; 363 PetscInt subnv; 364 const PetscInt *subvidxs; 365 ISLocalToGlobalMapping mapn; 366 367 PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn)); 368 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows)); 369 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols)); 370 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 371 if (fl2g) { 372 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i])); 373 } else { 374 PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i])); 375 is_rows[i] = pcbddc->local_subs[i]; 376 } 377 PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)])); 378 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn)); 379 PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv)); 380 PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs)); 381 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i])); 382 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs)); 383 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 384 } 385 386 /* Create new MATIS with repeated vertices */ 387 PetscCall(MatCreate(comm, &B)); 388 PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N)); 389 PetscCall(MatSetType(B, MATIS)); 390 PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE)); 391 PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap)); 392 PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap)); 393 PetscCall(ISDestroy(&tmap)); 394 PetscCall(ISGetLocalSize(nmap, &subnv)); 395 PetscCall(ISGetIndices(nmap, &subvidxs)); 396 PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap)); 397 PetscCall(ISRestoreIndices(nmap, &subvidxs)); 398 PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn)); 399 PetscCall(ISDestroy(&tmap)); 400 PetscCall(ISDestroy(&nmap)); 401 PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn)); 402 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 403 PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG)); 404 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 405 PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)])); 406 PetscCall(ISDestroy(&is_rows[i])); 407 PetscCall(ISDestroy(&tcols[i])); 408 } 409 PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG)); 410 PetscCall(PetscFree(lGn)); 411 PetscCall(PetscFree(is_rows)); 412 PetscCall(PetscFree(tcols)); 413 PetscCall(MatISSetLocalMat(B, lG)); 414 PetscCall(MatDestroy(&lG)); 415 416 PetscCall(MatDestroy(&lGis)); 417 lGis = B; 418 419 lGis->assembled = PETSC_TRUE; 420 } 421 PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view")); 422 423 /* SF for nodal dofs communications */ 424 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 425 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 426 PetscCall(PetscObjectReference((PetscObject)vl2g)); 427 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 428 PetscCall(PetscSFCreate(comm, &sfv)); 429 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 430 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 431 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 432 433 if (elements_corners) { 434 IS tmp; 435 Vec global, local; 436 Mat_IS *tGis = (Mat_IS *)lGis->data; 437 438 PetscCall(MatCreateVecs(lGis, &global, NULL)); 439 PetscCall(MatCreateVecs(tGis->A, &local, NULL)); 440 PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp)); 441 PetscCall(VecDestroy(&global)); 442 PetscCall(VecDestroy(&local)); 443 elements_corners = tmp; 444 } 445 446 /* Destroy temporary G */ 447 PetscCall(MatISGetLocalMat(lGis, &lG)); 448 PetscCall(PetscObjectReference((PetscObject)lG)); 449 PetscCall(MatDestroy(&G)); 450 PetscCall(MatDestroy(&lGis)); 451 452 if (print) { 453 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 454 PetscCall(MatView(lG, NULL)); 455 } 456 457 /* Save lG for values insertion in change of basis */ 458 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 459 460 /* Analyze the edge-nodes connections (duplicate lG) */ 461 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 462 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 463 PetscCall(PetscBTCreate(nv, &btv)); 464 PetscCall(PetscBTCreate(ne, &bte)); 465 PetscCall(PetscBTCreate(ne, &btb)); 466 PetscCall(PetscBTCreate(ne, &btbd)); 467 /* need to import the boundary specification to ensure the 468 proper detection of coarse edges' endpoints */ 469 if (pcbddc->DirichletBoundariesLocal) { 470 IS is; 471 472 if (fl2g) { 473 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 474 } else { 475 is = pcbddc->DirichletBoundariesLocal; 476 } 477 PetscCall(ISGetLocalSize(is, &cum)); 478 PetscCall(ISGetIndices(is, &idxs)); 479 for (i = 0; i < cum; i++) { 480 if (idxs[i] >= 0 && idxs[i] < ne) { 481 PetscCall(PetscBTSet(btb, idxs[i])); 482 PetscCall(PetscBTSet(btbd, idxs[i])); 483 } 484 } 485 PetscCall(ISRestoreIndices(is, &idxs)); 486 if (fl2g) PetscCall(ISDestroy(&is)); 487 } 488 if (pcbddc->NeumannBoundariesLocal) { 489 IS is; 490 491 if (fl2g) { 492 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 493 } else { 494 is = pcbddc->NeumannBoundariesLocal; 495 } 496 PetscCall(ISGetLocalSize(is, &cum)); 497 PetscCall(ISGetIndices(is, &idxs)); 498 for (i = 0; i < cum; i++) { 499 if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i])); 500 } 501 PetscCall(ISRestoreIndices(is, &idxs)); 502 if (fl2g) PetscCall(ISDestroy(&is)); 503 } 504 505 /* Count neighs per dof */ 506 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL)); 507 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL)); 508 509 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 510 for proper detection of coarse edges' endpoints */ 511 PetscCall(PetscBTCreate(ne, &btee)); 512 for (i = 0; i < ne; i++) { 513 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i)); 514 } 515 PetscCall(PetscMalloc1(ne, &marks)); 516 if (!conforming) { 517 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 518 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 519 } 520 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 521 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 522 cum = 0; 523 for (i = 0; i < ne; i++) { 524 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 525 if (!PetscBTLookup(btee, i)) { 526 marks[cum++] = i; 527 continue; 528 } 529 /* set badly connected edge dofs as primal */ 530 if (!conforming) { 531 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 532 marks[cum++] = i; 533 PetscCall(PetscBTSet(bte, i)); 534 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 535 } else { 536 /* every edge dofs should be connected through a certain number of nodal dofs 537 to other edge dofs belonging to coarse edges 538 - at most 2 endpoints 539 - order-1 interior nodal dofs 540 - no undefined nodal dofs (nconn < order) 541 */ 542 PetscInt ends = 0, ints = 0, undef = 0; 543 for (j = ii[i]; j < ii[i + 1]; j++) { 544 PetscInt v = jj[j], k; 545 PetscInt nconn = iit[v + 1] - iit[v]; 546 for (k = iit[v]; k < iit[v + 1]; k++) 547 if (!PetscBTLookup(btee, jjt[k])) nconn--; 548 if (nconn > order) ends++; 549 else if (nconn == order) ints++; 550 else undef++; 551 } 552 if (undef || ends > 2 || ints != order - 1) { 553 marks[cum++] = i; 554 PetscCall(PetscBTSet(bte, i)); 555 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 556 } 557 } 558 } 559 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 560 if (!order && ii[i + 1] != ii[i]) { 561 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 562 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 563 } 564 } 565 PetscCall(PetscBTDestroy(&btee)); 566 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 567 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 568 if (!conforming) { 569 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 570 PetscCall(MatDestroy(&lGt)); 571 } 572 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 573 574 /* identify splitpoints and corner candidates */ 575 PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots)); 576 PetscCall(PetscBTCreate(nv, &btvcand)); 577 if (elements_corners) { 578 PetscCall(ISGetLocalSize(elements_corners, &cum)); 579 PetscCall(ISGetIndices(elements_corners, &idxs)); 580 for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i])); 581 PetscCall(ISRestoreIndices(elements_corners, &idxs)); 582 } 583 584 if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */ 585 PetscSF emlsf, vmlsf; 586 PetscInt *eleaves, *vleaves, *meleaves, *mvleaves; 587 PetscInt cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl; 588 589 PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs)); 590 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 591 PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs)); 592 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 593 594 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf)); 595 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf)); 596 597 PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL)); 598 for (i = 0, j = 0; i < ne; i++) j += ecount[i]; 599 PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne); 600 PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j); 601 602 PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL)); 603 for (i = 0, j = 0; i < nv; i++) j += vcount[i]; 604 PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv); 605 PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j); 606 607 PetscCall(PetscMalloc1(ne, &eleaves)); 608 PetscCall(PetscMalloc1(nv, &vleaves)); 609 for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX; 610 for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX; 611 PetscCall(PetscMalloc1(emnl, &meleaves)); 612 PetscCall(PetscMalloc1(vmnl, &mvleaves)); 613 614 PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm)); 615 PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 616 for (i = 0; i < n_subs; i++) { 617 const PetscInt *idxs; 618 const PetscInt subid = cum_subs + i; 619 PetscInt ns; 620 621 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns)); 622 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 623 for (j = 0; j < ns; j++) { 624 const PetscInt e = idxs[j]; 625 626 eleaves[e] = subid; 627 for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid; 628 } 629 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 630 } 631 PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 632 PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 633 PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 634 PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 635 PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 636 PetscCall(PetscFree(eleaves)); 637 PetscCall(PetscFree(vleaves)); 638 639 PetscCall(PetscMalloc1(ne + 1, &eneighs)); 640 eneighs[0] = meleaves; 641 for (i = 0; i < ne; i++) { 642 PetscCall(PetscSortInt(ecount[i], eneighs[i])); 643 eneighs[i + 1] = eneighs[i] + ecount[i]; 644 } 645 PetscCall(PetscMalloc1(nv + 1, &vneighs)); 646 vneighs[0] = mvleaves; 647 for (i = 0; i < nv; i++) { 648 PetscCall(PetscSortInt(vcount[i], vneighs[i])); 649 vneighs[i + 1] = vneighs[i] + vcount[i]; 650 } 651 } else { 652 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs)); 653 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs)); 654 } 655 656 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 657 if (print) { 658 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 659 PetscCall(MatView(lGe, NULL)); 660 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 661 PetscCall(MatView(lGt, NULL)); 662 } 663 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 664 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 665 for (i = 0; i < nv; i++) { 666 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 667 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 668 if (!order) { /* variable order */ 669 PetscReal vorder = 0.; 670 671 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 672 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 673 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 674 ord = 1; 675 } 676 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 677 const PetscInt e = jj[j]; 678 679 if (PetscBTLookup(btbd, e)) { 680 bdir = PETSC_TRUE; 681 break; 682 } 683 if (vc != ecount[e]) { 684 sneighs = PETSC_FALSE; 685 } else { 686 const PetscInt *vn = vneighs[i], *en = eneighs[e]; 687 688 for (PetscInt k = 0; k < vc; k++) { 689 if (vn[k] != en[k]) { 690 sneighs = PETSC_FALSE; 691 break; 692 } 693 } 694 } 695 } 696 if (elements_corners) test = 0; 697 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 698 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir])); 699 PetscCall(PetscBTSet(btv, i)); 700 } else if (test == ord) { 701 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 702 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i)); 703 PetscCall(PetscBTSet(btv, i)); 704 } else if (!elements_corners) { 705 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i)); 706 PetscCall(PetscBTSet(btvcand, i)); 707 } 708 } 709 } 710 PetscCall(PetscBTDestroy(&btbd)); 711 712 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 713 if (order != 1) { 714 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n")); 715 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 716 for (i = 0; i < nv; i++) { 717 if (PetscBTLookup(btvcand, i)) { 718 PetscBool found = PETSC_FALSE; 719 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 720 PetscInt k, e = jj[j]; 721 if (PetscBTLookup(bte, e)) continue; 722 for (k = iit[e]; k < iit[e + 1]; k++) { 723 PetscInt v = jjt[k]; 724 if (v != i && PetscBTLookup(btvcand, v)) { 725 found = PETSC_TRUE; 726 break; 727 } 728 } 729 } 730 if (!found) { 731 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i)); 732 PetscCall(PetscBTClear(btvcand, i)); 733 } else { 734 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i)); 735 } 736 } 737 } 738 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 739 } 740 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 741 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 742 PetscCall(MatDestroy(&lGe)); 743 744 /* Get the local G^T explicitly */ 745 PetscCall(MatDestroy(&lGt)); 746 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 747 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 748 749 /* Mark shared nodal dofs */ 750 PetscCall(PetscBTCreate(nv, &btvi)); 751 for (i = 0; i < nv; i++) { 752 if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i)); 753 } 754 755 if (matis->allow_repeated) { 756 PetscCall(PetscFree(eneighs[0])); 757 PetscCall(PetscFree(vneighs[0])); 758 PetscCall(PetscFree(eneighs)); 759 PetscCall(PetscFree(vneighs)); 760 } 761 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 762 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 763 764 /* communicate corners and splitpoints */ 765 PetscCall(PetscMalloc1(nv, &vmarks)); 766 PetscCall(PetscArrayzero(sfvleaves, nv)); 767 PetscCall(PetscArrayzero(sfvroots, Lv)); 768 for (i = 0; i < nv; i++) 769 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 770 771 if (print) { 772 IS tbz; 773 774 cum = 0; 775 for (i = 0; i < nv; i++) 776 if (sfvleaves[i]) vmarks[cum++] = i; 777 778 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 779 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 780 PetscCall(ISView(tbz, NULL)); 781 PetscCall(ISDestroy(&tbz)); 782 } 783 784 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 785 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 786 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 787 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 788 789 /* Zero rows of lGt corresponding to identified corners 790 and interior nodal dofs */ 791 cum = 0; 792 for (i = 0; i < nv; i++) { 793 if (sfvleaves[i]) { 794 vmarks[cum++] = i; 795 PetscCall(PetscBTSet(btv, i)); 796 } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 797 } 798 PetscCall(PetscBTDestroy(&btvi)); 799 if (print) { 800 IS tbz; 801 802 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 803 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 804 PetscCall(ISView(tbz, NULL)); 805 PetscCall(ISDestroy(&tbz)); 806 } 807 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 808 PetscCall(PetscFree(vmarks)); 809 PetscCall(PetscSFDestroy(&sfv)); 810 PetscCall(PetscFree2(sfvleaves, sfvroots)); 811 812 /* Recompute G */ 813 PetscCall(MatDestroy(&lG)); 814 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 815 if (print) { 816 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 817 PetscCall(MatView(lG, NULL)); 818 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 819 PetscCall(MatView(lGt, NULL)); 820 } 821 822 /* Get primal dofs (if any) */ 823 cum = 0; 824 for (i = 0; i < ne; i++) { 825 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 826 } 827 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 828 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 829 if (print) { 830 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 831 PetscCall(ISView(primals, NULL)); 832 } 833 PetscCall(PetscBTDestroy(&bte)); 834 /* TODO: what if the user passed in some of them ? */ 835 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 836 PetscCall(ISDestroy(&primals)); 837 838 /* Compute edge connectivity */ 839 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 840 841 /* Symbolic conn = lG*lGt */ 842 if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */ 843 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 844 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 845 PetscCall(MatProductSetAlgorithm(conn, "default")); 846 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 847 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 848 PetscCall(MatProductSetFromOptions(conn)); 849 PetscCall(MatProductSymbolic(conn)); 850 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 851 if (fl2g) { 852 PetscBT btf; 853 PetscInt *iia, *jja, *iiu, *jju; 854 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 855 856 /* create CSR for all local dofs */ 857 PetscCall(PetscMalloc1(n + 1, &iia)); 858 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 859 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 860 iiu = pcbddc->mat_graph->xadj; 861 jju = pcbddc->mat_graph->adjncy; 862 } else if (pcbddc->use_local_adj) { 863 rest = PETSC_TRUE; 864 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 865 } else { 866 free = PETSC_TRUE; 867 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 868 iiu[0] = 0; 869 for (i = 0; i < n; i++) { 870 iiu[i + 1] = i + 1; 871 jju[i] = -1; 872 } 873 } 874 875 /* import sizes of CSR */ 876 iia[0] = 0; 877 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 878 879 /* overwrite entries corresponding to the Nedelec field */ 880 PetscCall(PetscBTCreate(n, &btf)); 881 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 882 for (i = 0; i < ne; i++) { 883 PetscCall(PetscBTSet(btf, idxs[i])); 884 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 885 } 886 887 /* iia in CSR */ 888 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 889 890 /* jja in CSR */ 891 PetscCall(PetscMalloc1(iia[n], &jja)); 892 for (i = 0; i < n; i++) 893 if (!PetscBTLookup(btf, i)) 894 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 895 896 /* map edge dofs connectivity */ 897 if (jj) { 898 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 899 for (i = 0; i < ne; i++) { 900 PetscInt e = idxs[i]; 901 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 902 } 903 } 904 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 905 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES)); 906 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 907 if (free) PetscCall(PetscFree2(iiu, jju)); 908 PetscCall(PetscBTDestroy(&btf)); 909 } else { 910 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES)); 911 } 912 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 913 PetscCall(MatDestroy(&conn)); 914 } 915 916 /* Analyze interface for edge dofs */ 917 PetscCall(PCBDDCAnalyzeInterface(pc)); 918 pcbddc->mat_graph->twodim = PETSC_FALSE; 919 920 /* Get coarse edges in the edge space */ 921 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 922 923 if (fl2g) { 924 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 925 PetscCall(PetscMalloc1(nee, &eedges)); 926 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 927 } else { 928 eedges = alleedges; 929 primals = allprimals; 930 } 931 932 /* Mark fine edge dofs with their coarse edge id */ 933 PetscCall(PetscArrayzero(marks, ne)); 934 PetscCall(ISGetLocalSize(primals, &cum)); 935 PetscCall(ISGetIndices(primals, &idxs)); 936 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 937 PetscCall(ISRestoreIndices(primals, &idxs)); 938 if (print) { 939 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 940 PetscCall(ISView(primals, NULL)); 941 } 942 943 maxsize = 0; 944 for (i = 0; i < nee; i++) { 945 PetscInt size, mark = i + 1; 946 947 PetscCall(ISGetLocalSize(eedges[i], &size)); 948 PetscCall(ISGetIndices(eedges[i], &idxs)); 949 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 950 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 951 maxsize = PetscMax(maxsize, size); 952 } 953 954 /* Find coarse edge endpoints */ 955 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 956 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 957 for (i = 0; i < nee; i++) { 958 PetscInt mark = i + 1, size; 959 960 PetscCall(ISGetLocalSize(eedges[i], &size)); 961 if (!size && nedfieldlocal) continue; 962 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 963 PetscCall(ISGetIndices(eedges[i], &idxs)); 964 if (print) { 965 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 966 PetscCall(ISView(eedges[i], NULL)); 967 } 968 for (j = 0; j < size; j++) { 969 PetscInt k, ee = idxs[j]; 970 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee)); 971 for (k = ii[ee]; k < ii[ee + 1]; k++) { 972 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k])); 973 if (PetscBTLookup(btv, jj[k])) { 974 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k])); 975 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 976 PetscInt k2; 977 PetscBool corner = PETSC_FALSE; 978 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 979 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]))); 980 /* it's a corner if either is connected with an edge dof belonging to a different cc or 981 if the edge dof lie on the natural part of the boundary */ 982 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 983 corner = PETSC_TRUE; 984 break; 985 } 986 } 987 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 988 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k])); 989 PetscCall(PetscBTSet(btv, jj[k])); 990 } else { 991 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n")); 992 } 993 } 994 } 995 } 996 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 997 } 998 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 999 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1000 PetscCall(PetscBTDestroy(&btb)); 1001 1002 /* Reset marked primal dofs */ 1003 PetscCall(ISGetLocalSize(primals, &cum)); 1004 PetscCall(ISGetIndices(primals, &idxs)); 1005 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 1006 PetscCall(ISRestoreIndices(primals, &idxs)); 1007 1008 /* Now use the initial lG */ 1009 PetscCall(MatDestroy(&lG)); 1010 PetscCall(MatDestroy(&lGt)); 1011 lG = lGinit; 1012 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 1013 1014 /* Compute extended cols indices */ 1015 PetscCall(PetscBTCreate(nv, &btvc)); 1016 PetscCall(PetscBTCreate(nee, &bter)); 1017 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1018 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 1019 i *= maxsize; 1020 PetscCall(PetscCalloc1(nee, &extcols)); 1021 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 1022 eerr = PETSC_FALSE; 1023 for (i = 0; i < nee; i++) { 1024 PetscInt size, found = 0; 1025 1026 cum = 0; 1027 PetscCall(ISGetLocalSize(eedges[i], &size)); 1028 if (!size && nedfieldlocal) continue; 1029 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1030 PetscCall(ISGetIndices(eedges[i], &idxs)); 1031 PetscCall(PetscBTMemzero(nv, btvc)); 1032 for (j = 0; j < size; j++) { 1033 PetscInt k, ee = idxs[j]; 1034 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1035 PetscInt vv = jj[k]; 1036 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 1037 else if (!PetscBTLookupSet(btvc, vv)) found++; 1038 } 1039 } 1040 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1041 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1042 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1043 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1044 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1045 /* it may happen that endpoints are not defined at this point 1046 if it is the case, mark this edge for a second pass */ 1047 if (cum != size - 1 || found != 2) { 1048 PetscCall(PetscBTSet(bter, i)); 1049 if (print) { 1050 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 1051 PetscCall(ISView(eedges[i], NULL)); 1052 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 1053 PetscCall(ISView(extcols[i], NULL)); 1054 } 1055 eerr = PETSC_TRUE; 1056 } 1057 } 1058 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 1059 PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm)); 1060 if (done) { 1061 PetscInt *newprimals; 1062 1063 PetscCall(PetscMalloc1(ne, &newprimals)); 1064 PetscCall(ISGetLocalSize(primals, &cum)); 1065 PetscCall(ISGetIndices(primals, &idxs)); 1066 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 1067 PetscCall(ISRestoreIndices(primals, &idxs)); 1068 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1069 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr])); 1070 for (i = 0; i < nee; i++) { 1071 PetscBool has_candidates = PETSC_FALSE; 1072 if (PetscBTLookup(bter, i)) { 1073 PetscInt size, mark = i + 1; 1074 1075 PetscCall(ISGetLocalSize(eedges[i], &size)); 1076 PetscCall(ISGetIndices(eedges[i], &idxs)); 1077 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1078 for (j = 0; j < size; j++) { 1079 PetscInt k, ee = idxs[j]; 1080 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1])); 1081 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1082 /* set all candidates located on the edge as corners */ 1083 if (PetscBTLookup(btvcand, jj[k])) { 1084 PetscInt k2, vv = jj[k]; 1085 has_candidates = PETSC_TRUE; 1086 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv)); 1087 PetscCall(PetscBTSet(btv, vv)); 1088 /* set all edge dofs connected to candidate as primals */ 1089 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 1090 if (marks[jjt[k2]] == mark) { 1091 PetscInt k3, ee2 = jjt[k2]; 1092 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2)); 1093 newprimals[cum++] = ee2; 1094 /* finally set the new corners */ 1095 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 1096 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3])); 1097 PetscCall(PetscBTSet(btv, jj[k3])); 1098 } 1099 } 1100 } 1101 } else { 1102 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k])); 1103 } 1104 } 1105 } 1106 if (!has_candidates) { /* circular edge */ 1107 PetscInt k, ee = idxs[0], *tmarks; 1108 1109 PetscCall(PetscCalloc1(ne, &tmarks)); 1110 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i)); 1111 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1112 PetscInt k2; 1113 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k])); 1114 PetscCall(PetscBTSet(btv, jj[k])); 1115 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 1116 } 1117 for (j = 0; j < size; j++) { 1118 if (tmarks[idxs[j]] > 1) { 1119 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j])); 1120 newprimals[cum++] = idxs[j]; 1121 } 1122 } 1123 PetscCall(PetscFree(tmarks)); 1124 } 1125 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1126 } 1127 PetscCall(ISDestroy(&extcols[i])); 1128 } 1129 PetscCall(PetscFree(extcols)); 1130 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1131 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 1132 if (fl2g) { 1133 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 1134 PetscCall(ISDestroy(&primals)); 1135 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1136 PetscCall(PetscFree(eedges)); 1137 } 1138 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1139 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 1140 PetscCall(PetscFree(newprimals)); 1141 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 1142 PetscCall(ISDestroy(&primals)); 1143 PetscCall(PCBDDCAnalyzeInterface(pc)); 1144 pcbddc->mat_graph->twodim = PETSC_FALSE; 1145 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1146 if (fl2g) { 1147 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 1148 PetscCall(PetscMalloc1(nee, &eedges)); 1149 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 1150 } else { 1151 eedges = alleedges; 1152 primals = allprimals; 1153 } 1154 PetscCall(PetscCalloc1(nee, &extcols)); 1155 1156 /* Mark again */ 1157 PetscCall(PetscArrayzero(marks, ne)); 1158 for (i = 0; i < nee; i++) { 1159 PetscInt size, mark = i + 1; 1160 1161 PetscCall(ISGetLocalSize(eedges[i], &size)); 1162 PetscCall(ISGetIndices(eedges[i], &idxs)); 1163 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1164 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1165 } 1166 if (print) { 1167 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1168 PetscCall(ISView(primals, NULL)); 1169 } 1170 1171 /* Recompute extended cols */ 1172 eerr = PETSC_FALSE; 1173 for (i = 0; i < nee; i++) { 1174 PetscInt size; 1175 1176 cum = 0; 1177 PetscCall(ISGetLocalSize(eedges[i], &size)); 1178 if (!size && nedfieldlocal) continue; 1179 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1180 PetscCall(ISGetIndices(eedges[i], &idxs)); 1181 for (j = 0; j < size; j++) { 1182 PetscInt k, ee = idxs[j]; 1183 for (k = ii[ee]; k < ii[ee + 1]; k++) 1184 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1185 } 1186 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1187 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1188 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1189 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1190 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1191 if (cum != size - 1) { 1192 if (print) { 1193 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1194 PetscCall(ISView(eedges[i], NULL)); 1195 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1196 PetscCall(ISView(extcols[i], NULL)); 1197 } 1198 eerr = PETSC_TRUE; 1199 } 1200 } 1201 } 1202 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1203 PetscCall(PetscFree2(extrow, gidxs)); 1204 PetscCall(PetscBTDestroy(&bter)); 1205 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1206 /* an error should not occur at this point */ 1207 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1208 1209 /* Check the number of endpoints */ 1210 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1211 PetscCall(PetscMalloc1(2 * nee, &corners)); 1212 PetscCall(PetscMalloc1(nee, &cedges)); 1213 for (i = 0; i < nee; i++) { 1214 PetscInt size, found = 0, gc[2]; 1215 1216 /* init with defaults */ 1217 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1218 PetscCall(ISGetLocalSize(eedges[i], &size)); 1219 if (!size && nedfieldlocal) continue; 1220 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1221 PetscCall(ISGetIndices(eedges[i], &idxs)); 1222 PetscCall(PetscBTMemzero(nv, btvc)); 1223 for (j = 0; j < size; j++) { 1224 PetscInt k, ee = idxs[j]; 1225 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1226 PetscInt vv = jj[k]; 1227 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1228 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i); 1229 corners[i * 2 + found++] = vv; 1230 } 1231 } 1232 } 1233 if (found != 2) { 1234 PetscInt e; 1235 if (fl2g) { 1236 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1237 } else { 1238 e = idxs[0]; 1239 } 1240 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1241 } 1242 1243 /* get primal dof index on this coarse edge */ 1244 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1245 if (gc[0] > gc[1]) { 1246 PetscInt swap = corners[2 * i]; 1247 corners[2 * i] = corners[2 * i + 1]; 1248 corners[2 * i + 1] = swap; 1249 } 1250 cedges[i] = idxs[size - 1]; 1251 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1252 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1])); 1253 } 1254 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1255 PetscCall(PetscBTDestroy(&btvc)); 1256 1257 if (PetscDefined(USE_DEBUG)) { 1258 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1259 not interfere with neighbouring coarse edges */ 1260 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1261 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1262 for (i = 0; i < nv; i++) { 1263 PetscInt emax = 0, eemax = 0; 1264 1265 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1266 PetscCall(PetscArrayzero(emarks, nee + 1)); 1267 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1268 for (j = 1; j < nee + 1; j++) { 1269 if (emax < emarks[j]) { 1270 emax = emarks[j]; 1271 eemax = j; 1272 } 1273 } 1274 /* not relevant for edges */ 1275 if (!eemax) continue; 1276 1277 for (j = ii[i]; j < ii[i + 1]; j++) { 1278 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1279 } 1280 } 1281 PetscCall(PetscFree(emarks)); 1282 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1283 } 1284 1285 /* Compute extended rows indices for edge blocks of the change of basis */ 1286 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1287 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1288 extmem *= maxsize; 1289 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1290 PetscCall(PetscMalloc1(nee, &extrows)); 1291 PetscCall(PetscCalloc1(nee, &extrowcum)); 1292 for (i = 0; i < nv; i++) { 1293 PetscInt mark = 0, size, start; 1294 1295 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1296 for (j = ii[i]; j < ii[i + 1]; j++) 1297 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1298 1299 /* not relevant */ 1300 if (!mark) continue; 1301 1302 /* import extended row */ 1303 mark--; 1304 start = mark * extmem + extrowcum[mark]; 1305 size = ii[i + 1] - ii[i]; 1306 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1307 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1308 extrowcum[mark] += size; 1309 } 1310 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1311 PetscCall(MatDestroy(&lGt)); 1312 PetscCall(PetscFree(marks)); 1313 1314 /* Compress extrows */ 1315 cum = 0; 1316 for (i = 0; i < nee; i++) { 1317 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1318 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1319 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1320 cum = PetscMax(cum, size); 1321 } 1322 PetscCall(PetscFree(extrowcum)); 1323 PetscCall(PetscBTDestroy(&btv)); 1324 PetscCall(PetscBTDestroy(&btvcand)); 1325 1326 /* Workspace for lapack inner calls and VecSetValues */ 1327 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1328 1329 /* Create change of basis matrix (preallocation can be improved) */ 1330 PetscCall(MatCreate(comm, &T)); 1331 PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap)); 1332 PetscCall(MatSetType(T, MATAIJ)); 1333 PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL)); 1334 PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL)); 1335 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1336 PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1337 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1338 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1339 1340 /* Defaults to identity */ 1341 for (i = pc->mat->rmap->rstart; i < pc->mat->rmap->rend; i++) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES)); 1342 1343 /* Create discrete gradient for the coarser level if needed */ 1344 PetscCall(MatDestroy(&pcbddc->nedcG)); 1345 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1346 if (pcbddc->current_level < pcbddc->max_levels) { 1347 ISLocalToGlobalMapping cel2g, cvl2g; 1348 IS wis, gwis; 1349 PetscInt cnv, cne; 1350 1351 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1352 if (fl2g) { 1353 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1354 } else { 1355 PetscCall(PetscObjectReference((PetscObject)wis)); 1356 pcbddc->nedclocal = wis; 1357 } 1358 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1359 PetscCall(ISDestroy(&wis)); 1360 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1361 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1362 PetscCall(ISDestroy(&wis)); 1363 PetscCall(ISDestroy(&gwis)); 1364 1365 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1366 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1367 PetscCall(ISDestroy(&wis)); 1368 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1369 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1370 PetscCall(ISDestroy(&wis)); 1371 PetscCall(ISDestroy(&gwis)); 1372 1373 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1374 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1375 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1376 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1377 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1378 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1379 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1380 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1381 } 1382 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1383 1384 #if defined(PRINT_GDET) 1385 inc = 0; 1386 lev = pcbddc->current_level; 1387 #endif 1388 1389 /* Insert values in the change of basis matrix */ 1390 for (i = 0; i < nee; i++) { 1391 Mat Gins = NULL, GKins = NULL; 1392 IS cornersis = NULL; 1393 PetscScalar cvals[2]; 1394 1395 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); 1396 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1397 if (Gins && GKins) { 1398 const PetscScalar *data; 1399 const PetscInt *rows, *cols; 1400 PetscInt nrh, nch, nrc, ncc; 1401 1402 PetscCall(ISGetIndices(eedges[i], &cols)); 1403 /* H1 */ 1404 PetscCall(ISGetIndices(extrows[i], &rows)); 1405 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1406 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1407 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1408 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1409 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1410 /* complement */ 1411 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1412 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1413 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1414 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1415 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1416 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1417 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1418 1419 /* coarse discrete gradient */ 1420 if (pcbddc->nedcG) { 1421 PetscInt cols[2]; 1422 1423 cols[0] = 2 * i; 1424 cols[1] = 2 * i + 1; 1425 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1426 } 1427 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1428 } 1429 PetscCall(ISDestroy(&extrows[i])); 1430 PetscCall(ISDestroy(&extcols[i])); 1431 PetscCall(ISDestroy(&cornersis)); 1432 PetscCall(MatDestroy(&Gins)); 1433 PetscCall(MatDestroy(&GKins)); 1434 } 1435 1436 /* for FDM element-by-element: first dof on the edge only constraint. Why? */ 1437 if (elements_corners && pcbddc->mat_graph->multi_element) { 1438 ISLocalToGlobalMapping map; 1439 MatNullSpace nnsp; 1440 Vec quad_vec; 1441 1442 PetscCall(MatCreateVecs(pc->pmat, &quad_vec, NULL)); 1443 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, 1, &quad_vec, &nnsp)); 1444 PetscCall(VecLockReadPop(quad_vec)); 1445 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 1446 PetscCall(VecSetLocalToGlobalMapping(quad_vec, map)); 1447 for (i = 0; i < nee; i++) { 1448 const PetscInt *idxs; 1449 PetscScalar one = 1.0; 1450 1451 PetscCall(ISGetLocalSize(alleedges[i], &cum)); 1452 if (!cum) continue; 1453 PetscCall(ISGetIndices(alleedges[i], &idxs)); 1454 PetscCall(VecSetValuesLocal(quad_vec, 1, idxs, &one, INSERT_VALUES)); 1455 PetscCall(ISRestoreIndices(alleedges[i], &idxs)); 1456 } 1457 PetscCall(VecLockReadPush(quad_vec)); 1458 PetscCall(VecDestroy(&quad_vec)); 1459 PetscCall(MatSetNearNullSpace(pc->pmat, nnsp)); 1460 PetscCall(MatNullSpaceDestroy(&nnsp)); 1461 } 1462 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1463 1464 /* Start assembling */ 1465 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1466 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1467 1468 /* Free */ 1469 if (fl2g) { 1470 PetscCall(ISDestroy(&primals)); 1471 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1472 PetscCall(PetscFree(eedges)); 1473 } 1474 1475 /* hack mat_graph with primal dofs on the coarse edges */ 1476 { 1477 PCBDDCGraph graph = pcbddc->mat_graph; 1478 PetscInt *oqueue = graph->queue; 1479 PetscInt *ocptr = graph->cptr; 1480 PetscInt ncc, *idxs; 1481 1482 /* find first primal edge */ 1483 if (pcbddc->nedclocal) { 1484 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1485 } else { 1486 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1487 idxs = cedges; 1488 } 1489 cum = 0; 1490 while (cum < nee && cedges[cum] < 0) cum++; 1491 1492 /* adapt connected components */ 1493 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1494 graph->cptr[0] = 0; 1495 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1496 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1497 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1498 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1499 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1500 ncc++; 1501 lc--; 1502 cum++; 1503 while (cum < nee && cedges[cum] < 0) cum++; 1504 } 1505 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1506 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1507 ncc++; 1508 } 1509 graph->ncc = ncc; 1510 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1511 PetscCall(PetscFree2(ocptr, oqueue)); 1512 } 1513 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1514 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1515 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1516 1517 PetscCall(ISDestroy(&nedfieldlocal)); 1518 PetscCall(PetscFree(extrow)); 1519 PetscCall(PetscFree2(work, rwork)); 1520 PetscCall(PetscFree(corners)); 1521 PetscCall(PetscFree(cedges)); 1522 PetscCall(PetscFree(extrows)); 1523 PetscCall(PetscFree(extcols)); 1524 PetscCall(MatDestroy(&lG)); 1525 1526 /* Complete assembling */ 1527 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1528 PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view")); 1529 if (pcbddc->nedcG) { 1530 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1531 PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view")); 1532 } 1533 1534 PetscCall(ISDestroy(&elements_corners)); 1535 1536 /* set change of basis */ 1537 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE)); 1538 PetscCall(MatDestroy(&T)); 1539 PetscFunctionReturn(PETSC_SUCCESS); 1540 } 1541 1542 /* the near-null space of BDDC carries information on quadrature weights, 1543 and these can be collinear -> so cheat with MatNullSpaceCreate 1544 and create a suitable set of basis vectors first */ 1545 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1546 { 1547 PetscInt i; 1548 1549 PetscFunctionBegin; 1550 for (i = 0; i < nvecs; i++) { 1551 PetscInt first, last; 1552 1553 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1554 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1555 if (i >= first && i < last) { 1556 PetscScalar *data; 1557 PetscCall(VecGetArray(quad_vecs[i], &data)); 1558 if (!has_const) { 1559 data[i - first] = 1.; 1560 } else { 1561 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1562 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1563 } 1564 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1565 } 1566 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1567 } 1568 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1569 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1570 PetscInt first, last; 1571 PetscCall(VecLockReadPop(quad_vecs[i])); 1572 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1573 if (i >= first && i < last) { 1574 PetscScalar *data; 1575 PetscCall(VecGetArray(quad_vecs[i], &data)); 1576 if (!has_const) { 1577 data[i - first] = 0.; 1578 } else { 1579 data[2 * i - first] = 0.; 1580 data[2 * i - first + 1] = 0.; 1581 } 1582 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1583 } 1584 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1585 PetscCall(VecLockReadPush(quad_vecs[i])); 1586 } 1587 PetscFunctionReturn(PETSC_SUCCESS); 1588 } 1589 1590 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1591 { 1592 Mat loc_divudotp; 1593 Vec p, v, quad_vec; 1594 ISLocalToGlobalMapping map; 1595 PetscScalar *array; 1596 1597 PetscFunctionBegin; 1598 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1599 if (!transpose) { 1600 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1601 } else { 1602 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1603 } 1604 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp)); 1605 PetscCall(VecLockReadPop(quad_vec)); 1606 PetscCall(VecSetLocalToGlobalMapping(quad_vec, map)); 1607 1608 /* compute local quad vec */ 1609 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1610 if (!transpose) { 1611 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1612 } else { 1613 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1614 } 1615 /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */ 1616 PetscCall(VecSet(p, 1.)); 1617 if (!transpose) { 1618 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1619 } else { 1620 PetscCall(MatMult(loc_divudotp, p, v)); 1621 } 1622 PetscCall(VecDestroy(&p)); 1623 if (vl2l) { 1624 Mat lA; 1625 VecScatter sc; 1626 Vec vins; 1627 1628 PetscCall(MatISGetLocalMat(A, &lA)); 1629 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1630 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1631 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1632 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1633 PetscCall(VecScatterDestroy(&sc)); 1634 PetscCall(VecDestroy(&v)); 1635 v = vins; 1636 } 1637 1638 /* mask summation of interface values */ 1639 PetscInt n, *mmask, *mask, *idxs, nmr, nr; 1640 const PetscInt *degree; 1641 PetscSF msf; 1642 1643 PetscCall(VecGetLocalSize(v, &n)); 1644 PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL)); 1645 PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf)); 1646 PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL)); 1647 PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs)); 1648 PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, °ree)); 1649 PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, °ree)); 1650 for (PetscInt i = 0, c = 0; i < nr; i++) { 1651 mmask[c] = 1; 1652 c += degree[i]; 1653 } 1654 PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1655 PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1656 PetscCall(VecGetArray(v, &array)); 1657 for (PetscInt i = 0; i < n; i++) { 1658 array[i] *= mask[i]; 1659 idxs[i] = i; 1660 } 1661 PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES)); 1662 PetscCall(VecRestoreArray(v, &array)); 1663 PetscCall(PetscFree3(mmask, mask, idxs)); 1664 PetscCall(VecDestroy(&v)); 1665 PetscCall(VecAssemblyBegin(quad_vec)); 1666 PetscCall(VecAssemblyEnd(quad_vec)); 1667 PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view")); 1668 PetscCall(VecLockReadPush(quad_vec)); 1669 PetscCall(VecDestroy(&quad_vec)); 1670 PetscFunctionReturn(PETSC_SUCCESS); 1671 } 1672 1673 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1674 { 1675 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1676 1677 PetscFunctionBegin; 1678 if (primalv) { 1679 if (pcbddc->user_primal_vertices_local) { 1680 IS list[2], newp; 1681 1682 list[0] = primalv; 1683 list[1] = pcbddc->user_primal_vertices_local; 1684 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1685 PetscCall(ISSortRemoveDups(newp)); 1686 PetscCall(ISDestroy(&list[1])); 1687 pcbddc->user_primal_vertices_local = newp; 1688 } else { 1689 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1690 } 1691 } 1692 PetscFunctionReturn(PETSC_SUCCESS); 1693 } 1694 1695 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1696 { 1697 PetscInt f, *comp = (PetscInt *)ctx; 1698 1699 PetscFunctionBegin; 1700 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1701 PetscFunctionReturn(PETSC_SUCCESS); 1702 } 1703 1704 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1705 { 1706 Vec local, global; 1707 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1708 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1709 PetscBool monolithic = PETSC_FALSE; 1710 1711 PetscFunctionBegin; 1712 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1713 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1714 PetscOptionsEnd(); 1715 /* need to convert from global to local topology information and remove references to information in global ordering */ 1716 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1717 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1718 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1719 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1720 if (monolithic) { /* just get block size to properly compute vertices */ 1721 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); 1722 goto boundary; 1723 } 1724 1725 if (pcbddc->user_provided_isfordofs) { 1726 if (pcbddc->n_ISForDofs) { 1727 PetscInt i; 1728 1729 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1730 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1731 PetscInt bs; 1732 1733 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1734 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1735 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1736 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1737 } 1738 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1739 pcbddc->n_ISForDofs = 0; 1740 PetscCall(PetscFree(pcbddc->ISForDofs)); 1741 } 1742 } else { 1743 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1744 DM dm; 1745 1746 PetscCall(MatGetDM(pc->pmat, &dm)); 1747 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1748 if (dm) { 1749 IS *fields; 1750 PetscInt nf, i; 1751 1752 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1753 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1754 for (i = 0; i < nf; i++) { 1755 PetscInt bs; 1756 1757 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1758 PetscCall(ISGetBlockSize(fields[i], &bs)); 1759 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1760 PetscCall(ISDestroy(&fields[i])); 1761 } 1762 PetscCall(PetscFree(fields)); 1763 pcbddc->n_ISForDofsLocal = nf; 1764 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1765 PetscContainer c; 1766 1767 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1768 if (c) { 1769 MatISLocalFields lf; 1770 PetscCall(PetscContainerGetPointer(c, (void **)&lf)); 1771 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1772 } else { /* fallback, create the default fields if bs > 1 */ 1773 PetscInt i, n = matis->A->rmap->n; 1774 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1775 if (i > 1) { 1776 pcbddc->n_ISForDofsLocal = i; 1777 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1778 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); 1779 } 1780 } 1781 } 1782 } else { 1783 PetscInt i; 1784 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); 1785 } 1786 } 1787 1788 boundary: 1789 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1790 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1791 } else if (pcbddc->DirichletBoundariesLocal) { 1792 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1793 } 1794 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1795 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1796 } else if (pcbddc->NeumannBoundariesLocal) { 1797 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1798 } 1799 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); 1800 PetscCall(VecDestroy(&global)); 1801 PetscCall(VecDestroy(&local)); 1802 /* detect local disconnected subdomains if requested or needed */ 1803 if (pcbddc->detect_disconnected || matis->allow_repeated) { 1804 IS primalv = NULL; 1805 PetscInt nel; 1806 PetscBool filter = pcbddc->detect_disconnected_filter; 1807 1808 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1809 PetscCall(PetscFree(pcbddc->local_subs)); 1810 PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL)); 1811 if (matis->allow_repeated && nel) { 1812 const PetscInt *elsizes; 1813 1814 pcbddc->n_local_subs = nel; 1815 PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes)); 1816 PetscCall(PetscMalloc1(nel, &pcbddc->local_subs)); 1817 for (PetscInt i = 0, c = 0; i < nel; i++) { 1818 PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i])); 1819 c += elsizes[i]; 1820 } 1821 } else { 1822 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1823 } 1824 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1825 PetscCall(ISDestroy(&primalv)); 1826 } 1827 /* early stage corner detection */ 1828 { 1829 DM dm; 1830 1831 PetscCall(MatGetDM(pc->pmat, &dm)); 1832 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1833 if (dm) { 1834 PetscBool isda; 1835 1836 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1837 if (isda) { 1838 ISLocalToGlobalMapping l2l; 1839 IS corners; 1840 Mat lA; 1841 PetscBool gl, lo; 1842 1843 { 1844 Vec cvec; 1845 const PetscScalar *coords; 1846 PetscInt dof, n, cdim; 1847 PetscBool memc = PETSC_TRUE; 1848 1849 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1850 PetscCall(DMGetCoordinates(dm, &cvec)); 1851 PetscCall(VecGetLocalSize(cvec, &n)); 1852 PetscCall(VecGetBlockSize(cvec, &cdim)); 1853 n /= cdim; 1854 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1855 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1856 PetscCall(VecGetArrayRead(cvec, &coords)); 1857 #if defined(PETSC_USE_COMPLEX) 1858 memc = PETSC_FALSE; 1859 #endif 1860 if (dof != 1) memc = PETSC_FALSE; 1861 if (memc) { 1862 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1863 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1864 PetscReal *bcoords = pcbddc->mat_graph->coords; 1865 PetscInt i, b, d; 1866 1867 for (i = 0; i < n; i++) { 1868 for (b = 0; b < dof; b++) { 1869 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); 1870 } 1871 } 1872 } 1873 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1874 pcbddc->mat_graph->cdim = cdim; 1875 pcbddc->mat_graph->cnloc = dof * n; 1876 pcbddc->mat_graph->cloc = PETSC_FALSE; 1877 } 1878 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 1879 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 1880 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 1881 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 1882 lo = (PetscBool)(l2l && corners); 1883 PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 1884 if (gl) { /* From PETSc's DMDA */ 1885 const PetscInt *idx; 1886 PetscInt dof, bs, *idxout, n; 1887 1888 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1889 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 1890 PetscCall(ISGetLocalSize(corners, &n)); 1891 PetscCall(ISGetIndices(corners, &idx)); 1892 if (bs == dof) { 1893 PetscCall(PetscMalloc1(n, &idxout)); 1894 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 1895 } else { /* the original DMDA local-to-local map have been modified */ 1896 PetscInt i, d; 1897 1898 PetscCall(PetscMalloc1(dof * n, &idxout)); 1899 for (i = 0; i < n; i++) 1900 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 1901 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 1902 1903 bs = 1; 1904 n *= dof; 1905 } 1906 PetscCall(ISRestoreIndices(corners, &idx)); 1907 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1908 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 1909 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 1910 PetscCall(ISDestroy(&corners)); 1911 pcbddc->corner_selected = PETSC_TRUE; 1912 pcbddc->corner_selection = PETSC_TRUE; 1913 } 1914 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1915 } 1916 } 1917 } 1918 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1919 DM dm; 1920 1921 PetscCall(MatGetDM(pc->pmat, &dm)); 1922 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1923 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1924 Vec vcoords; 1925 PetscSection section; 1926 PetscReal *coords; 1927 PetscInt d, cdim, nl, nf, **ctxs; 1928 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1929 /* debug coordinates */ 1930 PetscViewer viewer; 1931 PetscBool flg; 1932 PetscViewerFormat format; 1933 const char *prefix; 1934 1935 PetscCall(DMGetCoordinateDim(dm, &cdim)); 1936 PetscCall(DMGetLocalSection(dm, §ion)); 1937 PetscCall(PetscSectionGetNumFields(section, &nf)); 1938 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 1939 PetscCall(VecGetLocalSize(vcoords, &nl)); 1940 PetscCall(PetscMalloc1(nl * cdim, &coords)); 1941 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 1942 PetscCall(PetscMalloc1(nf, &ctxs[0])); 1943 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 1944 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 1945 1946 /* debug coordinates */ 1947 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1948 PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 1949 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 1950 for (d = 0; d < cdim; d++) { 1951 PetscInt i; 1952 const PetscScalar *v; 1953 char name[16]; 1954 1955 for (i = 0; i < nf; i++) ctxs[i][0] = d; 1956 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d)); 1957 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 1958 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 1959 if (flg) PetscCall(VecView(vcoords, viewer)); 1960 PetscCall(VecGetArrayRead(vcoords, &v)); 1961 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 1962 PetscCall(VecRestoreArrayRead(vcoords, &v)); 1963 } 1964 PetscCall(VecDestroy(&vcoords)); 1965 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 1966 PetscCall(PetscFree(coords)); 1967 PetscCall(PetscFree(ctxs[0])); 1968 PetscCall(PetscFree2(funcs, ctxs)); 1969 if (flg) { 1970 PetscCall(PetscViewerPopFormat(viewer)); 1971 PetscCall(PetscViewerDestroy(&viewer)); 1972 } 1973 } 1974 } 1975 PetscFunctionReturn(PETSC_SUCCESS); 1976 } 1977 1978 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1979 { 1980 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1981 IS nis; 1982 const PetscInt *idxs; 1983 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 1984 1985 PetscFunctionBegin; 1986 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 1987 if (mop == MPI_LAND) { 1988 /* init rootdata with true */ 1989 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 1990 } else { 1991 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 1992 } 1993 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 1994 PetscCall(ISGetLocalSize(*is, &nd)); 1995 PetscCall(ISGetIndices(*is, &idxs)); 1996 for (i = 0; i < nd; i++) 1997 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 1998 PetscCall(ISRestoreIndices(*is, &idxs)); 1999 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2000 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 2001 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2002 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 2003 if (mop == MPI_LAND) { 2004 PetscCall(PetscMalloc1(nd, &nidxs)); 2005 } else { 2006 PetscCall(PetscMalloc1(n, &nidxs)); 2007 } 2008 for (i = 0, nnd = 0; i < n; i++) 2009 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 2010 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 2011 PetscCall(ISDestroy(is)); 2012 *is = nis; 2013 PetscFunctionReturn(PETSC_SUCCESS); 2014 } 2015 2016 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) 2017 { 2018 PC_IS *pcis = (PC_IS *)pc->data; 2019 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2020 2021 PetscFunctionBegin; 2022 if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS); 2023 if (pcbddc->ChangeOfBasisMatrix) { 2024 Vec swap; 2025 2026 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 2027 swap = pcbddc->work_change; 2028 pcbddc->work_change = r; 2029 r = swap; 2030 } 2031 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2032 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 2033 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2034 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 2035 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 2036 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 2037 PetscCall(VecSet(z, 0.)); 2038 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2039 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 2040 if (pcbddc->ChangeOfBasisMatrix) { 2041 pcbddc->work_change = r; 2042 PetscCall(VecCopy(z, pcbddc->work_change)); 2043 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 2044 } 2045 PetscFunctionReturn(PETSC_SUCCESS); 2046 } 2047 2048 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 2049 { 2050 PCBDDCBenignMatMult_ctx ctx; 2051 PetscBool apply_right, apply_left, reset_x; 2052 2053 PetscFunctionBegin; 2054 PetscCall(MatShellGetContext(A, &ctx)); 2055 if (transpose) { 2056 apply_right = ctx->apply_left; 2057 apply_left = ctx->apply_right; 2058 } else { 2059 apply_right = ctx->apply_right; 2060 apply_left = ctx->apply_left; 2061 } 2062 reset_x = PETSC_FALSE; 2063 if (apply_right) { 2064 const PetscScalar *ax; 2065 PetscInt nl, i; 2066 2067 PetscCall(VecGetLocalSize(x, &nl)); 2068 PetscCall(VecGetArrayRead(x, &ax)); 2069 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 2070 PetscCall(VecRestoreArrayRead(x, &ax)); 2071 for (i = 0; i < ctx->benign_n; i++) { 2072 PetscScalar sum, val; 2073 const PetscInt *idxs; 2074 PetscInt nz, j; 2075 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2076 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2077 sum = 0.; 2078 if (ctx->apply_p0) { 2079 val = ctx->work[idxs[nz - 1]]; 2080 for (j = 0; j < nz - 1; j++) { 2081 sum += ctx->work[idxs[j]]; 2082 ctx->work[idxs[j]] += val; 2083 } 2084 } else { 2085 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]]; 2086 } 2087 ctx->work[idxs[nz - 1]] -= sum; 2088 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2089 } 2090 PetscCall(VecPlaceArray(x, ctx->work)); 2091 reset_x = PETSC_TRUE; 2092 } 2093 if (transpose) { 2094 PetscCall(MatMultTranspose(ctx->A, x, y)); 2095 } else { 2096 PetscCall(MatMult(ctx->A, x, y)); 2097 } 2098 if (reset_x) PetscCall(VecResetArray(x)); 2099 if (apply_left) { 2100 PetscScalar *ay; 2101 PetscInt i; 2102 2103 PetscCall(VecGetArray(y, &ay)); 2104 for (i = 0; i < ctx->benign_n; i++) { 2105 PetscScalar sum, val; 2106 const PetscInt *idxs; 2107 PetscInt nz, j; 2108 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2109 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2110 val = -ay[idxs[nz - 1]]; 2111 if (ctx->apply_p0) { 2112 sum = 0.; 2113 for (j = 0; j < nz - 1; j++) { 2114 sum += ay[idxs[j]]; 2115 ay[idxs[j]] += val; 2116 } 2117 ay[idxs[nz - 1]] += sum; 2118 } else { 2119 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val; 2120 ay[idxs[nz - 1]] = 0.; 2121 } 2122 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2123 } 2124 PetscCall(VecRestoreArray(y, &ay)); 2125 } 2126 PetscFunctionReturn(PETSC_SUCCESS); 2127 } 2128 2129 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2130 { 2131 PetscFunctionBegin; 2132 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 2133 PetscFunctionReturn(PETSC_SUCCESS); 2134 } 2135 2136 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2137 { 2138 PetscFunctionBegin; 2139 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 2140 PetscFunctionReturn(PETSC_SUCCESS); 2141 } 2142 2143 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2144 { 2145 PC_IS *pcis = (PC_IS *)pc->data; 2146 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2147 PCBDDCBenignMatMult_ctx ctx; 2148 2149 PetscFunctionBegin; 2150 if (!restore) { 2151 Mat A_IB, A_BI; 2152 PetscScalar *work; 2153 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2154 2155 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 2156 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS); 2157 PetscCall(PetscMalloc1(pcis->n, &work)); 2158 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 2159 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 2160 PetscCall(MatSetType(A_IB, MATSHELL)); 2161 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private)); 2162 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 2163 PetscCall(PetscNew(&ctx)); 2164 PetscCall(MatShellSetContext(A_IB, ctx)); 2165 ctx->apply_left = PETSC_TRUE; 2166 ctx->apply_right = PETSC_FALSE; 2167 ctx->apply_p0 = PETSC_FALSE; 2168 ctx->benign_n = pcbddc->benign_n; 2169 if (reuse) { 2170 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2171 ctx->free = PETSC_FALSE; 2172 } else { /* TODO: could be optimized for successive solves */ 2173 ISLocalToGlobalMapping N_to_D; 2174 PetscInt i; 2175 2176 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 2177 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 2178 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); 2179 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2180 ctx->free = PETSC_TRUE; 2181 } 2182 ctx->A = pcis->A_IB; 2183 ctx->work = work; 2184 PetscCall(MatSetUp(A_IB)); 2185 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 2186 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 2187 pcis->A_IB = A_IB; 2188 2189 /* A_BI as A_IB^T */ 2190 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 2191 pcbddc->benign_original_mat = pcis->A_BI; 2192 pcis->A_BI = A_BI; 2193 } else { 2194 if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS); 2195 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2196 PetscCall(MatDestroy(&pcis->A_IB)); 2197 pcis->A_IB = ctx->A; 2198 ctx->A = NULL; 2199 PetscCall(MatDestroy(&pcis->A_BI)); 2200 pcis->A_BI = pcbddc->benign_original_mat; 2201 pcbddc->benign_original_mat = NULL; 2202 if (ctx->free) { 2203 PetscInt i; 2204 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2205 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2206 } 2207 PetscCall(PetscFree(ctx->work)); 2208 PetscCall(PetscFree(ctx)); 2209 } 2210 PetscFunctionReturn(PETSC_SUCCESS); 2211 } 2212 2213 /* used just in bddc debug mode */ 2214 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2215 { 2216 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2217 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2218 Mat An; 2219 2220 PetscFunctionBegin; 2221 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2222 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2223 if (is1) { 2224 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2225 PetscCall(MatDestroy(&An)); 2226 } else { 2227 *B = An; 2228 } 2229 PetscFunctionReturn(PETSC_SUCCESS); 2230 } 2231 2232 /* TODO: add reuse flag */ 2233 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2234 { 2235 Mat Bt; 2236 PetscScalar *a, *bdata; 2237 const PetscInt *ii, *ij; 2238 PetscInt m, n, i, nnz, *bii, *bij; 2239 PetscBool flg_row; 2240 2241 PetscFunctionBegin; 2242 PetscCall(MatGetSize(A, &n, &m)); 2243 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2244 PetscCall(MatSeqAIJGetArray(A, &a)); 2245 nnz = n; 2246 for (i = 0; i < ii[n]; i++) { 2247 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2248 } 2249 PetscCall(PetscMalloc1(n + 1, &bii)); 2250 PetscCall(PetscMalloc1(nnz, &bij)); 2251 PetscCall(PetscMalloc1(nnz, &bdata)); 2252 nnz = 0; 2253 bii[0] = 0; 2254 for (i = 0; i < n; i++) { 2255 PetscInt j; 2256 for (j = ii[i]; j < ii[i + 1]; j++) { 2257 PetscScalar entry = a[j]; 2258 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2259 bij[nnz] = ij[j]; 2260 bdata[nnz] = entry; 2261 nnz++; 2262 } 2263 } 2264 bii[i + 1] = nnz; 2265 } 2266 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2267 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2268 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2269 { 2270 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data; 2271 b->free_a = PETSC_TRUE; 2272 b->free_ij = PETSC_TRUE; 2273 } 2274 if (*B == A) PetscCall(MatDestroy(&A)); 2275 *B = Bt; 2276 PetscFunctionReturn(PETSC_SUCCESS); 2277 } 2278 2279 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) 2280 { 2281 Mat B = NULL; 2282 DM dm; 2283 IS is_dummy, *cc_n; 2284 ISLocalToGlobalMapping l2gmap_dummy; 2285 PCBDDCGraph graph; 2286 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2287 PetscInt i, n; 2288 PetscInt *xadj, *adjncy; 2289 PetscBool isplex = PETSC_FALSE; 2290 2291 PetscFunctionBegin; 2292 if (ncc) *ncc = 0; 2293 if (cc) *cc = NULL; 2294 if (primalv) *primalv = NULL; 2295 PetscCall(PCBDDCGraphCreate(&graph)); 2296 PetscCall(MatGetDM(pc->pmat, &dm)); 2297 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2298 if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, "")); 2299 if (filter) isplex = PETSC_FALSE; 2300 2301 if (isplex) { /* this code has been modified from plexpartition.c */ 2302 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2303 PetscInt *adj = NULL; 2304 IS cellNumbering; 2305 const PetscInt *cellNum; 2306 PetscBool useCone, useClosure; 2307 PetscSection section; 2308 PetscSegBuffer adjBuffer; 2309 PetscSF sfPoint; 2310 2311 PetscCall(DMConvert(dm, DMPLEX, &dm)); 2312 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2313 PetscCall(DMGetPointSF(dm, &sfPoint)); 2314 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2315 /* Build adjacency graph via a section/segbuffer */ 2316 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2317 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2318 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2319 /* Always use FVM adjacency to create partitioner graph */ 2320 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2321 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2322 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2323 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2324 for (n = 0, p = pStart; p < pEnd; p++) { 2325 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2326 if (nroots > 0) { 2327 if (cellNum[p] < 0) continue; 2328 } 2329 adjSize = PETSC_DETERMINE; 2330 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2331 for (a = 0; a < adjSize; ++a) { 2332 const PetscInt point = adj[a]; 2333 if (pStart <= point && point < pEnd) { 2334 PetscInt *PETSC_RESTRICT pBuf; 2335 PetscCall(PetscSectionAddDof(section, p, 1)); 2336 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2337 *pBuf = point; 2338 } 2339 } 2340 n++; 2341 } 2342 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2343 /* Derive CSR graph from section/segbuffer */ 2344 PetscCall(PetscSectionSetUp(section)); 2345 PetscCall(PetscSectionGetStorageSize(section, &size)); 2346 PetscCall(PetscMalloc1(n + 1, &xadj)); 2347 for (idx = 0, p = pStart; p < pEnd; p++) { 2348 if (nroots > 0) { 2349 if (cellNum[p] < 0) continue; 2350 } 2351 PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++])); 2352 } 2353 xadj[n] = size; 2354 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2355 /* Clean up */ 2356 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2357 PetscCall(PetscSectionDestroy(§ion)); 2358 PetscCall(PetscFree(adj)); 2359 graph->xadj = xadj; 2360 graph->adjncy = adjncy; 2361 } else { 2362 Mat A; 2363 PetscBool isseqaij, flg_row; 2364 2365 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2366 if (!A->rmap->N || !A->cmap->N) { 2367 PetscCall(PCBDDCGraphDestroy(&graph)); 2368 PetscFunctionReturn(PETSC_SUCCESS); 2369 } 2370 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2371 if (!isseqaij && filter) { 2372 PetscBool isseqdense; 2373 2374 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2375 if (!isseqdense) { 2376 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2377 } else { /* TODO: rectangular case and LDA */ 2378 PetscScalar *array; 2379 PetscReal chop = 1.e-6; 2380 2381 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2382 PetscCall(MatDenseGetArray(B, &array)); 2383 PetscCall(MatGetSize(B, &n, NULL)); 2384 for (i = 0; i < n; i++) { 2385 PetscInt j; 2386 for (j = i + 1; j < n; j++) { 2387 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2388 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2389 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2390 } 2391 } 2392 PetscCall(MatDenseRestoreArray(B, &array)); 2393 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2394 } 2395 } else { 2396 PetscCall(PetscObjectReference((PetscObject)A)); 2397 B = A; 2398 } 2399 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2400 2401 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2402 if (filter) { 2403 PetscScalar *data; 2404 PetscInt j, cum; 2405 2406 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2407 PetscCall(MatSeqAIJGetArray(B, &data)); 2408 cum = 0; 2409 for (i = 0; i < n; i++) { 2410 PetscInt t; 2411 2412 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2413 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue; 2414 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2415 } 2416 t = xadj_filtered[i]; 2417 xadj_filtered[i] = cum; 2418 cum += t; 2419 } 2420 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2421 graph->xadj = xadj_filtered; 2422 graph->adjncy = adjncy_filtered; 2423 } else { 2424 graph->xadj = xadj; 2425 graph->adjncy = adjncy; 2426 } 2427 } 2428 /* compute local connected components using PCBDDCGraph */ 2429 graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */ 2430 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2431 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2432 PetscCall(ISDestroy(&is_dummy)); 2433 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX)); 2434 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2435 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2436 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2437 2438 /* partial clean up */ 2439 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2440 if (B) { 2441 PetscBool flg_row; 2442 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2443 PetscCall(MatDestroy(&B)); 2444 } 2445 if (isplex) { 2446 PetscCall(PetscFree(xadj)); 2447 PetscCall(PetscFree(adjncy)); 2448 } 2449 2450 /* get back data */ 2451 if (isplex) { 2452 if (ncc) *ncc = graph->ncc; 2453 if (cc || primalv) { 2454 Mat A; 2455 PetscBT btv, btvt, btvc; 2456 PetscSection subSection; 2457 PetscInt *ids, cum, cump, *cids, *pids; 2458 PetscInt dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd; 2459 2460 PetscCall(DMGetDimension(dm, &dim)); 2461 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2462 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 2463 PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd)); 2464 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 2465 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 2466 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2467 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2468 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2469 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2470 PetscCall(PetscBTCreate(pEnd - pStart, &btvc)); 2471 2472 /* First see if we find corners for the subdomains, i.e. a vertex 2473 shared by at least dim subdomain boundary faces. This does not 2474 cover all the possible cases with simplices but it is enough 2475 for tensor cells */ 2476 if (vStart != fStart && dim <= 3) { 2477 for (PetscInt c = cStart; c < cEnd; c++) { 2478 PetscInt nf, cnt = 0, mcnt = dim, *cfaces; 2479 const PetscInt *faces; 2480 2481 PetscCall(DMPlexGetConeSize(dm, c, &nf)); 2482 PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces)); 2483 PetscCall(DMPlexGetCone(dm, c, &faces)); 2484 for (PetscInt f = 0; f < nf; f++) { 2485 PetscInt nc, ff; 2486 2487 PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc)); 2488 PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL)); 2489 if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f]; 2490 } 2491 if (cnt >= mcnt) { 2492 PetscInt size, *closure = NULL; 2493 2494 PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2495 for (PetscInt k = 0; k < 2 * size; k += 2) { 2496 PetscInt v = closure[k]; 2497 if (v >= vStart && v < vEnd) { 2498 PetscInt vsize, *vclosure = NULL; 2499 2500 cnt = 0; 2501 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2502 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) { 2503 PetscInt f = vclosure[vk]; 2504 if (f >= fStart && f < fEnd) { 2505 PetscInt nc, ff; 2506 PetscBool valid = PETSC_FALSE; 2507 2508 for (PetscInt fk = 0; fk < nf; fk++) 2509 if (f == cfaces[fk]) valid = PETSC_TRUE; 2510 if (!valid) continue; 2511 PetscCall(DMPlexGetSupportSize(dm, f, &nc)); 2512 PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL)); 2513 if (nc == 1 && f == ff) cnt++; 2514 } 2515 } 2516 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart)); 2517 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2518 } 2519 } 2520 PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2521 } 2522 PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces)); 2523 } 2524 } 2525 2526 cids[0] = 0; 2527 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2528 PetscInt j; 2529 2530 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2531 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2532 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2533 2534 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2535 for (k = 0; k < 2 * size; k += 2) { 2536 PetscInt s, pp, p = closure[k], off, dof, cdof; 2537 2538 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2539 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2540 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2541 for (s = 0; s < dof - cdof; s++) { 2542 if (PetscBTLookupSet(btvt, off + s)) continue; 2543 if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2544 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2545 else pids[cump++] = off + s; /* cross-vertex */ 2546 } 2547 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2548 if (pp != p) { 2549 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2550 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2551 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2552 for (s = 0; s < dof - cdof; s++) { 2553 if (PetscBTLookupSet(btvt, off + s)) continue; 2554 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2555 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2556 else pids[cump++] = off + s; /* cross-vertex */ 2557 } 2558 } 2559 } 2560 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2561 } 2562 cids[i + 1] = cum; 2563 /* mark dofs as already assigned */ 2564 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j])); 2565 } 2566 if (cc) { 2567 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2568 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); 2569 *cc = cc_n; 2570 } 2571 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2572 PetscCall(PetscFree3(ids, cids, pids)); 2573 PetscCall(PetscBTDestroy(&btv)); 2574 PetscCall(PetscBTDestroy(&btvt)); 2575 PetscCall(PetscBTDestroy(&btvc)); 2576 PetscCall(DMDestroy(&dm)); 2577 } 2578 } else { 2579 if (ncc) *ncc = graph->ncc; 2580 if (cc) { 2581 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2582 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); 2583 *cc = cc_n; 2584 } 2585 } 2586 /* clean up graph */ 2587 graph->xadj = NULL; 2588 graph->adjncy = NULL; 2589 PetscCall(PCBDDCGraphDestroy(&graph)); 2590 PetscFunctionReturn(PETSC_SUCCESS); 2591 } 2592 2593 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2594 { 2595 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2596 PC_IS *pcis = (PC_IS *)pc->data; 2597 IS dirIS = NULL; 2598 PetscInt i; 2599 2600 PetscFunctionBegin; 2601 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2602 if (zerodiag) { 2603 Mat A; 2604 Vec vec3_N; 2605 PetscScalar *vals; 2606 const PetscInt *idxs; 2607 PetscInt nz, *count; 2608 2609 /* p0 */ 2610 PetscCall(VecSet(pcis->vec1_N, 0.)); 2611 PetscCall(PetscMalloc1(pcis->n, &vals)); 2612 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2613 PetscCall(ISGetIndices(zerodiag, &idxs)); 2614 for (i = 0; i < nz; i++) vals[i] = 1.; 2615 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2616 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2617 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2618 /* v_I */ 2619 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2620 for (i = 0; i < nz; i++) vals[i] = 0.; 2621 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2622 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2623 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2624 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2625 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2626 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2627 if (dirIS) { 2628 PetscInt n; 2629 2630 PetscCall(ISGetLocalSize(dirIS, &n)); 2631 PetscCall(ISGetIndices(dirIS, &idxs)); 2632 for (i = 0; i < n; i++) vals[i] = 0.; 2633 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2634 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2635 } 2636 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2637 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2638 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2639 PetscCall(VecSet(vec3_N, 0.)); 2640 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2641 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2642 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2643 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2644 PetscCall(PetscFree(vals)); 2645 PetscCall(VecDestroy(&vec3_N)); 2646 2647 /* there should not be any pressure dofs lying on the interface */ 2648 PetscCall(PetscCalloc1(pcis->n, &count)); 2649 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2650 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2651 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2652 PetscCall(ISGetIndices(zerodiag, &idxs)); 2653 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2654 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2655 PetscCall(PetscFree(count)); 2656 } 2657 PetscCall(ISDestroy(&dirIS)); 2658 2659 /* check PCBDDCBenignGetOrSetP0 */ 2660 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2661 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2662 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2663 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2664 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2665 for (i = 0; i < pcbddc->benign_n; i++) { 2666 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2667 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2668 } 2669 PetscFunctionReturn(PETSC_SUCCESS); 2670 } 2671 2672 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2673 { 2674 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2675 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2676 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2677 PetscInt nz, n, benign_n, bsp = 1; 2678 PetscInt *interior_dofs, n_interior_dofs, nneu; 2679 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2680 2681 PetscFunctionBegin; 2682 if (reuse) goto project_b0; 2683 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2684 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2685 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2686 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2687 has_null_pressures = PETSC_TRUE; 2688 have_null = PETSC_TRUE; 2689 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2690 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2691 Checks if all the pressure dofs in each subdomain have a zero diagonal 2692 If not, a change of basis on pressures is not needed 2693 since the local Schur complements are already SPD 2694 */ 2695 if (pcbddc->n_ISForDofsLocal) { 2696 IS iP = NULL; 2697 PetscInt p, *pp; 2698 PetscBool flg, blocked = PETSC_FALSE; 2699 2700 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2701 n = pcbddc->n_ISForDofsLocal; 2702 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2703 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2704 PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL)); 2705 PetscOptionsEnd(); 2706 if (!flg) { 2707 n = 1; 2708 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2709 } 2710 2711 bsp = 0; 2712 for (p = 0; p < n; p++) { 2713 PetscInt bs = 1; 2714 2715 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2716 if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2717 bsp += bs; 2718 } 2719 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2720 bsp = 0; 2721 for (p = 0; p < n; p++) { 2722 const PetscInt *idxs; 2723 PetscInt b, bs = 1, npl, *bidxs; 2724 2725 if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2726 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2727 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2728 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2729 for (b = 0; b < bs; b++) { 2730 PetscInt i; 2731 2732 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2733 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2734 bsp++; 2735 } 2736 PetscCall(PetscFree(bidxs)); 2737 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2738 } 2739 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2740 2741 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2742 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2743 if (iP) { 2744 IS newpressures; 2745 2746 PetscCall(ISDifference(pressures, iP, &newpressures)); 2747 PetscCall(ISDestroy(&pressures)); 2748 pressures = newpressures; 2749 } 2750 PetscCall(ISSorted(pressures, &sorted)); 2751 if (!sorted) PetscCall(ISSort(pressures)); 2752 PetscCall(PetscFree(pp)); 2753 } 2754 2755 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2756 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2757 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2758 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2759 PetscCall(ISSorted(zerodiag, &sorted)); 2760 if (!sorted) PetscCall(ISSort(zerodiag)); 2761 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2762 zerodiag_save = zerodiag; 2763 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2764 if (!nz) { 2765 if (n) have_null = PETSC_FALSE; 2766 has_null_pressures = PETSC_FALSE; 2767 PetscCall(ISDestroy(&zerodiag)); 2768 } 2769 recompute_zerodiag = PETSC_FALSE; 2770 2771 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2772 zerodiag_subs = NULL; 2773 benign_n = 0; 2774 n_interior_dofs = 0; 2775 interior_dofs = NULL; 2776 nneu = 0; 2777 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); 2778 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2779 if (checkb) { /* need to compute interior nodes */ 2780 PetscInt n, i; 2781 PetscInt *count; 2782 ISLocalToGlobalMapping mapping; 2783 2784 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL)); 2785 PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL)); 2786 PetscCall(PetscMalloc1(n, &interior_dofs)); 2787 for (i = 0; i < n; i++) 2788 if (count[i] < 2) interior_dofs[n_interior_dofs++] = i; 2789 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL)); 2790 } 2791 if (has_null_pressures) { 2792 IS *subs; 2793 PetscInt nsubs, i, j, nl; 2794 const PetscInt *idxs; 2795 PetscScalar *array; 2796 Vec *work; 2797 2798 subs = pcbddc->local_subs; 2799 nsubs = pcbddc->n_local_subs; 2800 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2801 if (checkb) { 2802 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2803 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2804 PetscCall(ISGetIndices(zerodiag, &idxs)); 2805 /* work[0] = 1_p */ 2806 PetscCall(VecSet(work[0], 0.)); 2807 PetscCall(VecGetArray(work[0], &array)); 2808 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2809 PetscCall(VecRestoreArray(work[0], &array)); 2810 /* work[0] = 1_v */ 2811 PetscCall(VecSet(work[1], 1.)); 2812 PetscCall(VecGetArray(work[1], &array)); 2813 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2814 PetscCall(VecRestoreArray(work[1], &array)); 2815 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2816 } 2817 2818 if (nsubs > 1 || bsp > 1) { 2819 IS *is; 2820 PetscInt b, totb; 2821 2822 totb = bsp; 2823 is = bsp > 1 ? bzerodiag : &zerodiag; 2824 nsubs = PetscMax(nsubs, 1); 2825 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2826 for (b = 0; b < totb; b++) { 2827 for (i = 0; i < nsubs; i++) { 2828 ISLocalToGlobalMapping l2g; 2829 IS t_zerodiag_subs; 2830 PetscInt nl; 2831 2832 if (subs) { 2833 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2834 } else { 2835 IS tis; 2836 2837 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2838 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2839 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2840 PetscCall(ISDestroy(&tis)); 2841 } 2842 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2843 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2844 if (nl) { 2845 PetscBool valid = PETSC_TRUE; 2846 2847 if (checkb) { 2848 PetscCall(VecSet(matis->x, 0)); 2849 PetscCall(ISGetLocalSize(subs[i], &nl)); 2850 PetscCall(ISGetIndices(subs[i], &idxs)); 2851 PetscCall(VecGetArray(matis->x, &array)); 2852 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2853 PetscCall(VecRestoreArray(matis->x, &array)); 2854 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2855 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2856 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2857 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2858 PetscCall(VecGetArray(matis->y, &array)); 2859 for (j = 0; j < n_interior_dofs; j++) { 2860 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2861 valid = PETSC_FALSE; 2862 break; 2863 } 2864 } 2865 PetscCall(VecRestoreArray(matis->y, &array)); 2866 } 2867 if (valid && nneu) { 2868 const PetscInt *idxs; 2869 PetscInt nzb; 2870 2871 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2872 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2873 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2874 if (nzb) valid = PETSC_FALSE; 2875 } 2876 if (valid && pressures) { 2877 IS t_pressure_subs, tmp; 2878 PetscInt i1, i2; 2879 2880 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 2881 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 2882 PetscCall(ISGetLocalSize(tmp, &i1)); 2883 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 2884 if (i2 != i1) valid = PETSC_FALSE; 2885 PetscCall(ISDestroy(&t_pressure_subs)); 2886 PetscCall(ISDestroy(&tmp)); 2887 } 2888 if (valid) { 2889 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 2890 benign_n++; 2891 } else recompute_zerodiag = PETSC_TRUE; 2892 } 2893 PetscCall(ISDestroy(&t_zerodiag_subs)); 2894 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2895 } 2896 } 2897 } else { /* there's just one subdomain (or zero if they have not been detected */ 2898 PetscBool valid = PETSC_TRUE; 2899 2900 if (nneu) valid = PETSC_FALSE; 2901 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid)); 2902 if (valid && checkb) { 2903 PetscCall(MatMult(matis->A, work[0], matis->x)); 2904 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 2905 PetscCall(VecGetArray(matis->x, &array)); 2906 for (j = 0; j < n_interior_dofs; j++) { 2907 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2908 valid = PETSC_FALSE; 2909 break; 2910 } 2911 } 2912 PetscCall(VecRestoreArray(matis->x, &array)); 2913 } 2914 if (valid) { 2915 benign_n = 1; 2916 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 2917 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2918 zerodiag_subs[0] = zerodiag; 2919 } 2920 } 2921 if (checkb) PetscCall(VecDestroyVecs(2, &work)); 2922 } 2923 PetscCall(PetscFree(interior_dofs)); 2924 2925 if (!benign_n) { 2926 PetscInt n; 2927 2928 PetscCall(ISDestroy(&zerodiag)); 2929 recompute_zerodiag = PETSC_FALSE; 2930 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2931 if (n) have_null = PETSC_FALSE; 2932 } 2933 2934 /* final check for null pressures */ 2935 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null)); 2936 2937 if (recompute_zerodiag) { 2938 PetscCall(ISDestroy(&zerodiag)); 2939 if (benign_n == 1) { 2940 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2941 zerodiag = zerodiag_subs[0]; 2942 } else { 2943 PetscInt i, nzn, *new_idxs; 2944 2945 nzn = 0; 2946 for (i = 0; i < benign_n; i++) { 2947 PetscInt ns; 2948 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2949 nzn += ns; 2950 } 2951 PetscCall(PetscMalloc1(nzn, &new_idxs)); 2952 nzn = 0; 2953 for (i = 0; i < benign_n; i++) { 2954 PetscInt ns, *idxs; 2955 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2956 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2957 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 2958 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2959 nzn += ns; 2960 } 2961 PetscCall(PetscSortInt(nzn, new_idxs)); 2962 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 2963 } 2964 have_null = PETSC_FALSE; 2965 } 2966 2967 /* determines if the coarse solver will be singular or not */ 2968 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2969 2970 /* Prepare matrix to compute no-net-flux */ 2971 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2972 Mat A, loc_divudotp; 2973 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 2974 IS row, col, isused = NULL; 2975 PetscInt M, N, n, st, n_isused; 2976 2977 if (pressures) { 2978 isused = pressures; 2979 } else { 2980 isused = zerodiag_save; 2981 } 2982 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 2983 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2984 PetscCall(MatGetLocalSize(A, &n, NULL)); 2985 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 2986 n_isused = 0; 2987 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused)); 2988 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 2989 st = st - n_isused; 2990 if (n) { 2991 const PetscInt *gidxs; 2992 2993 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 2994 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 2995 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2996 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2997 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 2998 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 2999 } else { 3000 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 3001 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 3002 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 3003 } 3004 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 3005 PetscCall(ISGetSize(row, &M)); 3006 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 3007 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 3008 PetscCall(ISDestroy(&row)); 3009 PetscCall(ISDestroy(&col)); 3010 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 3011 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 3012 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 3013 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 3014 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 3015 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 3016 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 3017 PetscCall(MatDestroy(&loc_divudotp)); 3018 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3019 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 3020 } 3021 PetscCall(ISDestroy(&zerodiag_save)); 3022 PetscCall(ISDestroy(&pressures)); 3023 if (bzerodiag) { 3024 PetscInt i; 3025 3026 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i])); 3027 PetscCall(PetscFree(bzerodiag)); 3028 } 3029 pcbddc->benign_n = benign_n; 3030 pcbddc->benign_zerodiag_subs = zerodiag_subs; 3031 3032 /* determines if the problem has subdomains with 0 pressure block */ 3033 have_null = (PetscBool)(!!pcbddc->benign_n); 3034 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 3035 3036 project_b0: 3037 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 3038 /* change of basis and p0 dofs */ 3039 if (pcbddc->benign_n) { 3040 PetscInt i, s, *nnz; 3041 3042 /* local change of basis for pressures */ 3043 PetscCall(MatDestroy(&pcbddc->benign_change)); 3044 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 3045 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 3046 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 3047 PetscCall(PetscMalloc1(n, &nnz)); 3048 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 3049 for (i = 0; i < pcbddc->benign_n; i++) { 3050 const PetscInt *idxs; 3051 PetscInt nzs, j; 3052 3053 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 3054 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3055 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 3056 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 3057 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 3058 } 3059 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 3060 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3061 PetscCall(PetscFree(nnz)); 3062 /* set identity by default */ 3063 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); 3064 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3065 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 3066 /* set change on pressures */ 3067 for (s = 0; s < pcbddc->benign_n; s++) { 3068 PetscScalar *array; 3069 const PetscInt *idxs; 3070 PetscInt nzs; 3071 3072 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 3073 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3074 for (i = 0; i < nzs - 1; i++) { 3075 PetscScalar vals[2]; 3076 PetscInt cols[2]; 3077 3078 cols[0] = idxs[i]; 3079 cols[1] = idxs[nzs - 1]; 3080 vals[0] = 1.; 3081 vals[1] = 1.; 3082 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 3083 } 3084 PetscCall(PetscMalloc1(nzs, &array)); 3085 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 3086 array[nzs - 1] = 1.; 3087 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 3088 /* store local idxs for p0 */ 3089 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 3090 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3091 PetscCall(PetscFree(array)); 3092 } 3093 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3094 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3095 3096 /* project if needed */ 3097 if (pcbddc->benign_change_explicit) { 3098 Mat M; 3099 3100 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 3101 PetscCall(MatDestroy(&pcbddc->local_mat)); 3102 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 3103 PetscCall(MatDestroy(&M)); 3104 } 3105 /* store global idxs for p0 */ 3106 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 3107 } 3108 *zerodiaglocal = zerodiag; 3109 PetscFunctionReturn(PETSC_SUCCESS); 3110 } 3111 3112 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3113 { 3114 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3115 PetscScalar *array; 3116 3117 PetscFunctionBegin; 3118 if (!pcbddc->benign_sf) { 3119 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 3120 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 3121 } 3122 if (get) { 3123 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 3124 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3125 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3126 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 3127 } else { 3128 PetscCall(VecGetArray(v, &array)); 3129 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3130 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3131 PetscCall(VecRestoreArray(v, &array)); 3132 } 3133 PetscFunctionReturn(PETSC_SUCCESS); 3134 } 3135 3136 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3137 { 3138 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3139 3140 PetscFunctionBegin; 3141 /* TODO: add error checking 3142 - avoid nested pop (or push) calls. 3143 - cannot push before pop. 3144 - cannot call this if pcbddc->local_mat is NULL 3145 */ 3146 if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 3147 if (pop) { 3148 if (pcbddc->benign_change_explicit) { 3149 IS is_p0; 3150 MatReuse reuse; 3151 3152 /* extract B_0 */ 3153 reuse = MAT_INITIAL_MATRIX; 3154 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX; 3155 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 3156 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 3157 /* remove rows and cols from local problem */ 3158 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 3159 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 3160 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 3161 PetscCall(ISDestroy(&is_p0)); 3162 } else { 3163 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 3164 PetscScalar *vals; 3165 PetscInt i, n, *idxs_ins; 3166 3167 PetscCall(VecGetLocalSize(matis->y, &n)); 3168 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 3169 if (!pcbddc->benign_B0) { 3170 PetscInt *nnz; 3171 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 3172 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 3173 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 3174 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 3175 for (i = 0; i < pcbddc->benign_n; i++) { 3176 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 3177 nnz[i] = n - nnz[i]; 3178 } 3179 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 3180 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3181 PetscCall(PetscFree(nnz)); 3182 } 3183 3184 for (i = 0; i < pcbddc->benign_n; i++) { 3185 PetscScalar *array; 3186 PetscInt *idxs, j, nz, cum; 3187 3188 PetscCall(VecSet(matis->x, 0.)); 3189 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 3190 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3191 for (j = 0; j < nz; j++) vals[j] = 1.; 3192 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 3193 PetscCall(VecAssemblyBegin(matis->x)); 3194 PetscCall(VecAssemblyEnd(matis->x)); 3195 PetscCall(VecSet(matis->y, 0.)); 3196 PetscCall(MatMult(matis->A, matis->x, matis->y)); 3197 PetscCall(VecGetArray(matis->y, &array)); 3198 cum = 0; 3199 for (j = 0; j < n; j++) { 3200 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3201 vals[cum] = array[j]; 3202 idxs_ins[cum] = j; 3203 cum++; 3204 } 3205 } 3206 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 3207 PetscCall(VecRestoreArray(matis->y, &array)); 3208 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3209 } 3210 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3211 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3212 PetscCall(PetscFree2(idxs_ins, vals)); 3213 } 3214 } else { /* push */ 3215 3216 PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 3217 for (PetscInt i = 0; i < pcbddc->benign_n; i++) { 3218 PetscScalar *B0_vals; 3219 PetscInt *B0_cols, B0_ncol; 3220 3221 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3222 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 3223 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 3224 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 3225 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3226 } 3227 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3228 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3229 } 3230 PetscFunctionReturn(PETSC_SUCCESS); 3231 } 3232 3233 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3234 { 3235 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3236 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3237 PetscBLASInt B_dummyint, B_neigs, B_ierr, B_lwork; 3238 PetscBLASInt *B_iwork, *B_ifail; 3239 PetscScalar *work, lwork; 3240 PetscScalar *St, *S, *eigv; 3241 PetscScalar *Sarray, *Starray; 3242 PetscReal *eigs, thresh, lthresh, uthresh; 3243 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 3244 PetscBool allocated_S_St, upart; 3245 #if defined(PETSC_USE_COMPLEX) 3246 PetscReal *rwork; 3247 #endif 3248 3249 PetscFunctionBegin; 3250 if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS); 3251 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 3252 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3253 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 3254 sub_schurs->is_posdef); 3255 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3256 3257 if (pcbddc->dbg_flag) { 3258 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3259 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3260 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 3261 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 3262 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3263 } 3264 3265 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); 3266 3267 /* max size of subsets */ 3268 mss = 0; 3269 for (i = 0; i < sub_schurs->n_subs; i++) { 3270 PetscInt subset_size; 3271 3272 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3273 mss = PetscMax(mss, subset_size); 3274 } 3275 3276 /* min/max and threshold */ 3277 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3278 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3279 nmax = PetscMax(nmin, nmax); 3280 allocated_S_St = PETSC_FALSE; 3281 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3282 allocated_S_St = PETSC_TRUE; 3283 } 3284 3285 /* allocate lapack workspace */ 3286 cum = cum2 = 0; 3287 maxneigs = 0; 3288 for (i = 0; i < sub_schurs->n_subs; i++) { 3289 PetscInt n, subset_size; 3290 3291 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3292 n = PetscMin(subset_size, nmax); 3293 cum += subset_size; 3294 cum2 += subset_size * n; 3295 maxneigs = PetscMax(maxneigs, n); 3296 } 3297 lwork = 0; 3298 if (mss) { 3299 PetscScalar sdummy = 0.; 3300 PetscBLASInt B_itype = 1; 3301 PetscBLASInt B_N, idummy = 0; 3302 PetscReal rdummy = 0., zero = 0.0; 3303 PetscReal eps = 0.0; /* dlamch? */ 3304 3305 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3306 PetscCall(PetscBLASIntCast(mss, &B_N)); 3307 B_lwork = -1; 3308 /* some implementations may complain about NULL pointers, even if we are querying */ 3309 S = &sdummy; 3310 St = &sdummy; 3311 eigs = &rdummy; 3312 eigv = &sdummy; 3313 B_iwork = &idummy; 3314 B_ifail = &idummy; 3315 #if defined(PETSC_USE_COMPLEX) 3316 rwork = &rdummy; 3317 #endif 3318 thresh = 1.0; 3319 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3320 #if defined(PETSC_USE_COMPLEX) 3321 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3322 #else 3323 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3324 #endif 3325 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr); 3326 PetscCall(PetscFPTrapPop()); 3327 } 3328 3329 nv = 0; 3330 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3331 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3332 } 3333 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3334 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); 3335 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3336 #if defined(PETSC_USE_COMPLEX) 3337 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3338 #endif 3339 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3340 &pcbddc->adaptive_constraints_data)); 3341 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3342 3343 maxneigs = 0; 3344 cum = cumarray = 0; 3345 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3346 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3347 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3348 const PetscInt *idxs; 3349 3350 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3351 for (cum = 0; cum < nv; cum++) { 3352 pcbddc->adaptive_constraints_n[cum] = 1; 3353 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3354 pcbddc->adaptive_constraints_data[cum] = 1.0; 3355 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3356 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3357 } 3358 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3359 } 3360 3361 if (mss) { /* multilevel */ 3362 if (sub_schurs->gdsw) { 3363 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3364 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3365 } else { 3366 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3367 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3368 } 3369 } 3370 3371 lthresh = pcbddc->adaptive_threshold[0]; 3372 uthresh = pcbddc->adaptive_threshold[1]; 3373 upart = pcbddc->use_deluxe_scaling; 3374 for (i = 0; i < sub_schurs->n_subs; i++) { 3375 const PetscInt *idxs; 3376 PetscReal upper, lower; 3377 PetscInt j, subset_size, eigs_start = 0; 3378 PetscBLASInt B_N; 3379 PetscBool same_data = PETSC_FALSE; 3380 PetscBool scal = PETSC_FALSE; 3381 3382 if (upart) { 3383 upper = PETSC_MAX_REAL; 3384 lower = uthresh; 3385 } else { 3386 if (sub_schurs->gdsw) { 3387 upper = uthresh; 3388 lower = PETSC_MIN_REAL; 3389 } else { 3390 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3391 upper = 1. / uthresh; 3392 lower = 0.; 3393 } 3394 } 3395 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3396 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3397 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3398 /* this is experimental: we assume the dofs have been properly grouped to have 3399 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3400 if (!sub_schurs->is_posdef) { 3401 Mat T; 3402 3403 for (j = 0; j < subset_size; j++) { 3404 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3405 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3406 PetscCall(MatScale(T, -1.0)); 3407 PetscCall(MatDestroy(&T)); 3408 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3409 PetscCall(MatScale(T, -1.0)); 3410 PetscCall(MatDestroy(&T)); 3411 if (sub_schurs->change_primal_sub) { 3412 PetscInt nz, k; 3413 const PetscInt *idxs; 3414 3415 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3416 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3417 for (k = 0; k < nz; k++) { 3418 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3419 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3420 } 3421 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3422 } 3423 scal = PETSC_TRUE; 3424 break; 3425 } 3426 } 3427 } 3428 3429 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3430 if (sub_schurs->is_symmetric) { 3431 PetscInt j, k; 3432 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3433 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3434 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3435 } 3436 for (j = 0; j < subset_size; j++) { 3437 for (k = j; k < subset_size; k++) { 3438 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3439 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3440 } 3441 } 3442 } else { 3443 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3444 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3445 } 3446 } else { 3447 S = Sarray + cumarray; 3448 St = Starray + cumarray; 3449 } 3450 /* see if we can save some work */ 3451 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); 3452 3453 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3454 B_neigs = 0; 3455 } else { 3456 PetscBLASInt B_itype = 1; 3457 PetscBLASInt B_IL, B_IU; 3458 PetscReal eps = -1.0; /* dlamch? */ 3459 PetscInt nmin_s; 3460 PetscBool compute_range; 3461 3462 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3463 B_neigs = 0; 3464 compute_range = (PetscBool)!same_data; 3465 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3466 3467 if (pcbddc->dbg_flag) { 3468 PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof; 3469 3470 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); 3471 PetscCall( 3472 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc)); 3473 } 3474 3475 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3476 if (compute_range) { 3477 /* ask for eigenvalues larger than thresh */ 3478 if (sub_schurs->is_posdef) { 3479 #if defined(PETSC_USE_COMPLEX) 3480 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3481 #else 3482 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3483 #endif 3484 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3485 } else { /* no theory so far, but it works nicely */ 3486 PetscInt recipe = 0, recipe_m = 1; 3487 PetscReal bb[2]; 3488 3489 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3490 switch (recipe) { 3491 case 0: 3492 if (scal) { 3493 bb[0] = PETSC_MIN_REAL; 3494 bb[1] = lthresh; 3495 } else { 3496 bb[0] = uthresh; 3497 bb[1] = PETSC_MAX_REAL; 3498 } 3499 #if defined(PETSC_USE_COMPLEX) 3500 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3501 #else 3502 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3503 #endif 3504 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3505 break; 3506 case 1: 3507 bb[0] = PETSC_MIN_REAL; 3508 bb[1] = lthresh * lthresh; 3509 #if defined(PETSC_USE_COMPLEX) 3510 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3511 #else 3512 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3513 #endif 3514 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3515 if (!scal) { 3516 PetscBLASInt B_neigs2 = 0; 3517 3518 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3519 bb[1] = PETSC_MAX_REAL; 3520 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3521 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3522 #if defined(PETSC_USE_COMPLEX) 3523 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3524 #else 3525 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3526 #endif 3527 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3528 B_neigs += B_neigs2; 3529 } 3530 break; 3531 case 2: 3532 if (scal) { 3533 bb[0] = PETSC_MIN_REAL; 3534 bb[1] = 0; 3535 #if defined(PETSC_USE_COMPLEX) 3536 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3537 #else 3538 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3539 #endif 3540 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3541 } else { 3542 PetscBLASInt B_neigs2 = 0; 3543 PetscBool do_copy = PETSC_FALSE; 3544 3545 lthresh = PetscMax(lthresh, 0.0); 3546 if (lthresh > 0.0) { 3547 bb[0] = PETSC_MIN_REAL; 3548 bb[1] = lthresh * lthresh; 3549 3550 do_copy = PETSC_TRUE; 3551 #if defined(PETSC_USE_COMPLEX) 3552 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3553 #else 3554 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3555 #endif 3556 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3557 } 3558 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3559 bb[1] = PETSC_MAX_REAL; 3560 if (do_copy) { 3561 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3562 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3563 } 3564 #if defined(PETSC_USE_COMPLEX) 3565 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3566 #else 3567 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3568 #endif 3569 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3570 B_neigs += B_neigs2; 3571 } 3572 break; 3573 case 3: 3574 if (scal) { 3575 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3576 } else { 3577 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3578 } 3579 if (!scal) { 3580 bb[0] = uthresh; 3581 bb[1] = PETSC_MAX_REAL; 3582 #if defined(PETSC_USE_COMPLEX) 3583 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3584 #else 3585 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3586 #endif 3587 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3588 } 3589 if (recipe_m > 0 && B_N - B_neigs > 0) { 3590 PetscBLASInt B_neigs2 = 0; 3591 3592 B_IL = 1; 3593 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3594 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3595 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3596 #if defined(PETSC_USE_COMPLEX) 3597 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3598 #else 3599 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3600 #endif 3601 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3602 B_neigs += B_neigs2; 3603 } 3604 break; 3605 case 4: 3606 bb[0] = PETSC_MIN_REAL; 3607 bb[1] = lthresh; 3608 #if defined(PETSC_USE_COMPLEX) 3609 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3610 #else 3611 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3612 #endif 3613 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3614 { 3615 PetscBLASInt B_neigs2 = 0; 3616 3617 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3618 bb[1] = PETSC_MAX_REAL; 3619 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3620 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3621 #if defined(PETSC_USE_COMPLEX) 3622 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3623 #else 3624 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3625 #endif 3626 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3627 B_neigs += B_neigs2; 3628 } 3629 break; 3630 case 5: /* same as before: first compute all eigenvalues, then filter */ 3631 #if defined(PETSC_USE_COMPLEX) 3632 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3633 #else 3634 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3635 #endif 3636 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3637 { 3638 PetscInt e, k, ne; 3639 for (e = 0, ne = 0; e < B_neigs; e++) { 3640 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3641 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3642 eigs[ne] = eigs[e]; 3643 ne++; 3644 } 3645 } 3646 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3647 PetscCall(PetscBLASIntCast(ne, &B_neigs)); 3648 } 3649 break; 3650 default: 3651 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3652 } 3653 } 3654 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3655 PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU)); 3656 B_IL = 1; 3657 #if defined(PETSC_USE_COMPLEX) 3658 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3659 #else 3660 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3661 #endif 3662 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3663 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3664 PetscInt k; 3665 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3666 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3667 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3668 nmin = nmax; 3669 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3670 for (k = 0; k < nmax; k++) { 3671 eigs[k] = 1. / PETSC_SMALL; 3672 eigv[k * (subset_size + 1)] = 1.0; 3673 } 3674 } 3675 PetscCall(PetscFPTrapPop()); 3676 if (B_ierr) { 3677 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3678 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3679 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3680 } 3681 3682 if (B_neigs > nmax) { 3683 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); 3684 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3685 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3686 } 3687 3688 nmin_s = PetscMin(nmin, B_N); 3689 if (B_neigs < nmin_s) { 3690 PetscBLASInt B_neigs2 = 0; 3691 3692 if (upart) { 3693 if (scal) { 3694 PetscCall(PetscBLASIntCast(nmin_s, &B_IU)); 3695 B_IL = B_neigs + 1; 3696 } else { 3697 PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL)); 3698 B_IU = B_N - B_neigs; 3699 } 3700 } else { 3701 B_IL = B_neigs + 1; 3702 PetscCall(PetscBLASIntCast(nmin_s, &B_IU)); 3703 } 3704 if (pcbddc->dbg_flag) { 3705 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3706 } 3707 if (sub_schurs->is_symmetric) { 3708 PetscInt j, k; 3709 for (j = 0; j < subset_size; j++) { 3710 for (k = j; k < subset_size; k++) { 3711 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3712 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3713 } 3714 } 3715 } else { 3716 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3717 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3718 } 3719 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3720 #if defined(PETSC_USE_COMPLEX) 3721 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3722 #else 3723 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3724 #endif 3725 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3726 PetscCall(PetscFPTrapPop()); 3727 B_neigs += B_neigs2; 3728 } 3729 if (B_ierr) { 3730 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3731 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3732 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3733 } 3734 if (pcbddc->dbg_flag) { 3735 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3736 for (j = 0; j < B_neigs; j++) { 3737 if (!sub_schurs->gdsw) { 3738 if (eigs[j] == 0.0) { 3739 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3740 } else { 3741 if (upart) { 3742 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3743 } else { 3744 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1 / eigs[j + eigs_start]))); 3745 } 3746 } 3747 } else { 3748 double pg = (double)eigs[j + eigs_start]; 3749 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3750 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3751 } 3752 } 3753 } 3754 } 3755 /* change the basis back to the original one */ 3756 if (sub_schurs->change) { 3757 Mat change, phi, phit; 3758 3759 if (pcbddc->dbg_flag > 2) { 3760 PetscInt ii; 3761 for (ii = 0; ii < B_neigs; ii++) { 3762 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3763 for (j = 0; j < B_N; j++) { 3764 #if defined(PETSC_USE_COMPLEX) 3765 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3766 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3767 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3768 #else 3769 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3770 #endif 3771 } 3772 } 3773 } 3774 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3775 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3776 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi)); 3777 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3778 PetscCall(MatDestroy(&phit)); 3779 PetscCall(MatDestroy(&phi)); 3780 } 3781 maxneigs = PetscMax(B_neigs, maxneigs); 3782 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3783 if (B_neigs) { 3784 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3785 3786 if (pcbddc->dbg_flag > 1) { 3787 PetscInt ii; 3788 for (ii = 0; ii < B_neigs; ii++) { 3789 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3790 for (j = 0; j < B_N; j++) { 3791 #if defined(PETSC_USE_COMPLEX) 3792 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3793 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3794 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3795 #else 3796 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3797 #endif 3798 } 3799 } 3800 } 3801 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3802 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3803 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3804 cum++; 3805 } 3806 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3807 /* shift for next computation */ 3808 cumarray += subset_size * subset_size; 3809 } 3810 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3811 3812 if (mss) { 3813 if (sub_schurs->gdsw) { 3814 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3815 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3816 } else { 3817 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3818 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3819 /* destroy matrices (junk) */ 3820 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3821 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3822 } 3823 } 3824 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3825 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3826 #if defined(PETSC_USE_COMPLEX) 3827 PetscCall(PetscFree(rwork)); 3828 #endif 3829 if (pcbddc->dbg_flag) { 3830 PetscInt maxneigs_r; 3831 PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3832 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3833 } 3834 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3835 PetscFunctionReturn(PETSC_SUCCESS); 3836 } 3837 3838 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3839 { 3840 Mat coarse_submat; 3841 3842 PetscFunctionBegin; 3843 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3844 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3845 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3846 3847 /* Setup local neumann solver ksp_R */ 3848 /* PCBDDCSetUpLocalScatters should be called first! */ 3849 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3850 3851 /* 3852 Setup local correction and local part of coarse basis. 3853 Gives back the dense local part of the coarse matrix in column major ordering 3854 */ 3855 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat)); 3856 3857 /* Compute total number of coarse nodes and setup coarse solver */ 3858 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat)); 3859 PetscCall(MatDestroy(&coarse_submat)); 3860 PetscFunctionReturn(PETSC_SUCCESS); 3861 } 3862 3863 PetscErrorCode PCBDDCResetCustomization(PC pc) 3864 { 3865 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3866 3867 PetscFunctionBegin; 3868 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3869 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3870 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3871 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3872 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3873 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3874 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3875 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3876 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3877 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3878 PetscFunctionReturn(PETSC_SUCCESS); 3879 } 3880 3881 PetscErrorCode PCBDDCResetTopography(PC pc) 3882 { 3883 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3884 PetscInt i; 3885 3886 PetscFunctionBegin; 3887 PetscCall(MatDestroy(&pcbddc->nedcG)); 3888 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3889 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3890 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3891 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3892 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3893 PetscCall(VecDestroy(&pcbddc->work_change)); 3894 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3895 PetscCall(MatDestroy(&pcbddc->divudotp)); 3896 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3897 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3898 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3899 pcbddc->n_local_subs = 0; 3900 PetscCall(PetscFree(pcbddc->local_subs)); 3901 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3902 pcbddc->graphanalyzed = PETSC_FALSE; 3903 pcbddc->recompute_topography = PETSC_TRUE; 3904 pcbddc->corner_selected = PETSC_FALSE; 3905 PetscFunctionReturn(PETSC_SUCCESS); 3906 } 3907 3908 PetscErrorCode PCBDDCResetSolvers(PC pc) 3909 { 3910 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3911 3912 PetscFunctionBegin; 3913 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3914 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3915 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3916 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3917 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3918 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3919 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3920 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3921 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3922 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3923 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3924 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3925 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3926 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3927 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3928 PetscCall(KSPReset(pcbddc->ksp_D)); 3929 PetscCall(KSPReset(pcbddc->ksp_R)); 3930 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3931 PetscCall(MatDestroy(&pcbddc->local_mat)); 3932 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3933 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 3934 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3935 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3936 PetscCall(MatDestroy(&pcbddc->benign_change)); 3937 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3938 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 3939 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3940 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3941 if (pcbddc->benign_zerodiag_subs) { 3942 PetscInt i; 3943 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3944 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3945 } 3946 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3947 PetscFunctionReturn(PETSC_SUCCESS); 3948 } 3949 3950 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3951 { 3952 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3953 PC_IS *pcis = (PC_IS *)pc->data; 3954 VecType impVecType; 3955 PetscInt n_constraints, n_R, old_size; 3956 3957 PetscFunctionBegin; 3958 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3959 n_R = pcis->n - pcbddc->n_vertices; 3960 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 3961 /* local work vectors (try to avoid unneeded work)*/ 3962 /* R nodes */ 3963 old_size = -1; 3964 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); 3965 if (n_R != old_size) { 3966 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3967 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3968 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 3969 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 3970 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 3971 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 3972 } 3973 /* local primal dofs */ 3974 old_size = -1; 3975 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); 3976 if (pcbddc->local_primal_size != old_size) { 3977 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3978 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 3979 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 3980 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 3981 } 3982 /* local explicit constraints */ 3983 old_size = -1; 3984 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); 3985 if (n_constraints && n_constraints != old_size) { 3986 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3987 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 3988 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 3989 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 3990 } 3991 PetscFunctionReturn(PETSC_SUCCESS); 3992 } 3993 3994 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode) 3995 { 3996 PetscBool flg; 3997 const PetscScalar *a; 3998 3999 PetscFunctionBegin; 4000 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg)); 4001 if (flg) { 4002 PetscCall(MatDenseGetArrayRead(S, &a)); 4003 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE)); 4004 PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode)); 4005 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE)); 4006 PetscCall(MatDenseRestoreArrayRead(S, &a)); 4007 } else { 4008 const PetscInt *ii, *jj; 4009 PetscInt n; 4010 PetscInt buf[8192], *bufc = NULL; 4011 PetscBool freeb = PETSC_FALSE; 4012 Mat Sm = S; 4013 4014 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg)); 4015 if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm)); 4016 else PetscCall(PetscObjectReference((PetscObject)S)); 4017 PetscCall(MatSeqAIJGetArrayRead(Sm, &a)); 4018 PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4019 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure"); 4020 if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) { 4021 bufc = buf; 4022 } else { 4023 PetscCall(PetscMalloc1(nc, &bufc)); 4024 freeb = PETSC_TRUE; 4025 } 4026 4027 for (PetscInt i = 0; i < n; i++) { 4028 const PetscInt nci = ii[i + 1] - ii[i]; 4029 4030 for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]]; 4031 PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode)); 4032 } 4033 PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 4034 PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a)); 4035 PetscCall(MatDestroy(&Sm)); 4036 if (freeb) PetscCall(PetscFree(bufc)); 4037 } 4038 PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY)); 4039 PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY)); 4040 PetscFunctionReturn(PETSC_SUCCESS); 4041 } 4042 4043 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat) 4044 { 4045 Mat_SeqAIJ *aij; 4046 PetscInt *ii, *jj; 4047 PetscScalar *aa; 4048 PetscInt nnz = 0, m, nc; 4049 const PetscScalar *a; 4050 const PetscScalar zero = 0.0; 4051 4052 PetscFunctionBegin; 4053 PetscCall(MatGetLocalSize(D, &m, &nc)); 4054 PetscCall(MatDenseGetArrayRead(D, &a)); 4055 PetscCall(PetscMalloc1(m + 1, &ii)); 4056 PetscCall(PetscMalloc1(m * nc, &jj)); 4057 PetscCall(PetscMalloc1(m * nc, &aa)); 4058 ii[0] = 0; 4059 for (PetscInt k = 0; k < m; k++) { 4060 for (PetscInt s = 0; s < nc; s++) { 4061 const PetscInt c = s + k * nc; 4062 const PetscScalar v = a[k + s * m]; 4063 4064 if (PetscUnlikely(j[c] < 0 || v == zero)) continue; 4065 jj[nnz] = j[c]; 4066 aa[nnz] = a[k + s * m]; 4067 nnz++; 4068 } 4069 ii[k + 1] = nnz; 4070 } 4071 4072 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat)); 4073 PetscCall(MatDenseRestoreArrayRead(D, &a)); 4074 4075 aij = (Mat_SeqAIJ *)(*mat)->data; 4076 aij->free_a = PETSC_TRUE; 4077 aij->free_ij = PETSC_TRUE; 4078 PetscFunctionReturn(PETSC_SUCCESS); 4079 } 4080 4081 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */ 4082 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B) 4083 { 4084 PetscInt n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL; 4085 const PetscBool allowzeropivot = PETSC_FALSE; 4086 PetscBool zeropivotdetected = PETSC_FALSE; 4087 const PetscReal shift = 0.0; 4088 PetscInt ipvt[5], *ii, *jj, *indi, *indj; 4089 PetscScalar work[25], *v_work = NULL, *aa, *diag; 4090 PetscLogDouble flops = 0.0; 4091 4092 PetscFunctionBegin; 4093 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices"); 4094 for (PetscInt i = 0; i < nblocks; i++) { 4095 ncnt += bsizes[i]; 4096 ncnt2 += PetscSqr(bsizes[i]); 4097 } 4098 PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n); 4099 for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]); 4100 if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots)); 4101 4102 PetscCall(PetscMalloc1(n + 1, &ii)); 4103 PetscCall(PetscMalloc1(ncnt2, &jj)); 4104 PetscCall(PetscCalloc1(ncnt2, &aa)); 4105 4106 ncnt = 0; 4107 ii[0] = 0; 4108 indi = ii; 4109 indj = jj; 4110 diag = aa; 4111 for (PetscInt i = 0; i < nblocks; i++) { 4112 const PetscInt bs = bsizes[i]; 4113 4114 for (PetscInt k = 0; k < bs; k++) { 4115 indi[k + 1] = indi[k] + bs; 4116 for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j; 4117 } 4118 PetscCall(MatGetValues(A, bs, indj, bs, indj, diag)); 4119 switch (bs) { 4120 case 1: 4121 *diag = 1.0 / (*diag); 4122 break; 4123 case 2: 4124 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 4125 break; 4126 case 3: 4127 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 4128 break; 4129 case 4: 4130 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 4131 break; 4132 case 5: 4133 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 4134 break; 4135 case 6: 4136 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 4137 break; 4138 case 7: 4139 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 4140 break; 4141 default: 4142 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4143 } 4144 ncnt += bs; 4145 flops += 2.0 * PetscPowInt(bs, 3) / 3.0; 4146 diag += bs * bs; 4147 indj += bs * bs; 4148 indi += bs; 4149 } 4150 PetscCall(PetscLogFlops(flops)); 4151 PetscCall(PetscFree2(v_work, v_pivots)); 4152 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B)); 4153 { 4154 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data; 4155 aij->free_a = PETSC_TRUE; 4156 aij->free_ij = PETSC_TRUE; 4157 } 4158 PetscFunctionReturn(PETSC_SUCCESS); 4159 } 4160 4161 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B) 4162 { 4163 const PetscScalar *rarr; 4164 PetscScalar *larr; 4165 PetscSF vsf; 4166 PetscInt n, rld, lld; 4167 4168 PetscFunctionBegin; 4169 PetscCall(MatGetSize(A, NULL, &n)); 4170 PetscCall(MatDenseGetLDA(A, &rld)); 4171 PetscCall(MatDenseGetLDA(B, &lld)); 4172 PetscCall(MatDenseGetArrayRead(A, &rarr)); 4173 PetscCall(MatDenseGetArrayWrite(B, &larr)); 4174 PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf)); 4175 PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4176 PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4177 PetscCall(MatDenseRestoreArrayRead(A, &rarr)); 4178 PetscCall(MatDenseRestoreArrayWrite(B, &larr)); 4179 PetscCall(PetscSFDestroy(&vsf)); 4180 PetscFunctionReturn(PETSC_SUCCESS); 4181 } 4182 4183 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat) 4184 { 4185 PC_IS *pcis = (PC_IS *)pc->data; 4186 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4187 PCBDDCGraph graph = pcbddc->mat_graph; 4188 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4189 /* submatrices of local problem */ 4190 Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL; 4191 /* submatrices of local coarse problem */ 4192 Mat S_CV = NULL, S_VC = NULL, S_CC = NULL; 4193 /* working matrices */ 4194 Mat C_CR; 4195 4196 /* additional working stuff */ 4197 PC pc_R; 4198 IS is_R, is_V, is_C; 4199 const PetscInt *idx_V, *idx_C; 4200 Mat F, Brhs = NULL; 4201 Vec dummy_vec; 4202 PetscBool isLU, isCHOL, need_benign_correction, sparserhs; 4203 PetscInt *idx_V_B; 4204 PetscInt lda_rhs, n_vertices, n_constraints, *p0_lidx_I; 4205 PetscInt n_eff_vertices, n_eff_constraints; 4206 PetscInt i, n_R, n_D, n_B; 4207 PetscScalar one = 1.0, m_one = -1.0; 4208 4209 /* Multi-element support */ 4210 PetscBool multi_element = graph->multi_element; 4211 PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL; 4212 PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL; 4213 IS is_C_perm = NULL; 4214 PetscInt n_C_bss = 0, *C_bss = NULL; 4215 Mat coarse_phi_multi; 4216 4217 PetscFunctionBegin; 4218 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 4219 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4220 4221 /* Set Non-overlapping dimensions */ 4222 n_vertices = pcbddc->n_vertices; 4223 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 4224 n_B = pcis->n_B; 4225 n_D = pcis->n - n_B; 4226 n_R = pcis->n - n_vertices; 4227 4228 /* vertices in boundary numbering */ 4229 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 4230 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 4231 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 4232 4233 /* these two cases still need to be optimized */ 4234 if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE; 4235 4236 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 4237 if (multi_element) { 4238 PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 4239 4240 PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat)); 4241 PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size)); 4242 PetscCall(MatSetType(*coarse_submat, MATSEQAIJ)); 4243 PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 4244 PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 4245 4246 /* group vertices and constraints by subdomain id */ 4247 const PetscInt *vidxs = pcbddc->primal_indices_local_idxs; 4248 const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices; 4249 PetscInt *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz; 4250 PetscInt n_el = PetscMax(graph->n_local_subs, 1); 4251 4252 PetscCall(PetscCalloc1(2 * n_el, &count_eff)); 4253 PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V)); 4254 PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C)); 4255 for (PetscInt i = 0; i < n_vertices; i++) { 4256 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4257 4258 V_to_eff_V[i] = count_eff[s]; 4259 count_eff[s] += 1; 4260 } 4261 for (PetscInt i = 0; i < n_constraints; i++) { 4262 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1; 4263 4264 C_to_eff_C[i] = count_eff[s]; 4265 count_eff[s] += 1; 4266 } 4267 4268 /* preallocation */ 4269 PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz)); 4270 for (PetscInt i = 0; i < n_vertices; i++) { 4271 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4272 4273 nnz[i] = count_eff[s] + count_eff[s + 1]; 4274 } 4275 for (PetscInt i = 0; i < n_constraints; i++) { 4276 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub; 4277 4278 nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1]; 4279 } 4280 PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz)); 4281 PetscCall(PetscFree(nnz)); 4282 4283 n_eff_vertices = 0; 4284 n_eff_constraints = 0; 4285 for (PetscInt i = 0; i < n_el; i++) { 4286 n_eff_vertices = PetscMax(n_eff_vertices, count_eff[2 * i]); 4287 n_eff_constraints = PetscMax(n_eff_constraints, count_eff[2 * i + 1]); 4288 count_eff[2 * i] = 0; 4289 count_eff[2 * i + 1] = 0; 4290 } 4291 4292 const PetscInt *idx; 4293 PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C)); 4294 4295 for (PetscInt i = 0; i < n_vertices; i++) { 4296 const PetscInt e = graph->nodes[vidxs[i]].local_sub; 4297 const PetscInt s = 2 * e; 4298 4299 V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i; 4300 count_eff[s] += 1; 4301 } 4302 for (PetscInt i = 0; i < n_constraints; i++) { 4303 const PetscInt e = graph->nodes[cidxs[i]].local_sub; 4304 const PetscInt s = 2 * e + 1; 4305 4306 C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i; 4307 count_eff[s] += 1; 4308 } 4309 4310 PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J)); 4311 PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J)); 4312 PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J)); 4313 PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J)); 4314 for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1; 4315 for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1; 4316 for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1; 4317 for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1; 4318 4319 PetscCall(ISGetIndices(pcbddc->is_R_local, &idx)); 4320 for (PetscInt i = 0; i < n_R; i++) { 4321 const PetscInt e = graph->nodes[idx[i]].local_sub; 4322 const PetscInt s = 2 * e; 4323 PetscInt j; 4324 4325 for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4326 for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4327 } 4328 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx)); 4329 PetscCall(ISGetIndices(pcis->is_B_local, &idx)); 4330 for (PetscInt i = 0; i < n_B; i++) { 4331 const PetscInt e = graph->nodes[idx[i]].local_sub; 4332 const PetscInt s = 2 * e; 4333 PetscInt j; 4334 4335 for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4336 for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4337 } 4338 PetscCall(ISRestoreIndices(pcis->is_B_local, &idx)); 4339 4340 /* permutation and blocksizes for block invert of S_CC */ 4341 PetscInt *idxp; 4342 4343 PetscCall(PetscMalloc1(n_constraints, &idxp)); 4344 PetscCall(PetscMalloc1(n_el, &C_bss)); 4345 n_C_bss = 0; 4346 for (PetscInt e = 0, cnt = 0; e < n_el; e++) { 4347 const PetscInt nc = count_eff[2 * e + 1]; 4348 4349 if (nc) C_bss[n_C_bss++] = nc; 4350 for (PetscInt c = 0; c < nc; c++) { idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c]; } 4351 cnt += nc; 4352 } 4353 4354 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm)); 4355 4356 PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C)); 4357 PetscCall(PetscFree(count_eff)); 4358 } else { 4359 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat)); 4360 n_eff_constraints = n_constraints; 4361 n_eff_vertices = n_vertices; 4362 } 4363 4364 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 4365 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 4366 PetscCall(PCSetUp(pc_R)); 4367 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 4368 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 4369 lda_rhs = n_R; 4370 need_benign_correction = PETSC_FALSE; 4371 if (isLU || isCHOL) { 4372 PetscCall(PCFactorGetMatrix(pc_R, &F)); 4373 } else if (sub_schurs && sub_schurs->reuse_solver) { 4374 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4375 MatFactorType type; 4376 4377 F = reuse_solver->F; 4378 PetscCall(MatGetFactorType(F, &type)); 4379 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 4380 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 4381 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 4382 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 4383 } else F = NULL; 4384 4385 /* determine if we can use a sparse right-hand side */ 4386 sparserhs = PETSC_FALSE; 4387 if (F && !multi_element) { 4388 MatSolverType solver; 4389 4390 PetscCall(MatFactorGetSolverType(F, &solver)); 4391 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 4392 } 4393 4394 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 4395 dummy_vec = NULL; 4396 if (need_benign_correction && lda_rhs != n_R && F) { 4397 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 4398 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 4399 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 4400 } 4401 4402 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 4403 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4404 4405 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R)); 4406 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V)); 4407 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C)); 4408 PetscCall(ISGetIndices(is_V, &idx_V)); 4409 PetscCall(ISGetIndices(is_C, &idx_C)); 4410 4411 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4412 if (n_constraints) { 4413 Mat C_B; 4414 4415 /* Extract constraints on R nodes: C_{CR} */ 4416 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 4417 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4418 4419 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4420 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4421 if (!sparserhs) { 4422 PetscScalar *marr; 4423 4424 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs)); 4425 PetscCall(MatDenseGetArrayWrite(Brhs, &marr)); 4426 for (i = 0; i < n_constraints; i++) { 4427 const PetscScalar *row_cmat_values; 4428 const PetscInt *row_cmat_indices; 4429 PetscInt size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i; 4430 4431 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4432 for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j]; 4433 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4434 } 4435 PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr)); 4436 } else { 4437 Mat tC_CR; 4438 4439 PetscCall(MatScale(C_CR, -1.0)); 4440 if (lda_rhs != n_R) { 4441 PetscScalar *aa; 4442 PetscInt r, *ii, *jj; 4443 PetscBool done; 4444 4445 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4446 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4447 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 4448 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 4449 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4450 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4451 } else { 4452 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4453 tC_CR = C_CR; 4454 } 4455 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 4456 PetscCall(MatDestroy(&tC_CR)); 4457 } 4458 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R)); 4459 if (F) { 4460 if (need_benign_correction) { 4461 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4462 4463 /* rhs is already zero on interior dofs, no need to change the rhs */ 4464 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 4465 } 4466 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 4467 if (need_benign_correction) { 4468 PetscScalar *marr; 4469 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4470 4471 /* XXX multi_element? */ 4472 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4473 if (lda_rhs != n_R) { 4474 for (i = 0; i < n_eff_constraints; i++) { 4475 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4476 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4477 PetscCall(VecResetArray(dummy_vec)); 4478 } 4479 } else { 4480 for (i = 0; i < n_eff_constraints; i++) { 4481 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4482 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4483 PetscCall(VecResetArray(pcbddc->vec1_R)); 4484 } 4485 } 4486 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4487 } 4488 } else { 4489 const PetscScalar *barr; 4490 PetscScalar *marr; 4491 4492 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4493 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4494 for (i = 0; i < n_eff_constraints; i++) { 4495 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4496 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4497 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4498 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4499 PetscCall(VecResetArray(pcbddc->vec1_R)); 4500 PetscCall(VecResetArray(pcbddc->vec2_R)); 4501 } 4502 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4503 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4504 } 4505 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 4506 PetscCall(MatDestroy(&Brhs)); 4507 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4508 if (!pcbddc->switch_static) { 4509 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2)); 4510 for (i = 0; i < n_eff_constraints; i++) { 4511 Vec r, b; 4512 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r)); 4513 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b)); 4514 PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4515 PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4516 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b)); 4517 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r)); 4518 } 4519 if (multi_element) { 4520 Mat T; 4521 4522 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4523 PetscCall(MatDestroy(&local_auxmat2_R)); 4524 local_auxmat2_R = T; 4525 PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T)); 4526 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4527 pcbddc->local_auxmat2 = T; 4528 } 4529 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4530 } else { 4531 if (multi_element) { 4532 Mat T; 4533 4534 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4535 PetscCall(MatDestroy(&local_auxmat2_R)); 4536 local_auxmat2_R = T; 4537 } 4538 if (lda_rhs != n_R) { 4539 PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 4540 } else { 4541 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4542 pcbddc->local_auxmat2 = local_auxmat2_R; 4543 } 4544 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC)); 4545 } 4546 PetscCall(MatScale(S_CC, m_one)); 4547 if (multi_element) { 4548 Mat T, T2; 4549 IS isp, ispi; 4550 4551 isp = is_C_perm; 4552 4553 PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi)); 4554 PetscCall(MatPermute(S_CC, isp, isp, &T)); 4555 PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2)); 4556 PetscCall(MatDestroy(&T)); 4557 PetscCall(MatDestroy(&S_CC)); 4558 PetscCall(MatPermute(T2, ispi, ispi, &S_CC)); 4559 PetscCall(MatDestroy(&T2)); 4560 PetscCall(ISDestroy(&ispi)); 4561 } else { 4562 if (isCHOL) { 4563 PetscCall(MatCholeskyFactor(S_CC, NULL, NULL)); 4564 } else { 4565 PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL)); 4566 } 4567 PetscCall(MatSeqDenseInvertFactors_Private(S_CC)); 4568 } 4569 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4570 PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1)); 4571 PetscCall(MatDestroy(&C_B)); 4572 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES)); 4573 } 4574 4575 /* Get submatrices from subdomain matrix */ 4576 if (n_vertices) { 4577 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4578 PetscBool oldpin; 4579 #endif 4580 IS is_aux; 4581 4582 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4583 IS tis; 4584 4585 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 4586 PetscCall(ISSort(tis)); 4587 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 4588 PetscCall(ISDestroy(&tis)); 4589 } else { 4590 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 4591 } 4592 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4593 oldpin = pcbddc->local_mat->boundtocpu; 4594 #endif 4595 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 4596 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 4597 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 4598 /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4599 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4600 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 4601 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4602 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 4603 #endif 4604 PetscCall(ISDestroy(&is_aux)); 4605 } 4606 PetscCall(ISDestroy(&is_C_perm)); 4607 PetscCall(PetscFree(C_bss)); 4608 4609 p0_lidx_I = NULL; 4610 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4611 const PetscInt *idxs; 4612 4613 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4614 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4615 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); 4616 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4617 } 4618 4619 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4620 4621 /* Matrices of coarse basis functions (local) */ 4622 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4623 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4624 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4625 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4626 if (!multi_element) { 4627 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B)); 4628 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D)); 4629 coarse_phi_multi = NULL; 4630 } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */ 4631 IS is_rows[2] = {pcbddc->is_R_local, NULL}; 4632 IS is_cols[2] = {is_V, is_C}; 4633 4634 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1])); 4635 PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi)); 4636 PetscCall(ISDestroy(&is_rows[1])); 4637 } 4638 4639 /* vertices */ 4640 if (n_vertices) { 4641 PetscBool restoreavr = PETSC_FALSE; 4642 Mat A_RRmA_RV = NULL; 4643 4644 PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4645 PetscCall(MatDestroy(&A_VV)); 4646 4647 if (n_R) { 4648 Mat A_RV_bcorr = NULL, S_VV; 4649 4650 PetscCall(MatScale(A_RV, m_one)); 4651 if (need_benign_correction) { 4652 ISLocalToGlobalMapping RtoN; 4653 IS is_p0; 4654 PetscInt *idxs_p0, n; 4655 4656 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4657 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4658 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4659 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4660 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4661 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4662 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4663 PetscCall(ISDestroy(&is_p0)); 4664 } 4665 4666 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV)); 4667 if (!sparserhs || need_benign_correction) { 4668 if (lda_rhs == n_R && !multi_element) { 4669 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4670 } else { 4671 Mat T; 4672 PetscScalar *av, *array; 4673 const PetscInt *xadj, *adjncy; 4674 PetscInt n; 4675 PetscBool flg_row; 4676 4677 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T)); 4678 PetscCall(MatDenseGetArrayWrite(T, &array)); 4679 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4680 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4681 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4682 for (i = 0; i < n; i++) { 4683 PetscInt j; 4684 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j]; 4685 } 4686 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4687 PetscCall(MatDenseRestoreArrayWrite(T, &array)); 4688 PetscCall(MatDestroy(&A_RV)); 4689 A_RV = T; 4690 } 4691 if (need_benign_correction) { 4692 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4693 PetscScalar *marr; 4694 4695 /* XXX multi_element */ 4696 PetscCall(MatDenseGetArray(A_RV, &marr)); 4697 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4698 4699 | 0 0 0 | (V) 4700 L = | 0 0 -1 | (P-p0) 4701 | 0 0 -1 | (p0) 4702 4703 */ 4704 for (i = 0; i < reuse_solver->benign_n; i++) { 4705 const PetscScalar *vals; 4706 const PetscInt *idxs, *idxs_zero; 4707 PetscInt n, j, nz; 4708 4709 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4710 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4711 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4712 for (j = 0; j < n; j++) { 4713 PetscScalar val = vals[j]; 4714 PetscInt k, col = idxs[j]; 4715 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4716 } 4717 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4718 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4719 } 4720 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4721 } 4722 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4723 Brhs = A_RV; 4724 } else { 4725 Mat tA_RVT, A_RVT; 4726 4727 if (!pcbddc->symmetric_primal) { 4728 /* A_RV already scaled by -1 */ 4729 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4730 } else { 4731 restoreavr = PETSC_TRUE; 4732 PetscCall(MatScale(A_VR, -1.0)); 4733 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4734 A_RVT = A_VR; 4735 } 4736 if (lda_rhs != n_R) { 4737 PetscScalar *aa; 4738 PetscInt r, *ii, *jj; 4739 PetscBool done; 4740 4741 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4742 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4743 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4744 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4745 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4746 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4747 } else { 4748 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4749 tA_RVT = A_RVT; 4750 } 4751 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4752 PetscCall(MatDestroy(&tA_RVT)); 4753 PetscCall(MatDestroy(&A_RVT)); 4754 } 4755 if (F) { 4756 /* need to correct the rhs */ 4757 if (need_benign_correction) { 4758 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4759 PetscScalar *marr; 4760 4761 PetscCall(MatDenseGetArray(Brhs, &marr)); 4762 if (lda_rhs != n_R) { 4763 for (i = 0; i < n_eff_vertices; i++) { 4764 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4765 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4766 PetscCall(VecResetArray(dummy_vec)); 4767 } 4768 } else { 4769 for (i = 0; i < n_eff_vertices; i++) { 4770 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4771 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4772 PetscCall(VecResetArray(pcbddc->vec1_R)); 4773 } 4774 } 4775 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4776 } 4777 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4778 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4779 /* need to correct the solution */ 4780 if (need_benign_correction) { 4781 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4782 PetscScalar *marr; 4783 4784 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4785 if (lda_rhs != n_R) { 4786 for (i = 0; i < n_eff_vertices; i++) { 4787 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4788 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4789 PetscCall(VecResetArray(dummy_vec)); 4790 } 4791 } else { 4792 for (i = 0; i < n_eff_vertices; i++) { 4793 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4794 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4795 PetscCall(VecResetArray(pcbddc->vec1_R)); 4796 } 4797 } 4798 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4799 } 4800 } else { 4801 const PetscScalar *barr; 4802 PetscScalar *marr; 4803 4804 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4805 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4806 for (i = 0; i < n_eff_vertices; i++) { 4807 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4808 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4809 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4810 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4811 PetscCall(VecResetArray(pcbddc->vec1_R)); 4812 PetscCall(VecResetArray(pcbddc->vec2_R)); 4813 } 4814 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4815 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4816 } 4817 PetscCall(MatDestroy(&A_RV)); 4818 PetscCall(MatDestroy(&Brhs)); 4819 /* S_VV and S_CV */ 4820 if (n_constraints) { 4821 Mat B; 4822 4823 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B)); 4824 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B)); 4825 4826 /* S_CV = pcbddc->local_auxmat1 * B */ 4827 if (multi_element) { 4828 Mat T; 4829 4830 PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T)); 4831 PetscCall(MatDestroy(&B)); 4832 B = T; 4833 } 4834 PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV)); 4835 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4836 PetscCall(MatProductSetFromOptions(S_CV)); 4837 PetscCall(MatProductSymbolic(S_CV)); 4838 PetscCall(MatProductNumeric(S_CV)); 4839 PetscCall(MatProductClear(S_CV)); 4840 PetscCall(MatDestroy(&B)); 4841 4842 /* B = local_auxmat2_R * S_CV */ 4843 PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B)); 4844 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4845 PetscCall(MatProductSetFromOptions(B)); 4846 PetscCall(MatProductSymbolic(B)); 4847 PetscCall(MatProductNumeric(B)); 4848 4849 PetscCall(MatScale(S_CV, m_one)); 4850 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES)); 4851 4852 if (multi_element) { 4853 Mat T; 4854 4855 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4856 PetscCall(MatDestroy(&A_RRmA_RV)); 4857 A_RRmA_RV = T; 4858 } 4859 PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */ 4860 PetscCall(MatDestroy(&B)); 4861 } else if (multi_element) { 4862 Mat T; 4863 4864 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4865 PetscCall(MatDestroy(&A_RRmA_RV)); 4866 A_RRmA_RV = T; 4867 } 4868 4869 if (lda_rhs != n_R) { 4870 Mat T; 4871 4872 PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T)); 4873 PetscCall(MatDestroy(&A_RRmA_RV)); 4874 A_RRmA_RV = T; 4875 } 4876 4877 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4878 if (need_benign_correction) { /* XXX SPARSE */ 4879 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4880 PetscScalar *sums; 4881 const PetscScalar *marr; 4882 4883 PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr)); 4884 PetscCall(PetscMalloc1(n_vertices, &sums)); 4885 for (i = 0; i < reuse_solver->benign_n; i++) { 4886 const PetscScalar *vals; 4887 const PetscInt *idxs, *idxs_zero; 4888 PetscInt n, j, nz; 4889 4890 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4891 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4892 for (j = 0; j < n_vertices; j++) { 4893 sums[j] = 0.; 4894 for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R]; 4895 } 4896 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4897 for (j = 0; j < n; j++) { 4898 PetscScalar val = vals[j]; 4899 for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES)); 4900 } 4901 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4902 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4903 } 4904 PetscCall(PetscFree(sums)); 4905 PetscCall(MatDestroy(&A_RV_bcorr)); 4906 PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr)); 4907 } 4908 4909 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV)); 4910 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4911 PetscCall(MatDestroy(&S_VV)); 4912 } 4913 4914 /* coarse basis functions */ 4915 if (coarse_phi_multi) { 4916 Mat Vid; 4917 4918 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid)); 4919 PetscCall(MatShift_Basic(Vid, 1.0)); 4920 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV)); 4921 PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid)); 4922 PetscCall(MatDestroy(&Vid)); 4923 } else { 4924 if (A_RRmA_RV) { 4925 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B)); 4926 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4927 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D)); 4928 if (pcbddc->benign_n) { 4929 for (i = 0; i < n_vertices; i++) { PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); } 4930 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY)); 4931 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY)); 4932 } 4933 } 4934 } 4935 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES)); 4936 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 4937 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 4938 } 4939 PetscCall(MatDestroy(&A_RRmA_RV)); 4940 } 4941 PetscCall(MatDestroy(&A_RV)); 4942 PetscCall(VecDestroy(&dummy_vec)); 4943 4944 if (n_constraints) { 4945 Mat B, B2; 4946 4947 PetscCall(MatScale(S_CC, m_one)); 4948 PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B)); 4949 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4950 PetscCall(MatProductSetFromOptions(B)); 4951 PetscCall(MatProductSymbolic(B)); 4952 PetscCall(MatProductNumeric(B)); 4953 4954 if (n_vertices) { 4955 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4956 PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC)); 4957 } else { 4958 if (lda_rhs != n_R) { 4959 Mat tB; 4960 4961 PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB)); 4962 PetscCall(MatDestroy(&B)); 4963 B = tB; 4964 } 4965 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC)); 4966 } 4967 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES)); 4968 } 4969 4970 /* coarse basis functions */ 4971 if (coarse_phi_multi) { 4972 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B)); 4973 } else { 4974 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 4975 PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2)); 4976 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2)); 4977 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4978 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 4979 PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2)); 4980 if (pcbddc->benign_n) { 4981 for (i = 0; i < n_constraints; i++) { PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); } 4982 } 4983 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2)); 4984 } 4985 } 4986 PetscCall(MatDestroy(&B)); 4987 } 4988 4989 /* assemble sparse coarse basis functions */ 4990 if (coarse_phi_multi) { 4991 Mat T; 4992 4993 PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T)); 4994 PetscCall(MatDestroy(&coarse_phi_multi)); 4995 PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B)); 4996 if (pcbddc->switch_static || pcbddc->dbg_flag) { PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D)); } 4997 PetscCall(MatDestroy(&T)); 4998 } 4999 PetscCall(MatDestroy(&local_auxmat2_R)); 5000 PetscCall(PetscFree(p0_lidx_I)); 5001 5002 /* coarse matrix entries relative to B_0 */ 5003 if (pcbddc->benign_n) { 5004 Mat B0_B, B0_BPHI; 5005 IS is_dummy; 5006 const PetscScalar *data; 5007 PetscInt j; 5008 5009 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5010 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5011 PetscCall(ISDestroy(&is_dummy)); 5012 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5013 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5014 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 5015 for (j = 0; j < pcbddc->benign_n; j++) { 5016 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5017 for (i = 0; i < pcbddc->local_primal_size; i++) { 5018 PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5019 PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 5020 } 5021 } 5022 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 5023 PetscCall(MatDestroy(&B0_B)); 5024 PetscCall(MatDestroy(&B0_BPHI)); 5025 } 5026 5027 /* compute other basis functions for non-symmetric problems */ 5028 if (!pcbddc->symmetric_primal) { 5029 Mat B_V = NULL, B_C = NULL; 5030 PetscScalar *marray, *work; 5031 5032 /* TODO multi_element MatDenseScatter */ 5033 if (n_constraints) { 5034 Mat S_CCT, C_CRT; 5035 5036 PetscCall(MatScale(S_CC, m_one)); 5037 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 5038 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 5039 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C)); 5040 PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C)); 5041 PetscCall(MatDestroy(&S_CCT)); 5042 if (n_vertices) { 5043 Mat S_VCT; 5044 5045 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 5046 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V)); 5047 PetscCall(MatDestroy(&S_VCT)); 5048 PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V)); 5049 } 5050 PetscCall(MatDestroy(&C_CRT)); 5051 } else { 5052 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 5053 } 5054 if (n_vertices && n_R) { 5055 PetscScalar *av, *marray; 5056 const PetscInt *xadj, *adjncy; 5057 PetscInt n; 5058 PetscBool flg_row; 5059 5060 /* B_V = B_V - A_VR^T */ 5061 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 5062 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5063 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 5064 PetscCall(MatDenseGetArray(B_V, &marray)); 5065 for (i = 0; i < n; i++) { 5066 PetscInt j; 5067 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 5068 } 5069 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5070 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5071 PetscCall(MatDestroy(&A_VR)); 5072 } 5073 5074 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 5075 PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work)); 5076 if (n_vertices) { 5077 PetscCall(MatDenseGetArray(B_V, &marray)); 5078 for (i = 0; i < n_vertices; i++) { 5079 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 5080 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5081 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5082 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5083 PetscCall(VecResetArray(pcbddc->vec1_R)); 5084 PetscCall(VecResetArray(pcbddc->vec2_R)); 5085 } 5086 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5087 } 5088 if (B_C) { 5089 PetscCall(MatDenseGetArray(B_C, &marray)); 5090 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 5091 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 5092 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5093 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5094 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5095 PetscCall(VecResetArray(pcbddc->vec1_R)); 5096 PetscCall(VecResetArray(pcbddc->vec2_R)); 5097 } 5098 PetscCall(MatDenseRestoreArray(B_C, &marray)); 5099 } 5100 /* coarse basis functions */ 5101 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B)); 5102 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D)); 5103 for (i = 0; i < pcbddc->local_primal_size; i++) { 5104 Vec v; 5105 5106 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 5107 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 5108 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5109 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5110 if (i < n_vertices) { 5111 PetscScalar one = 1.0; 5112 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 5113 PetscCall(VecAssemblyBegin(v)); 5114 PetscCall(VecAssemblyEnd(v)); 5115 } 5116 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 5117 5118 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5119 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 5120 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5121 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5122 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 5123 } 5124 PetscCall(VecResetArray(pcbddc->vec1_R)); 5125 } 5126 PetscCall(MatDestroy(&B_V)); 5127 PetscCall(MatDestroy(&B_C)); 5128 PetscCall(PetscFree(work)); 5129 } else { 5130 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 5131 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 5132 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 5133 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 5134 } 5135 PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5136 PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5137 5138 /* free memory */ 5139 PetscCall(PetscFree(V_to_eff_V)); 5140 PetscCall(PetscFree(C_to_eff_C)); 5141 PetscCall(PetscFree(R_eff_V_J)); 5142 PetscCall(PetscFree(R_eff_C_J)); 5143 PetscCall(PetscFree(B_eff_V_J)); 5144 PetscCall(PetscFree(B_eff_C_J)); 5145 PetscCall(ISDestroy(&is_R)); 5146 PetscCall(ISRestoreIndices(is_V, &idx_V)); 5147 PetscCall(ISRestoreIndices(is_C, &idx_C)); 5148 PetscCall(ISDestroy(&is_V)); 5149 PetscCall(ISDestroy(&is_C)); 5150 PetscCall(PetscFree(idx_V_B)); 5151 PetscCall(MatDestroy(&S_CV)); 5152 PetscCall(MatDestroy(&S_VC)); 5153 PetscCall(MatDestroy(&S_CC)); 5154 if (n_vertices) PetscCall(MatDestroy(&A_VR)); 5155 if (n_constraints) PetscCall(MatDestroy(&C_CR)); 5156 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 5157 5158 /* Checking coarse_sub_mat and coarse basis functions */ 5159 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5160 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5161 if (pcbddc->dbg_flag) { 5162 Mat AUXMAT, TM1, TM2, TM3, TM4; 5163 Mat coarse_phi_D, coarse_phi_B; 5164 Mat coarse_psi_D, coarse_psi_B; 5165 Mat A_II, A_BB, A_IB, A_BI; 5166 Mat C_B, CPHI; 5167 IS is_dummy; 5168 Vec mones; 5169 MatType checkmattype = MATSEQAIJ; 5170 PetscReal real_value; 5171 5172 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5173 Mat A; 5174 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 5175 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 5176 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 5177 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 5178 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 5179 PetscCall(MatDestroy(&A)); 5180 } else { 5181 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 5182 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 5183 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 5184 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 5185 } 5186 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 5187 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 5188 if (!pcbddc->symmetric_primal) { 5189 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 5190 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 5191 } 5192 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5193 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 5194 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5195 if (!pcbddc->symmetric_primal) { 5196 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5197 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5198 PetscCall(MatDestroy(&AUXMAT)); 5199 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5200 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5201 PetscCall(MatDestroy(&AUXMAT)); 5202 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5203 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5204 PetscCall(MatDestroy(&AUXMAT)); 5205 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5206 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5207 PetscCall(MatDestroy(&AUXMAT)); 5208 } else { 5209 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5210 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5211 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5212 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5213 PetscCall(MatDestroy(&AUXMAT)); 5214 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5215 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5216 PetscCall(MatDestroy(&AUXMAT)); 5217 } 5218 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 5219 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 5220 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 5221 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 5222 if (pcbddc->benign_n) { 5223 Mat B0_B, B0_BPHI; 5224 const PetscScalar *data2; 5225 PetscScalar *data; 5226 PetscInt j; 5227 5228 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5229 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5230 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5231 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5232 PetscCall(MatDenseGetArray(TM1, &data)); 5233 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 5234 for (j = 0; j < pcbddc->benign_n; j++) { 5235 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5236 for (i = 0; i < pcbddc->local_primal_size; i++) { 5237 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 5238 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 5239 } 5240 } 5241 PetscCall(MatDenseRestoreArray(TM1, &data)); 5242 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 5243 PetscCall(MatDestroy(&B0_B)); 5244 PetscCall(ISDestroy(&is_dummy)); 5245 PetscCall(MatDestroy(&B0_BPHI)); 5246 } 5247 PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN)); 5248 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 5249 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5250 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5251 5252 /* check constraints */ 5253 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 5254 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 5255 if (!pcbddc->benign_n) { /* TODO: add benign case */ 5256 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5257 } else { 5258 PetscScalar *data; 5259 Mat tmat; 5260 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 5261 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 5262 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 5263 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5264 PetscCall(MatDestroy(&tmat)); 5265 } 5266 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 5267 PetscCall(VecSet(mones, -1.0)); 5268 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5269 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5270 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5271 if (!pcbddc->symmetric_primal) { 5272 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 5273 PetscCall(VecSet(mones, -1.0)); 5274 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5275 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5276 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5277 } 5278 PetscCall(MatDestroy(&C_B)); 5279 PetscCall(MatDestroy(&CPHI)); 5280 PetscCall(ISDestroy(&is_dummy)); 5281 PetscCall(VecDestroy(&mones)); 5282 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5283 PetscCall(MatDestroy(&A_II)); 5284 PetscCall(MatDestroy(&A_BB)); 5285 PetscCall(MatDestroy(&A_IB)); 5286 PetscCall(MatDestroy(&A_BI)); 5287 PetscCall(MatDestroy(&TM1)); 5288 PetscCall(MatDestroy(&TM2)); 5289 PetscCall(MatDestroy(&TM3)); 5290 PetscCall(MatDestroy(&TM4)); 5291 PetscCall(MatDestroy(&coarse_phi_D)); 5292 PetscCall(MatDestroy(&coarse_phi_B)); 5293 if (!pcbddc->symmetric_primal) { 5294 PetscCall(MatDestroy(&coarse_psi_D)); 5295 PetscCall(MatDestroy(&coarse_psi_B)); 5296 } 5297 } 5298 5299 #if 0 5300 { 5301 PetscViewer viewer; 5302 char filename[256]; 5303 5304 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level)); 5305 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 5306 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 5307 PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat")); 5308 PetscCall(MatView(*coarse_submat,viewer)); 5309 if (pcbddc->coarse_phi_B) { 5310 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 5311 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 5312 } 5313 if (pcbddc->coarse_phi_D) { 5314 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 5315 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 5316 } 5317 if (pcbddc->coarse_psi_B) { 5318 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 5319 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 5320 } 5321 if (pcbddc->coarse_psi_D) { 5322 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 5323 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 5324 } 5325 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 5326 PetscCall(MatView(pcbddc->local_mat,viewer)); 5327 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 5328 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 5329 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 5330 PetscCall(ISView(pcis->is_I_local,viewer)); 5331 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 5332 PetscCall(ISView(pcis->is_B_local,viewer)); 5333 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 5334 PetscCall(ISView(pcbddc->is_R_local,viewer)); 5335 PetscCall(PetscViewerDestroy(&viewer)); 5336 } 5337 #endif 5338 5339 /* device support */ 5340 { 5341 PetscBool iscuda, iship, iskokkos; 5342 MatType mtype = NULL; 5343 5344 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, "")); 5345 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, "")); 5346 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, "")); 5347 if (iskokkos) { 5348 if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE; 5349 else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE; 5350 } 5351 if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP); 5352 else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP; 5353 else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA; 5354 if (mtype) { 5355 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); 5356 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); 5357 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); 5358 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); 5359 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); 5360 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); 5361 } 5362 } 5363 PetscFunctionReturn(PETSC_SUCCESS); 5364 } 5365 5366 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) 5367 { 5368 Mat *work_mat; 5369 IS isrow_s, iscol_s; 5370 PetscBool rsorted, csorted; 5371 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 5372 5373 PetscFunctionBegin; 5374 PetscCall(ISSorted(isrow, &rsorted)); 5375 PetscCall(ISSorted(iscol, &csorted)); 5376 PetscCall(ISGetLocalSize(isrow, &rsize)); 5377 PetscCall(ISGetLocalSize(iscol, &csize)); 5378 5379 if (!rsorted) { 5380 const PetscInt *idxs; 5381 PetscInt *idxs_sorted, i; 5382 5383 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 5384 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 5385 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i; 5386 PetscCall(ISGetIndices(isrow, &idxs)); 5387 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 5388 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]]; 5389 PetscCall(ISRestoreIndices(isrow, &idxs)); 5390 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 5391 } else { 5392 PetscCall(PetscObjectReference((PetscObject)isrow)); 5393 isrow_s = isrow; 5394 } 5395 5396 if (!csorted) { 5397 if (isrow == iscol) { 5398 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 5399 iscol_s = isrow_s; 5400 } else { 5401 const PetscInt *idxs; 5402 PetscInt *idxs_sorted, i; 5403 5404 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 5405 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 5406 for (i = 0; i < csize; i++) idxs_perm_c[i] = i; 5407 PetscCall(ISGetIndices(iscol, &idxs)); 5408 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 5409 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]]; 5410 PetscCall(ISRestoreIndices(iscol, &idxs)); 5411 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 5412 } 5413 } else { 5414 PetscCall(PetscObjectReference((PetscObject)iscol)); 5415 iscol_s = iscol; 5416 } 5417 5418 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 5419 5420 if (!rsorted || !csorted) { 5421 Mat new_mat; 5422 IS is_perm_r, is_perm_c; 5423 5424 if (!rsorted) { 5425 PetscInt *idxs_r, i; 5426 PetscCall(PetscMalloc1(rsize, &idxs_r)); 5427 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i; 5428 PetscCall(PetscFree(idxs_perm_r)); 5429 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 5430 } else { 5431 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 5432 } 5433 PetscCall(ISSetPermutation(is_perm_r)); 5434 5435 if (!csorted) { 5436 if (isrow_s == iscol_s) { 5437 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5438 is_perm_c = is_perm_r; 5439 } else { 5440 PetscInt *idxs_c, i; 5441 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 5442 PetscCall(PetscMalloc1(csize, &idxs_c)); 5443 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i; 5444 PetscCall(PetscFree(idxs_perm_c)); 5445 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 5446 } 5447 } else { 5448 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 5449 } 5450 PetscCall(ISSetPermutation(is_perm_c)); 5451 5452 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 5453 PetscCall(MatDestroy(&work_mat[0])); 5454 work_mat[0] = new_mat; 5455 PetscCall(ISDestroy(&is_perm_r)); 5456 PetscCall(ISDestroy(&is_perm_c)); 5457 } 5458 5459 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5460 *B = work_mat[0]; 5461 PetscCall(MatDestroyMatrices(1, &work_mat)); 5462 PetscCall(ISDestroy(&isrow_s)); 5463 PetscCall(ISDestroy(&iscol_s)); 5464 PetscFunctionReturn(PETSC_SUCCESS); 5465 } 5466 5467 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5468 { 5469 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5470 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5471 Mat new_mat, lA; 5472 IS is_local, is_global; 5473 PetscInt local_size; 5474 PetscBool isseqaij, issym, isset; 5475 5476 PetscFunctionBegin; 5477 PetscCall(MatDestroy(&pcbddc->local_mat)); 5478 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 5479 if (pcbddc->mat_graph->multi_element) { 5480 Mat *mats, *bdiags; 5481 IS *gsubs; 5482 PetscInt nsubs = pcbddc->n_local_subs; 5483 5484 PetscCall(PetscCalloc1(nsubs * nsubs, &mats)); 5485 PetscCall(PetscMalloc1(nsubs, &gsubs)); 5486 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i])); 5487 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags)); 5488 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i])); 5489 PetscCall(PetscFree(gsubs)); 5490 5491 for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i]; 5492 PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat)); 5493 PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat)); 5494 PetscCall(MatDestroySubMatrices(nsubs, &bdiags)); 5495 PetscCall(PetscFree(mats)); 5496 } else { 5497 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 5498 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 5499 PetscCall(ISDestroy(&is_local)); 5500 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 5501 PetscCall(ISDestroy(&is_global)); 5502 } 5503 if (pcbddc->dbg_flag) { 5504 Vec x, x_change; 5505 PetscReal error; 5506 5507 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 5508 PetscCall(VecSetRandom(x, NULL)); 5509 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 5510 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5511 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5512 PetscCall(MatMult(new_mat, matis->x, matis->y)); 5513 if (!pcbddc->change_interior) { 5514 const PetscScalar *x, *y, *v; 5515 PetscReal lerror = 0.; 5516 PetscInt i; 5517 5518 PetscCall(VecGetArrayRead(matis->x, &x)); 5519 PetscCall(VecGetArrayRead(matis->y, &y)); 5520 PetscCall(VecGetArrayRead(matis->counter, &v)); 5521 for (i = 0; i < local_size; i++) 5522 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 5523 PetscCall(VecRestoreArrayRead(matis->x, &x)); 5524 PetscCall(VecRestoreArrayRead(matis->y, &y)); 5525 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 5526 PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 5527 if (error > PETSC_SMALL) { 5528 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5529 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 5530 } else { 5531 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 5532 } 5533 } 5534 } 5535 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5536 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5537 PetscCall(VecAXPY(x, -1.0, x_change)); 5538 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 5539 if (error > PETSC_SMALL) { 5540 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5541 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 5542 } else { 5543 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 5544 } 5545 } 5546 PetscCall(VecDestroy(&x)); 5547 PetscCall(VecDestroy(&x_change)); 5548 } 5549 5550 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5551 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 5552 5553 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5554 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 5555 if (isseqaij) { 5556 PetscCall(MatDestroy(&pcbddc->local_mat)); 5557 PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 5558 if (lA) { 5559 Mat work; 5560 PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 5561 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5562 PetscCall(MatDestroy(&work)); 5563 } 5564 } else { 5565 Mat work_mat; 5566 5567 PetscCall(MatDestroy(&pcbddc->local_mat)); 5568 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5569 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 5570 PetscCall(MatDestroy(&work_mat)); 5571 if (lA) { 5572 Mat work; 5573 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5574 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 5575 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5576 PetscCall(MatDestroy(&work)); 5577 } 5578 } 5579 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 5580 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 5581 PetscCall(MatDestroy(&new_mat)); 5582 PetscFunctionReturn(PETSC_SUCCESS); 5583 } 5584 5585 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5586 { 5587 PC_IS *pcis = (PC_IS *)pc->data; 5588 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5589 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5590 PetscInt *idx_R_local = NULL; 5591 PetscInt n_vertices, i, j, n_R, n_D, n_B; 5592 PetscInt vbs, bs; 5593 PetscBT bitmask = NULL; 5594 5595 PetscFunctionBegin; 5596 /* 5597 No need to setup local scatters if 5598 - primal space is unchanged 5599 AND 5600 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5601 AND 5602 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5603 */ 5604 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS); 5605 /* destroy old objects */ 5606 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5607 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5608 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5609 /* Set Non-overlapping dimensions */ 5610 n_B = pcis->n_B; 5611 n_D = pcis->n - n_B; 5612 n_vertices = pcbddc->n_vertices; 5613 5614 /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5615 5616 /* create auxiliary bitmask and allocate workspace */ 5617 if (!sub_schurs || !sub_schurs->reuse_solver) { 5618 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 5619 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 5620 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); 5621 5622 for (i = 0, n_R = 0; i < pcis->n; i++) { 5623 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i; 5624 } 5625 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5626 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5627 5628 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5629 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 5630 } 5631 5632 /* Block code */ 5633 vbs = 1; 5634 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 5635 if (bs > 1 && !(n_vertices % bs)) { 5636 PetscBool is_blocked = PETSC_TRUE; 5637 PetscInt *vary; 5638 if (!sub_schurs || !sub_schurs->reuse_solver) { 5639 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 5640 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 5641 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5642 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5643 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5644 for (i = 0; i < pcis->n / bs; i++) { 5645 if (vary[i] != 0 && vary[i] != bs) { 5646 is_blocked = PETSC_FALSE; 5647 break; 5648 } 5649 } 5650 PetscCall(PetscFree(vary)); 5651 } else { 5652 /* Verify directly the R set */ 5653 for (i = 0; i < n_R / bs; i++) { 5654 PetscInt j, node = idx_R_local[bs * i]; 5655 for (j = 1; j < bs; j++) { 5656 if (node != idx_R_local[bs * i + j] - j) { 5657 is_blocked = PETSC_FALSE; 5658 break; 5659 } 5660 } 5661 } 5662 } 5663 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5664 vbs = bs; 5665 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs; 5666 } 5667 } 5668 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5669 if (sub_schurs && sub_schurs->reuse_solver) { 5670 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5671 5672 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5673 PetscCall(ISDestroy(&reuse_solver->is_R)); 5674 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5675 reuse_solver->is_R = pcbddc->is_R_local; 5676 } else { 5677 PetscCall(PetscFree(idx_R_local)); 5678 } 5679 5680 /* print some info if requested */ 5681 if (pcbddc->dbg_flag) { 5682 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5683 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5684 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5685 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5686 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5687 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5688 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5689 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5690 } 5691 5692 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5693 if (!sub_schurs || !sub_schurs->reuse_solver) { 5694 IS is_aux1, is_aux2; 5695 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5696 5697 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5698 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5699 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5700 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5701 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i])); 5702 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5703 for (i = 0, j = 0; i < n_R; i++) { 5704 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5705 } 5706 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5707 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5708 for (i = 0, j = 0; i < n_B; i++) { 5709 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i; 5710 } 5711 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5712 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5713 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5714 PetscCall(ISDestroy(&is_aux1)); 5715 PetscCall(ISDestroy(&is_aux2)); 5716 5717 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5718 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5719 for (i = 0, j = 0; i < n_R; i++) { 5720 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5721 } 5722 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5723 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5724 PetscCall(ISDestroy(&is_aux1)); 5725 } 5726 PetscCall(PetscBTDestroy(&bitmask)); 5727 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5728 } else { 5729 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5730 IS tis; 5731 PetscInt schur_size; 5732 5733 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5734 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5735 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5736 PetscCall(ISDestroy(&tis)); 5737 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5738 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5739 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5740 PetscCall(ISDestroy(&tis)); 5741 } 5742 } 5743 PetscFunctionReturn(PETSC_SUCCESS); 5744 } 5745 5746 PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5747 { 5748 MatNullSpace NullSpace; 5749 Mat dmat; 5750 const Vec *nullvecs; 5751 Vec v, v2, *nullvecs2; 5752 VecScatter sct = NULL; 5753 PetscScalar *ddata; 5754 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5755 PetscBool nnsp_has_cnst; 5756 5757 PetscFunctionBegin; 5758 if (!is && !B) { /* MATIS */ 5759 Mat_IS *matis = (Mat_IS *)A->data; 5760 5761 if (!B) PetscCall(MatISGetLocalMat(A, &B)); 5762 sct = matis->cctx; 5763 PetscCall(PetscObjectReference((PetscObject)sct)); 5764 } else { 5765 PetscCall(MatGetNullSpace(B, &NullSpace)); 5766 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace)); 5767 if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5768 } 5769 PetscCall(MatGetNullSpace(A, &NullSpace)); 5770 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace)); 5771 if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5772 5773 PetscCall(MatCreateVecs(A, &v, NULL)); 5774 PetscCall(MatCreateVecs(B, &v2, NULL)); 5775 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); 5776 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs)); 5777 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5778 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5779 PetscCall(VecGetBlockSize(v2, &bs)); 5780 PetscCall(VecGetSize(v2, &N)); 5781 PetscCall(VecGetLocalSize(v2, &n)); 5782 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5783 for (k = 0; k < nnsp_size; k++) { 5784 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5785 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5786 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5787 } 5788 if (nnsp_has_cnst) { 5789 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5790 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5791 } 5792 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5793 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5794 5795 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5796 PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault)); 5797 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5798 PetscCall(MatDestroy(&dmat)); 5799 5800 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k])); 5801 PetscCall(PetscFree(nullvecs2)); 5802 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5803 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5804 PetscCall(VecDestroy(&v)); 5805 PetscCall(VecDestroy(&v2)); 5806 PetscCall(VecScatterDestroy(&sct)); 5807 PetscFunctionReturn(PETSC_SUCCESS); 5808 } 5809 5810 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5811 { 5812 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5813 PC_IS *pcis = (PC_IS *)pc->data; 5814 PC pc_temp; 5815 Mat A_RR; 5816 MatNullSpace nnsp; 5817 MatReuse reuse; 5818 PetscScalar m_one = -1.0; 5819 PetscReal value; 5820 PetscInt n_D, n_R; 5821 PetscBool issbaij, opts, isset, issym; 5822 PetscBool f = PETSC_FALSE; 5823 char dir_prefix[256], neu_prefix[256], str_level[16]; 5824 size_t len; 5825 5826 PetscFunctionBegin; 5827 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5828 /* approximate solver, propagate NearNullSpace if needed */ 5829 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5830 MatNullSpace gnnsp1, gnnsp2; 5831 PetscBool lhas, ghas; 5832 5833 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5834 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5835 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5836 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5837 PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5838 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); 5839 } 5840 5841 /* compute prefixes */ 5842 PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix))); 5843 PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix))); 5844 if (!pcbddc->current_level) { 5845 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5846 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5847 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5848 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5849 } else { 5850 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level)); 5851 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 5852 len -= 15; /* remove "pc_bddc_coarse_" */ 5853 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 5854 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 5855 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5856 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 5857 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 5858 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5859 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5860 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 5861 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 5862 } 5863 5864 /* DIRICHLET PROBLEM */ 5865 if (dirichlet) { 5866 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5867 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5868 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 5869 if (pcbddc->dbg_flag) { 5870 Mat A_IIn; 5871 5872 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 5873 PetscCall(MatDestroy(&pcis->A_II)); 5874 pcis->A_II = A_IIn; 5875 } 5876 } 5877 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5878 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 5879 5880 /* Matrix for Dirichlet problem is pcis->A_II */ 5881 n_D = pcis->n - pcis->n_B; 5882 opts = PETSC_FALSE; 5883 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5884 opts = PETSC_TRUE; 5885 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 5886 PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel)); 5887 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 5888 /* default */ 5889 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 5890 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 5891 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 5892 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5893 if (issbaij) { 5894 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5895 } else { 5896 PetscCall(PCSetType(pc_temp, PCLU)); 5897 } 5898 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 5899 } 5900 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 5901 PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view")); 5902 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 5903 /* Allow user's customization */ 5904 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5905 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5906 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5907 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 5908 } 5909 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5910 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5911 PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5912 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5913 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5914 const PetscInt *idxs; 5915 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5916 5917 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 5918 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 5919 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5920 for (i = 0; i < nl; i++) { 5921 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5922 } 5923 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 5924 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5925 PetscCall(PetscFree(scoords)); 5926 } 5927 if (sub_schurs && sub_schurs->reuse_solver) { 5928 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5929 5930 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 5931 } 5932 5933 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5934 if (!n_D) { 5935 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5936 PetscCall(PCSetType(pc_temp, PCNONE)); 5937 } 5938 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5939 /* set ksp_D into pcis data */ 5940 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5941 PetscCall(KSPDestroy(&pcis->ksp_D)); 5942 pcis->ksp_D = pcbddc->ksp_D; 5943 } 5944 5945 /* NEUMANN PROBLEM */ 5946 A_RR = NULL; 5947 if (neumann) { 5948 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5949 PetscInt ibs, mbs; 5950 PetscBool issbaij, reuse_neumann_solver, isset, issym; 5951 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5952 5953 reuse_neumann_solver = PETSC_FALSE; 5954 if (sub_schurs && sub_schurs->reuse_solver) { 5955 IS iP; 5956 5957 reuse_neumann_solver = PETSC_TRUE; 5958 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 5959 if (iP) reuse_neumann_solver = PETSC_FALSE; 5960 } 5961 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5962 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 5963 if (pcbddc->ksp_R) { /* already created ksp */ 5964 PetscInt nn_R; 5965 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 5966 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5967 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 5968 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5969 PetscCall(KSPReset(pcbddc->ksp_R)); 5970 PetscCall(MatDestroy(&A_RR)); 5971 reuse = MAT_INITIAL_MATRIX; 5972 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5973 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5974 PetscCall(MatDestroy(&A_RR)); 5975 reuse = MAT_INITIAL_MATRIX; 5976 } else { /* safe to reuse the matrix */ 5977 reuse = MAT_REUSE_MATRIX; 5978 } 5979 } 5980 /* last check */ 5981 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5982 PetscCall(MatDestroy(&A_RR)); 5983 reuse = MAT_INITIAL_MATRIX; 5984 } 5985 } else { /* first time, so we need to create the matrix */ 5986 reuse = MAT_INITIAL_MATRIX; 5987 } 5988 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5989 TODO: Get Rid of these conversions */ 5990 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 5991 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 5992 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 5993 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5994 if (matis->A == pcbddc->local_mat) { 5995 PetscCall(MatDestroy(&pcbddc->local_mat)); 5996 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5997 } else { 5998 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5999 } 6000 } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */ 6001 if (matis->A == pcbddc->local_mat) { 6002 PetscCall(MatDestroy(&pcbddc->local_mat)); 6003 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 6004 } else { 6005 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 6006 } 6007 } 6008 /* extract A_RR */ 6009 if (reuse_neumann_solver) { 6010 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6011 6012 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 6013 PetscCall(MatDestroy(&A_RR)); 6014 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 6015 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 6016 } else { 6017 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 6018 } 6019 } else { 6020 PetscCall(MatDestroy(&A_RR)); 6021 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 6022 PetscCall(PetscObjectReference((PetscObject)A_RR)); 6023 } 6024 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 6025 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 6026 } 6027 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 6028 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 6029 opts = PETSC_FALSE; 6030 if (!pcbddc->ksp_R) { /* create object if not present */ 6031 opts = PETSC_TRUE; 6032 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 6033 PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel)); 6034 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 6035 /* default */ 6036 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 6037 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 6038 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6039 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 6040 if (issbaij) { 6041 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 6042 } else { 6043 PetscCall(PCSetType(pc_temp, PCLU)); 6044 } 6045 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 6046 } 6047 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 6048 PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view")); 6049 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 6050 if (opts) { /* Allow user's customization once */ 6051 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 6052 } 6053 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6054 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 6055 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 6056 } 6057 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 6058 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6059 PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 6060 if (f && pcbddc->mat_graph->cloc && !nnsp) { 6061 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 6062 const PetscInt *idxs; 6063 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 6064 6065 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 6066 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 6067 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 6068 for (i = 0; i < nl; i++) { 6069 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 6070 } 6071 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 6072 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 6073 PetscCall(PetscFree(scoords)); 6074 } 6075 6076 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 6077 if (!n_R) { 6078 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6079 PetscCall(PCSetType(pc_temp, PCNONE)); 6080 } 6081 /* Reuse solver if it is present */ 6082 if (reuse_neumann_solver) { 6083 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6084 6085 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 6086 } 6087 PetscCall(KSPSetUp(pcbddc->ksp_R)); 6088 } 6089 6090 if (pcbddc->dbg_flag) { 6091 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6092 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6093 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 6094 } 6095 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 6096 6097 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 6098 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); 6099 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); 6100 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); 6101 /* check Dirichlet and Neumann solvers */ 6102 if (pcbddc->dbg_flag) { 6103 if (dirichlet) { /* Dirichlet */ 6104 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 6105 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 6106 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 6107 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 6108 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 6109 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 6110 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value)); 6111 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6112 } 6113 if (neumann) { /* Neumann */ 6114 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 6115 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 6116 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 6117 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 6118 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 6119 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 6120 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value)); 6121 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6122 } 6123 } 6124 /* free Neumann problem's matrix */ 6125 PetscCall(MatDestroy(&A_RR)); 6126 PetscFunctionReturn(PETSC_SUCCESS); 6127 } 6128 6129 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 6130 { 6131 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6132 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6133 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 6134 6135 PetscFunctionBegin; 6136 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.)); 6137 if (!pcbddc->switch_static) { 6138 if (applytranspose && pcbddc->local_auxmat1) { 6139 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 6140 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6141 } 6142 if (!reuse_solver) { 6143 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6144 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6145 } else { 6146 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6147 6148 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6149 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6150 } 6151 } else { 6152 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6153 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6154 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6155 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6156 if (applytranspose && pcbddc->local_auxmat1) { 6157 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 6158 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6159 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6160 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6161 } 6162 } 6163 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6164 if (!reuse_solver || pcbddc->switch_static) { 6165 if (applytranspose) { 6166 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6167 } else { 6168 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6169 } 6170 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 6171 } else { 6172 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6173 6174 if (applytranspose) { 6175 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6176 } else { 6177 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6178 } 6179 } 6180 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6181 PetscCall(VecSet(inout_B, 0.)); 6182 if (!pcbddc->switch_static) { 6183 if (!reuse_solver) { 6184 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6185 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6186 } else { 6187 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6188 6189 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6190 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6191 } 6192 if (!applytranspose && pcbddc->local_auxmat1) { 6193 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6194 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 6195 } 6196 } else { 6197 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6198 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6199 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6200 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6201 if (!applytranspose && pcbddc->local_auxmat1) { 6202 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6203 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 6204 } 6205 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6206 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6207 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6208 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6209 } 6210 PetscFunctionReturn(PETSC_SUCCESS); 6211 } 6212 6213 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 6214 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 6215 { 6216 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6217 PC_IS *pcis = (PC_IS *)pc->data; 6218 const PetscScalar zero = 0.0; 6219 6220 PetscFunctionBegin; 6221 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 6222 if (!pcbddc->benign_apply_coarse_only) { 6223 if (applytranspose) { 6224 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 6225 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6226 } else { 6227 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 6228 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6229 } 6230 } else { 6231 PetscCall(VecSet(pcbddc->vec1_P, zero)); 6232 } 6233 6234 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 6235 if (pcbddc->benign_n) { 6236 PetscScalar *array; 6237 PetscInt j; 6238 6239 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6240 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 6241 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6242 } 6243 6244 /* start communications from local primal nodes to rhs of coarse solver */ 6245 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 6246 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 6247 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 6248 6249 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 6250 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6251 if (pcbddc->coarse_ksp) { 6252 Mat coarse_mat; 6253 Vec rhs, sol; 6254 MatNullSpace nullsp; 6255 PetscBool isbddc = PETSC_FALSE; 6256 6257 if (pcbddc->benign_have_null) { 6258 PC coarse_pc; 6259 6260 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6261 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 6262 /* we need to propagate to coarser levels the need for a possible benign correction */ 6263 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 6264 PC_BDDC *coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6265 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 6266 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 6267 } 6268 } 6269 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 6270 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 6271 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 6272 if (applytranspose) { 6273 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 6274 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 6275 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6276 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 6277 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6278 } else { 6279 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 6280 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 6281 PC coarse_pc; 6282 6283 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 6284 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6285 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 6286 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 6287 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 6288 } else { 6289 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 6290 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6291 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6292 } 6293 } 6294 /* we don't need the benign correction at coarser levels anymore */ 6295 if (pcbddc->benign_have_null && isbddc) { 6296 PC coarse_pc; 6297 PC_BDDC *coarsepcbddc; 6298 6299 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6300 coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6301 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 6302 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 6303 } 6304 } 6305 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6306 6307 /* Local solution on R nodes */ 6308 if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); 6309 /* communications from coarse sol to local primal nodes */ 6310 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 6311 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 6312 6313 /* Sum contributions from the two levels */ 6314 if (!pcbddc->benign_apply_coarse_only) { 6315 if (applytranspose) { 6316 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6317 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6318 } else { 6319 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6320 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6321 } 6322 /* store p0 */ 6323 if (pcbddc->benign_n) { 6324 PetscScalar *array; 6325 PetscInt j; 6326 6327 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6328 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 6329 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6330 } 6331 } else { /* expand the coarse solution */ 6332 if (applytranspose) { 6333 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 6334 } else { 6335 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 6336 } 6337 } 6338 PetscFunctionReturn(PETSC_SUCCESS); 6339 } 6340 6341 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) 6342 { 6343 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6344 Vec from, to; 6345 const PetscScalar *array; 6346 6347 PetscFunctionBegin; 6348 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6349 from = pcbddc->coarse_vec; 6350 to = pcbddc->vec1_P; 6351 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6352 Vec tvec; 6353 6354 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6355 PetscCall(VecResetArray(tvec)); 6356 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 6357 PetscCall(VecGetArrayRead(tvec, &array)); 6358 PetscCall(VecPlaceArray(from, array)); 6359 PetscCall(VecRestoreArrayRead(tvec, &array)); 6360 } 6361 } else { /* from local to global -> put data in coarse right-hand side */ 6362 from = pcbddc->vec1_P; 6363 to = pcbddc->coarse_vec; 6364 } 6365 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6366 PetscFunctionReturn(PETSC_SUCCESS); 6367 } 6368 6369 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6370 { 6371 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6372 Vec from, to; 6373 const PetscScalar *array; 6374 6375 PetscFunctionBegin; 6376 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6377 from = pcbddc->coarse_vec; 6378 to = pcbddc->vec1_P; 6379 } else { /* from local to global -> put data in coarse right-hand side */ 6380 from = pcbddc->vec1_P; 6381 to = pcbddc->coarse_vec; 6382 } 6383 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6384 if (smode == SCATTER_FORWARD) { 6385 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6386 Vec tvec; 6387 6388 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6389 PetscCall(VecGetArrayRead(to, &array)); 6390 PetscCall(VecPlaceArray(tvec, array)); 6391 PetscCall(VecRestoreArrayRead(to, &array)); 6392 } 6393 } else { 6394 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6395 PetscCall(VecResetArray(from)); 6396 } 6397 } 6398 PetscFunctionReturn(PETSC_SUCCESS); 6399 } 6400 6401 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6402 { 6403 PC_IS *pcis = (PC_IS *)pc->data; 6404 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6405 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6406 /* one and zero */ 6407 PetscScalar one = 1.0, zero = 0.0; 6408 /* space to store constraints and their local indices */ 6409 PetscScalar *constraints_data; 6410 PetscInt *constraints_idxs, *constraints_idxs_B; 6411 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 6412 PetscInt *constraints_n; 6413 /* iterators */ 6414 PetscInt i, j, k, total_counts, total_counts_cc, cum; 6415 /* BLAS integers */ 6416 PetscBLASInt lwork, lierr; 6417 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 6418 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 6419 /* reuse */ 6420 PetscInt olocal_primal_size, olocal_primal_size_cc; 6421 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 6422 /* change of basis */ 6423 PetscBool qr_needed; 6424 PetscBT change_basis, qr_needed_idx; 6425 /* auxiliary stuff */ 6426 PetscInt *nnz, *is_indices; 6427 PetscInt ncc; 6428 /* some quantities */ 6429 PetscInt n_vertices, total_primal_vertices, valid_constraints; 6430 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 6431 PetscReal tol; /* tolerance for retaining eigenmodes */ 6432 6433 PetscFunctionBegin; 6434 tol = PetscSqrtReal(PETSC_SMALL); 6435 /* Destroy Mat objects computed previously */ 6436 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6437 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6438 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6439 /* save info on constraints from previous setup (if any) */ 6440 olocal_primal_size = pcbddc->local_primal_size; 6441 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6442 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 6443 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 6444 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 6445 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 6446 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6447 6448 if (!pcbddc->adaptive_selection) { 6449 IS ISForVertices, *ISForFaces, *ISForEdges; 6450 MatNullSpace nearnullsp; 6451 const Vec *nearnullvecs; 6452 Vec *localnearnullsp; 6453 PetscScalar *array; 6454 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 6455 PetscBool nnsp_has_cnst; 6456 /* LAPACK working arrays for SVD or POD */ 6457 PetscBool skip_lapack, boolforchange; 6458 PetscScalar *work; 6459 PetscReal *singular_vals; 6460 #if defined(PETSC_USE_COMPLEX) 6461 PetscReal *rwork; 6462 #endif 6463 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 6464 PetscBLASInt dummy_int = 1; 6465 PetscScalar dummy_scalar = 1.; 6466 PetscBool use_pod = PETSC_FALSE; 6467 6468 /* MKL SVD with same input gives different results on different processes! */ 6469 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6470 use_pod = PETSC_TRUE; 6471 #endif 6472 /* Get index sets for faces, edges and vertices from graph */ 6473 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 6474 o_nf = n_ISForFaces; 6475 o_ne = n_ISForEdges; 6476 n_vertices = 0; 6477 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 6478 /* print some info */ 6479 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6480 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 6481 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 6482 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6483 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6484 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 6485 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 6486 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 6487 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6488 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6489 } 6490 6491 if (!pcbddc->use_vertices) n_vertices = 0; 6492 if (!pcbddc->use_edges) n_ISForEdges = 0; 6493 if (!pcbddc->use_faces) n_ISForFaces = 0; 6494 6495 /* check if near null space is attached to global mat */ 6496 if (pcbddc->use_nnsp) { 6497 PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 6498 } else nearnullsp = NULL; 6499 6500 if (nearnullsp) { 6501 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 6502 /* remove any stored info */ 6503 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6504 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6505 /* store information for BDDC solver reuse */ 6506 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6507 pcbddc->onearnullspace = nearnullsp; 6508 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 6509 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); 6510 } else { /* if near null space is not provided BDDC uses constants by default */ 6511 nnsp_size = 0; 6512 nnsp_has_cnst = PETSC_TRUE; 6513 } 6514 /* get max number of constraints on a single cc */ 6515 max_constraints = nnsp_size; 6516 if (nnsp_has_cnst) max_constraints++; 6517 6518 /* 6519 Evaluate maximum storage size needed by the procedure 6520 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6521 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6522 There can be multiple constraints per connected component 6523 */ 6524 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 6525 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 6526 6527 total_counts = n_ISForFaces + n_ISForEdges; 6528 total_counts *= max_constraints; 6529 total_counts += n_vertices; 6530 PetscCall(PetscBTCreate(total_counts, &change_basis)); 6531 6532 total_counts = 0; 6533 max_size_of_constraint = 0; 6534 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 6535 IS used_is; 6536 if (i < n_ISForEdges) { 6537 used_is = ISForEdges[i]; 6538 } else { 6539 used_is = ISForFaces[i - n_ISForEdges]; 6540 } 6541 PetscCall(ISGetSize(used_is, &j)); 6542 total_counts += j; 6543 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 6544 } 6545 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 6546 6547 /* get local part of global near null space vectors */ 6548 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 6549 for (k = 0; k < nnsp_size; k++) { 6550 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 6551 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6552 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6553 } 6554 6555 /* whether or not to skip lapack calls */ 6556 skip_lapack = PETSC_TRUE; 6557 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6558 6559 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6560 if (!skip_lapack) { 6561 PetscScalar temp_work; 6562 6563 if (use_pod) { 6564 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6565 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 6566 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 6567 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 6568 #if defined(PETSC_USE_COMPLEX) 6569 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 6570 #endif 6571 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6572 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6573 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 6574 lwork = -1; 6575 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6576 #if !defined(PETSC_USE_COMPLEX) 6577 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 6578 #else 6579 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 6580 #endif 6581 PetscCall(PetscFPTrapPop()); 6582 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr); 6583 } else { 6584 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6585 /* SVD */ 6586 PetscInt max_n, min_n; 6587 max_n = max_size_of_constraint; 6588 min_n = max_constraints; 6589 if (max_size_of_constraint < max_constraints) { 6590 min_n = max_size_of_constraint; 6591 max_n = max_constraints; 6592 } 6593 PetscCall(PetscMalloc1(min_n, &singular_vals)); 6594 #if defined(PETSC_USE_COMPLEX) 6595 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 6596 #endif 6597 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6598 lwork = -1; 6599 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 6600 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 6601 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 6602 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6603 #if !defined(PETSC_USE_COMPLEX) 6604 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 6605 #else 6606 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 6607 #endif 6608 PetscCall(PetscFPTrapPop()); 6609 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 6610 #else 6611 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6612 #endif /* on missing GESVD */ 6613 } 6614 /* Allocate optimal workspace */ 6615 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 6616 PetscCall(PetscMalloc1(lwork, &work)); 6617 } 6618 /* Now we can loop on constraining sets */ 6619 total_counts = 0; 6620 constraints_idxs_ptr[0] = 0; 6621 constraints_data_ptr[0] = 0; 6622 /* vertices */ 6623 if (n_vertices) { 6624 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 6625 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 6626 for (i = 0; i < n_vertices; i++) { 6627 constraints_n[total_counts] = 1; 6628 constraints_data[total_counts] = 1.0; 6629 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 6630 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 6631 total_counts++; 6632 } 6633 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 6634 } 6635 6636 /* edges and faces */ 6637 total_counts_cc = total_counts; 6638 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 6639 IS used_is; 6640 PetscBool idxs_copied = PETSC_FALSE; 6641 6642 if (ncc < n_ISForEdges) { 6643 used_is = ISForEdges[ncc]; 6644 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6645 } else { 6646 used_is = ISForFaces[ncc - n_ISForEdges]; 6647 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6648 } 6649 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6650 6651 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6652 if (!size_of_constraint) continue; 6653 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6654 if (nnsp_has_cnst) { 6655 PetscScalar quad_value; 6656 6657 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6658 idxs_copied = PETSC_TRUE; 6659 6660 if (!pcbddc->use_nnsp_true) { 6661 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6662 } else { 6663 quad_value = 1.0; 6664 } 6665 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; 6666 temp_constraints++; 6667 total_counts++; 6668 } 6669 for (k = 0; k < nnsp_size; k++) { 6670 PetscReal real_value; 6671 PetscScalar *ptr_to_data; 6672 6673 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6674 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6675 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]]; 6676 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6677 /* check if array is null on the connected component */ 6678 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6679 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6680 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6681 temp_constraints++; 6682 total_counts++; 6683 if (!idxs_copied) { 6684 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6685 idxs_copied = PETSC_TRUE; 6686 } 6687 } 6688 } 6689 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6690 valid_constraints = temp_constraints; 6691 if (!pcbddc->use_nnsp_true && temp_constraints) { 6692 if (temp_constraints == 1) { /* just normalize the constraint */ 6693 PetscScalar norm, *ptr_to_data; 6694 6695 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6696 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6697 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6698 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6699 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6700 } else { /* perform SVD */ 6701 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6702 6703 if (use_pod) { 6704 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6705 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6706 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6707 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6708 from that computed using LAPACKgesvd 6709 -> This is due to a different computation of eigenvectors in LAPACKheev 6710 -> The quality of the POD-computed basis will be the same */ 6711 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6712 /* Store upper triangular part of correlation matrix */ 6713 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6714 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6715 for (j = 0; j < temp_constraints; j++) { 6716 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); 6717 } 6718 /* compute eigenvalues and eigenvectors of correlation matrix */ 6719 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6720 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6721 #if !defined(PETSC_USE_COMPLEX) 6722 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6723 #else 6724 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6725 #endif 6726 PetscCall(PetscFPTrapPop()); 6727 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr); 6728 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6729 j = 0; 6730 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6731 total_counts = total_counts - j; 6732 valid_constraints = temp_constraints - j; 6733 /* scale and copy POD basis into used quadrature memory */ 6734 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6735 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6736 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6737 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6738 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6739 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6740 if (j < temp_constraints) { 6741 PetscInt ii; 6742 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6743 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6744 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6745 PetscCall(PetscFPTrapPop()); 6746 for (k = 0; k < temp_constraints - j; k++) { 6747 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; 6748 } 6749 } 6750 } else { 6751 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6752 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6753 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6754 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6755 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6756 #if !defined(PETSC_USE_COMPLEX) 6757 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6758 #else 6759 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6760 #endif 6761 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr); 6762 PetscCall(PetscFPTrapPop()); 6763 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6764 k = temp_constraints; 6765 if (k > size_of_constraint) k = size_of_constraint; 6766 j = 0; 6767 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6768 valid_constraints = k - j; 6769 total_counts = total_counts - temp_constraints + valid_constraints; 6770 #else 6771 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6772 #endif /* on missing GESVD */ 6773 } 6774 } 6775 } 6776 /* update pointers information */ 6777 if (valid_constraints) { 6778 constraints_n[total_counts_cc] = valid_constraints; 6779 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6780 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6781 /* set change_of_basis flag */ 6782 if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc)); 6783 total_counts_cc++; 6784 } 6785 } 6786 /* free workspace */ 6787 if (!skip_lapack) { 6788 PetscCall(PetscFree(work)); 6789 #if defined(PETSC_USE_COMPLEX) 6790 PetscCall(PetscFree(rwork)); 6791 #endif 6792 PetscCall(PetscFree(singular_vals)); 6793 PetscCall(PetscFree(correlation_mat)); 6794 PetscCall(PetscFree(temp_basis)); 6795 } 6796 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k])); 6797 PetscCall(PetscFree(localnearnullsp)); 6798 /* free index sets of faces, edges and vertices */ 6799 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6800 } else { 6801 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6802 6803 total_counts = 0; 6804 n_vertices = 0; 6805 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 6806 max_constraints = 0; 6807 total_counts_cc = 0; 6808 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6809 total_counts += pcbddc->adaptive_constraints_n[i]; 6810 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6811 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6812 } 6813 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6814 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6815 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6816 constraints_data = pcbddc->adaptive_constraints_data; 6817 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6818 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6819 total_counts_cc = 0; 6820 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6821 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6822 } 6823 6824 max_size_of_constraint = 0; 6825 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6826 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6827 /* Change of basis */ 6828 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6829 if (pcbddc->use_change_of_basis) { 6830 for (i = 0; i < sub_schurs->n_subs; i++) { 6831 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices)); 6832 } 6833 } 6834 } 6835 pcbddc->local_primal_size = total_counts; 6836 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6837 6838 /* map constraints_idxs in boundary numbering */ 6839 if (pcbddc->use_change_of_basis) { 6840 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6841 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6842 } 6843 6844 /* Create constraint matrix */ 6845 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6846 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6847 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6848 6849 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6850 /* determine if a QR strategy is needed for change of basis */ 6851 qr_needed = pcbddc->use_qr_single; 6852 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 6853 total_primal_vertices = 0; 6854 pcbddc->local_primal_size_cc = 0; 6855 for (i = 0; i < total_counts_cc; i++) { 6856 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6857 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6858 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6859 pcbddc->local_primal_size_cc += 1; 6860 } else if (PetscBTLookup(change_basis, i)) { 6861 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6862 pcbddc->local_primal_size_cc += constraints_n[i]; 6863 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6864 PetscCall(PetscBTSet(qr_needed_idx, i)); 6865 qr_needed = PETSC_TRUE; 6866 } 6867 } else { 6868 pcbddc->local_primal_size_cc += 1; 6869 } 6870 } 6871 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6872 pcbddc->n_vertices = total_primal_vertices; 6873 /* permute indices in order to have a sorted set of vertices */ 6874 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 6875 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 6876 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 6877 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 6878 6879 /* nonzero structure of constraint matrix */ 6880 /* and get reference dof for local constraints */ 6881 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 6882 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 6883 6884 j = total_primal_vertices; 6885 total_counts = total_primal_vertices; 6886 cum = total_primal_vertices; 6887 for (i = n_vertices; i < total_counts_cc; i++) { 6888 if (!PetscBTLookup(change_basis, i)) { 6889 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6890 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6891 cum++; 6892 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6893 for (k = 0; k < constraints_n[i]; k++) { 6894 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6895 nnz[j + k] = size_of_constraint; 6896 } 6897 j += constraints_n[i]; 6898 } 6899 } 6900 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 6901 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6902 PetscCall(PetscFree(nnz)); 6903 6904 /* set values in constraint matrix */ 6905 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); 6906 total_counts = total_primal_vertices; 6907 for (i = n_vertices; i < total_counts_cc; i++) { 6908 if (!PetscBTLookup(change_basis, i)) { 6909 PetscInt *cols; 6910 6911 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6912 cols = constraints_idxs + constraints_idxs_ptr[i]; 6913 for (k = 0; k < constraints_n[i]; k++) { 6914 PetscInt row = total_counts + k; 6915 PetscScalar *vals; 6916 6917 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 6918 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 6919 } 6920 total_counts += constraints_n[i]; 6921 } 6922 } 6923 /* assembling */ 6924 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6925 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6926 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 6927 6928 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6929 if (pcbddc->use_change_of_basis) { 6930 /* dual and primal dofs on a single cc */ 6931 PetscInt dual_dofs, primal_dofs; 6932 /* working stuff for GEQRF */ 6933 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 6934 PetscBLASInt lqr_work; 6935 /* working stuff for UNGQR */ 6936 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 6937 PetscBLASInt lgqr_work; 6938 /* working stuff for TRTRS */ 6939 PetscScalar *trs_rhs = NULL; 6940 PetscBLASInt Blas_NRHS; 6941 /* pointers for values insertion into change of basis matrix */ 6942 PetscInt *start_rows, *start_cols; 6943 PetscScalar *start_vals; 6944 /* working stuff for values insertion */ 6945 PetscBT is_primal; 6946 PetscInt *aux_primal_numbering_B; 6947 /* matrix sizes */ 6948 PetscInt global_size, local_size; 6949 /* temporary change of basis */ 6950 Mat localChangeOfBasisMatrix; 6951 /* extra space for debugging */ 6952 PetscScalar *dbg_work = NULL; 6953 6954 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 6955 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 6956 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 6957 /* nonzeros for local mat */ 6958 PetscCall(PetscMalloc1(pcis->n, &nnz)); 6959 if (!pcbddc->benign_change || pcbddc->fake_change) { 6960 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 6961 } else { 6962 const PetscInt *ii; 6963 PetscInt n; 6964 PetscBool flg_row; 6965 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6966 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 6967 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6968 } 6969 for (i = n_vertices; i < total_counts_cc; i++) { 6970 if (PetscBTLookup(change_basis, i)) { 6971 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6972 if (PetscBTLookup(qr_needed_idx, i)) { 6973 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 6974 } else { 6975 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6976 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 6977 } 6978 } 6979 } 6980 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 6981 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6982 PetscCall(PetscFree(nnz)); 6983 /* Set interior change in the matrix */ 6984 if (!pcbddc->benign_change || pcbddc->fake_change) { 6985 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); 6986 } else { 6987 const PetscInt *ii, *jj; 6988 PetscScalar *aa; 6989 PetscInt n; 6990 PetscBool flg_row; 6991 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6992 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 6993 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); 6994 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 6995 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6996 } 6997 6998 if (pcbddc->dbg_flag) { 6999 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 7000 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 7001 } 7002 7003 /* Now we loop on the constraints which need a change of basis */ 7004 /* 7005 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 7006 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 7007 7008 Basic blocks of change of basis matrix T computed: 7009 7010 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 7011 7012 | 1 0 ... 0 s_1/S | 7013 | 0 1 ... 0 s_2/S | 7014 | ... | 7015 | 0 ... 1 s_{n-1}/S | 7016 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 7017 7018 with S = \sum_{i=1}^n s_i^2 7019 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 7020 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 7021 7022 - QR decomposition of constraints otherwise 7023 */ 7024 if (qr_needed && max_size_of_constraint) { 7025 /* space to store Q */ 7026 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 7027 /* array to store scaling factors for reflectors */ 7028 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 7029 /* first we issue queries for optimal work */ 7030 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7031 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 7032 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7033 lqr_work = -1; 7034 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 7035 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr); 7036 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 7037 PetscCall(PetscMalloc1(lqr_work, &qr_work)); 7038 lgqr_work = -1; 7039 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 7040 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 7041 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 7042 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 7043 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 7044 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 7045 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr); 7046 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 7047 PetscCall(PetscMalloc1(lgqr_work, &gqr_work)); 7048 /* array to store rhs and solution of triangular solver */ 7049 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 7050 /* allocating workspace for check */ 7051 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); 7052 } 7053 /* array to store whether a node is primal or not */ 7054 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 7055 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 7056 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 7057 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 7058 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); 7059 PetscCall(PetscFree(aux_primal_numbering_B)); 7060 7061 /* loop on constraints and see whether or not they need a change of basis and compute it */ 7062 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 7063 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 7064 if (PetscBTLookup(change_basis, total_counts)) { 7065 /* get constraint info */ 7066 primal_dofs = constraints_n[total_counts]; 7067 dual_dofs = size_of_constraint - primal_dofs; 7068 7069 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); 7070 7071 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 7072 7073 /* copy quadrature constraints for change of basis check */ 7074 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7075 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 7076 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7077 7078 /* compute QR decomposition of constraints */ 7079 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7080 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7081 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7082 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7083 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 7084 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr); 7085 PetscCall(PetscFPTrapPop()); 7086 7087 /* explicitly compute R^-T */ 7088 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 7089 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 7090 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7091 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 7092 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7093 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7094 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7095 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 7096 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr); 7097 PetscCall(PetscFPTrapPop()); 7098 7099 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 7100 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7101 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7102 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7103 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7104 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7105 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 7106 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr); 7107 PetscCall(PetscFPTrapPop()); 7108 7109 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 7110 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 7111 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 7112 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7113 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7114 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7115 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7116 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7117 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 7118 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7119 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 7120 PetscCall(PetscFPTrapPop()); 7121 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7122 7123 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 7124 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 7125 /* insert cols for primal dofs */ 7126 for (j = 0; j < primal_dofs; j++) { 7127 start_vals = &qr_basis[j * size_of_constraint]; 7128 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7129 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7130 } 7131 /* insert cols for dual dofs */ 7132 for (j = 0, k = 0; j < dual_dofs; k++) { 7133 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 7134 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 7135 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7136 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7137 j++; 7138 } 7139 } 7140 7141 /* check change of basis */ 7142 if (pcbddc->dbg_flag) { 7143 PetscInt ii, jj; 7144 PetscBool valid_qr = PETSC_TRUE; 7145 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 7146 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7147 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 7148 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7149 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 7150 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 7151 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7152 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 7153 PetscCall(PetscFPTrapPop()); 7154 for (jj = 0; jj < size_of_constraint; jj++) { 7155 for (ii = 0; ii < primal_dofs; ii++) { 7156 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 7157 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 7158 } 7159 } 7160 if (!valid_qr) { 7161 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 7162 for (jj = 0; jj < size_of_constraint; jj++) { 7163 for (ii = 0; ii < primal_dofs; ii++) { 7164 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 7165 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7166 } 7167 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 7168 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7169 } 7170 } 7171 } 7172 } else { 7173 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 7174 } 7175 } 7176 } else { /* simple transformation block */ 7177 PetscInt row, col; 7178 PetscScalar val, norm; 7179 7180 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7181 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 7182 for (j = 0; j < size_of_constraint; j++) { 7183 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 7184 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7185 if (!PetscBTLookup(is_primal, row_B)) { 7186 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 7187 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 7188 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 7189 } else { 7190 for (k = 0; k < size_of_constraint; k++) { 7191 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7192 if (row != col) { 7193 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 7194 } else { 7195 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 7196 } 7197 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 7198 } 7199 } 7200 } 7201 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); 7202 } 7203 } else { 7204 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); 7205 } 7206 } 7207 7208 /* free workspace */ 7209 if (qr_needed) { 7210 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 7211 PetscCall(PetscFree(trs_rhs)); 7212 PetscCall(PetscFree(qr_tau)); 7213 PetscCall(PetscFree(qr_work)); 7214 PetscCall(PetscFree(gqr_work)); 7215 PetscCall(PetscFree(qr_basis)); 7216 } 7217 PetscCall(PetscBTDestroy(&is_primal)); 7218 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7219 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7220 7221 /* assembling of global change of variable */ 7222 if (!pcbddc->fake_change) { 7223 Mat tmat; 7224 PetscInt bs; 7225 7226 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 7227 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 7228 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 7229 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 7230 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 7231 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 7232 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix)); 7233 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ)); 7234 PetscCall(MatGetBlockSize(pc->pmat, &bs)); 7235 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs)); 7236 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size)); 7237 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE)); 7238 PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 7239 PetscCall(MatDestroy(&tmat)); 7240 PetscCall(VecSet(pcis->vec1_global, 0.0)); 7241 PetscCall(VecSet(pcis->vec1_N, 1.0)); 7242 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7243 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7244 PetscCall(VecReciprocal(pcis->vec1_global)); 7245 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 7246 7247 /* check */ 7248 if (pcbddc->dbg_flag) { 7249 PetscReal error; 7250 Vec x, x_change; 7251 7252 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 7253 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 7254 PetscCall(VecSetRandom(x, NULL)); 7255 PetscCall(VecCopy(x, pcis->vec1_global)); 7256 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7257 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7258 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 7259 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7260 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7261 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 7262 PetscCall(VecAXPY(x, -1.0, x_change)); 7263 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 7264 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 7265 PetscCall(VecDestroy(&x)); 7266 PetscCall(VecDestroy(&x_change)); 7267 } 7268 /* adapt sub_schurs computed (if any) */ 7269 if (pcbddc->use_deluxe_scaling) { 7270 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 7271 7272 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 7273 if (sub_schurs && sub_schurs->S_Ej_all) { 7274 Mat S_new, tmat; 7275 IS is_all_N, is_V_Sall = NULL; 7276 7277 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 7278 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 7279 if (pcbddc->deluxe_zerorows) { 7280 ISLocalToGlobalMapping NtoSall; 7281 IS is_V; 7282 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 7283 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 7284 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 7285 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 7286 PetscCall(ISDestroy(&is_V)); 7287 } 7288 PetscCall(ISDestroy(&is_all_N)); 7289 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7290 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 7291 PetscCall(PetscObjectReference((PetscObject)S_new)); 7292 if (pcbddc->deluxe_zerorows) { 7293 const PetscScalar *array; 7294 const PetscInt *idxs_V, *idxs_all; 7295 PetscInt i, n_V; 7296 7297 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7298 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 7299 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 7300 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 7301 PetscCall(VecGetArrayRead(pcis->D, &array)); 7302 for (i = 0; i < n_V; i++) { 7303 PetscScalar val; 7304 PetscInt idx; 7305 7306 idx = idxs_V[i]; 7307 val = array[idxs_all[idxs_V[i]]]; 7308 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 7309 } 7310 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 7311 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 7312 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 7313 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 7314 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 7315 } 7316 sub_schurs->S_Ej_all = S_new; 7317 PetscCall(MatDestroy(&S_new)); 7318 if (sub_schurs->sum_S_Ej_all) { 7319 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7320 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 7321 PetscCall(PetscObjectReference((PetscObject)S_new)); 7322 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7323 sub_schurs->sum_S_Ej_all = S_new; 7324 PetscCall(MatDestroy(&S_new)); 7325 } 7326 PetscCall(ISDestroy(&is_V_Sall)); 7327 PetscCall(MatDestroy(&tmat)); 7328 } 7329 /* destroy any change of basis context in sub_schurs */ 7330 if (sub_schurs && sub_schurs->change) { 7331 PetscInt i; 7332 7333 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i])); 7334 PetscCall(PetscFree(sub_schurs->change)); 7335 } 7336 } 7337 if (pcbddc->switch_static) { /* need to save the local change */ 7338 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7339 } else { 7340 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 7341 } 7342 /* determine if any process has changed the pressures locally */ 7343 pcbddc->change_interior = pcbddc->benign_have_null; 7344 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7345 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7346 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7347 pcbddc->use_qr_single = qr_needed; 7348 } 7349 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7350 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7351 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7352 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7353 } else { 7354 Mat benign_global = NULL; 7355 if (pcbddc->benign_have_null) { 7356 Mat M; 7357 7358 pcbddc->change_interior = PETSC_TRUE; 7359 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 7360 PetscCall(VecReciprocal(pcis->vec1_N)); 7361 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 7362 if (pcbddc->benign_change) { 7363 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 7364 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 7365 } else { 7366 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 7367 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 7368 } 7369 PetscCall(MatISSetLocalMat(benign_global, M)); 7370 PetscCall(MatDestroy(&M)); 7371 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 7372 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 7373 } 7374 if (pcbddc->user_ChangeOfBasisMatrix) { 7375 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix)); 7376 PetscCall(MatDestroy(&benign_global)); 7377 } else if (pcbddc->benign_have_null) { 7378 pcbddc->ChangeOfBasisMatrix = benign_global; 7379 } 7380 } 7381 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7382 IS is_global; 7383 const PetscInt *gidxs; 7384 7385 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 7386 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 7387 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 7388 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 7389 PetscCall(ISDestroy(&is_global)); 7390 } 7391 } 7392 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); 7393 7394 if (!pcbddc->fake_change) { 7395 /* add pressure dofs to set of primal nodes for numbering purposes */ 7396 for (i = 0; i < pcbddc->benign_n; i++) { 7397 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7398 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7399 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7400 pcbddc->local_primal_size_cc++; 7401 pcbddc->local_primal_size++; 7402 } 7403 7404 /* check if a new primal space has been introduced (also take into account benign trick) */ 7405 pcbddc->new_primal_space_local = PETSC_TRUE; 7406 if (olocal_primal_size == pcbddc->local_primal_size) { 7407 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7408 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7409 if (!pcbddc->new_primal_space_local) { 7410 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7411 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7412 } 7413 } 7414 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7415 PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 7416 } 7417 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 7418 7419 /* flush dbg viewer */ 7420 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7421 7422 /* free workspace */ 7423 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7424 PetscCall(PetscBTDestroy(&change_basis)); 7425 if (!pcbddc->adaptive_selection) { 7426 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 7427 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 7428 } else { 7429 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 7430 PetscCall(PetscFree(constraints_n)); 7431 PetscCall(PetscFree(constraints_idxs_B)); 7432 } 7433 PetscFunctionReturn(PETSC_SUCCESS); 7434 } 7435 7436 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7437 { 7438 ISLocalToGlobalMapping map; 7439 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7440 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 7441 PetscInt i, N; 7442 PetscBool rcsr = PETSC_FALSE; 7443 7444 PetscFunctionBegin; 7445 if (pcbddc->recompute_topography) { 7446 pcbddc->graphanalyzed = PETSC_FALSE; 7447 /* Reset previously computed graph */ 7448 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7449 /* Init local Graph struct */ 7450 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 7451 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 7452 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 7453 7454 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); 7455 /* Check validity of the csr graph passed in by the user */ 7456 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 7457 pcbddc->mat_graph->nvtxs); 7458 7459 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7460 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7461 PetscInt *xadj, *adjncy; 7462 PetscInt nvtxs; 7463 PetscBool flg_row; 7464 Mat A; 7465 7466 PetscCall(PetscObjectReference((PetscObject)matis->A)); 7467 A = matis->A; 7468 for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) { 7469 Mat AtA; 7470 7471 PetscCall(MatProductCreate(A, A, NULL, &AtA)); 7472 PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_")); 7473 PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB)); 7474 PetscCall(MatProductSetFromOptions(AtA)); 7475 PetscCall(MatProductSymbolic(AtA)); 7476 PetscCall(MatProductClear(AtA)); 7477 /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */ 7478 AtA->assembled = PETSC_TRUE; 7479 PetscCall(MatDestroy(&A)); 7480 A = AtA; 7481 } 7482 PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7483 if (flg_row) { 7484 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 7485 pcbddc->computed_rowadj = PETSC_TRUE; 7486 PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7487 rcsr = PETSC_TRUE; 7488 } 7489 PetscCall(MatDestroy(&A)); 7490 } 7491 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7492 7493 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7494 PetscReal *lcoords; 7495 PetscInt n; 7496 MPI_Datatype dimrealtype; 7497 PetscMPIInt cdimi; 7498 7499 /* TODO: support for blocked */ 7500 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 7501 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7502 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 7503 PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi)); 7504 PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype)); 7505 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7506 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7507 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7508 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7509 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7510 7511 pcbddc->mat_graph->coords = lcoords; 7512 pcbddc->mat_graph->cloc = PETSC_TRUE; 7513 pcbddc->mat_graph->cnloc = n; 7514 } 7515 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 7516 pcbddc->mat_graph->nvtxs); 7517 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7518 7519 /* attach info on disconnected subdomains if present */ 7520 if (pcbddc->n_local_subs) { 7521 PetscInt *local_subs, n, totn; 7522 7523 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7524 PetscCall(PetscMalloc1(n, &local_subs)); 7525 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 7526 for (i = 0; i < pcbddc->n_local_subs; i++) { 7527 const PetscInt *idxs; 7528 PetscInt nl, j; 7529 7530 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 7531 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 7532 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 7533 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 7534 } 7535 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 7536 pcbddc->mat_graph->n_local_subs = totn + 1; 7537 pcbddc->mat_graph->local_subs = local_subs; 7538 } 7539 7540 /* Setup of Graph */ 7541 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 7542 } 7543 7544 if (!pcbddc->graphanalyzed) { 7545 /* Graph's connected components analysis */ 7546 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7547 pcbddc->graphanalyzed = PETSC_TRUE; 7548 pcbddc->corner_selected = pcbddc->corner_selection; 7549 } 7550 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7551 PetscFunctionReturn(PETSC_SUCCESS); 7552 } 7553 7554 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7555 { 7556 PetscInt i, j, n; 7557 PetscScalar *alphas; 7558 PetscReal norm, *onorms; 7559 7560 PetscFunctionBegin; 7561 n = *nio; 7562 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 7563 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 7564 PetscCall(VecNormalize(vecs[0], &norm)); 7565 if (norm < PETSC_SMALL) { 7566 onorms[0] = 0.0; 7567 PetscCall(VecSet(vecs[0], 0.0)); 7568 } else { 7569 onorms[0] = norm; 7570 } 7571 7572 for (i = 1; i < n; i++) { 7573 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 7574 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 7575 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 7576 PetscCall(VecNormalize(vecs[i], &norm)); 7577 if (norm < PETSC_SMALL) { 7578 onorms[i] = 0.0; 7579 PetscCall(VecSet(vecs[i], 0.0)); 7580 } else { 7581 onorms[i] = norm; 7582 } 7583 } 7584 /* push nonzero vectors at the beginning */ 7585 for (i = 0; i < n; i++) { 7586 if (onorms[i] == 0.0) { 7587 for (j = i + 1; j < n; j++) { 7588 if (onorms[j] != 0.0) { 7589 PetscCall(VecCopy(vecs[j], vecs[i])); 7590 onorms[i] = onorms[j]; 7591 onorms[j] = 0.0; 7592 break; 7593 } 7594 } 7595 } 7596 } 7597 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7598 PetscCall(PetscFree2(alphas, onorms)); 7599 PetscFunctionReturn(PETSC_SUCCESS); 7600 } 7601 7602 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) 7603 { 7604 ISLocalToGlobalMapping mapping; 7605 Mat A; 7606 PetscInt n_neighs, *neighs, *n_shared, **shared; 7607 PetscMPIInt size, rank, color; 7608 PetscInt *xadj, *adjncy; 7609 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 7610 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 7611 PetscInt void_procs, *procs_candidates = NULL; 7612 PetscInt xadj_count, *count; 7613 PetscBool ismatis, use_vwgt = PETSC_FALSE; 7614 PetscSubcomm psubcomm; 7615 MPI_Comm subcomm; 7616 7617 PetscFunctionBegin; 7618 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7619 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7620 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7621 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 7622 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 7623 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 7624 7625 if (have_void) *have_void = PETSC_FALSE; 7626 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 7627 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7628 PetscCall(MatISGetLocalMat(mat, &A)); 7629 PetscCall(MatGetLocalSize(A, &n, NULL)); 7630 im_active = !!n; 7631 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 7632 void_procs = size - active_procs; 7633 /* get ranks of non-active processes in mat communicator */ 7634 if (void_procs) { 7635 PetscInt ncand; 7636 7637 if (have_void) *have_void = PETSC_TRUE; 7638 PetscCall(PetscMalloc1(size, &procs_candidates)); 7639 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 7640 for (i = 0, ncand = 0; i < size; i++) { 7641 if (!procs_candidates[i]) procs_candidates[ncand++] = i; 7642 } 7643 /* force n_subdomains to be not greater that the number of non-active processes */ 7644 *n_subdomains = PetscMin(void_procs, *n_subdomains); 7645 } 7646 7647 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7648 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7649 PetscCall(MatGetSize(mat, &N, NULL)); 7650 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7651 PetscInt issize, isidx, dest; 7652 if (*n_subdomains == 1) dest = 0; 7653 else dest = rank; 7654 if (im_active) { 7655 issize = 1; 7656 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7657 isidx = procs_candidates[dest]; 7658 } else { 7659 isidx = dest; 7660 } 7661 } else { 7662 issize = 0; 7663 isidx = -1; 7664 } 7665 if (*n_subdomains != 1) *n_subdomains = active_procs; 7666 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 7667 PetscCall(PetscFree(procs_candidates)); 7668 PetscFunctionReturn(PETSC_SUCCESS); 7669 } 7670 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL)); 7671 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL)); 7672 threshold = PetscMax(threshold, 2); 7673 7674 /* Get info on mapping */ 7675 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7676 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7677 7678 /* build local CSR graph of subdomains' connectivity */ 7679 PetscCall(PetscMalloc1(2, &xadj)); 7680 xadj[0] = 0; 7681 xadj[1] = PetscMax(n_neighs - 1, 0); 7682 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7683 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7684 PetscCall(PetscCalloc1(n, &count)); 7685 for (i = 1; i < n_neighs; i++) 7686 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7687 7688 xadj_count = 0; 7689 for (i = 1; i < n_neighs; i++) { 7690 for (j = 0; j < n_shared[i]; j++) { 7691 if (count[shared[i][j]] < threshold) { 7692 adjncy[xadj_count] = neighs[i]; 7693 adjncy_wgt[xadj_count] = n_shared[i]; 7694 xadj_count++; 7695 break; 7696 } 7697 } 7698 } 7699 xadj[1] = xadj_count; 7700 PetscCall(PetscFree(count)); 7701 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7702 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7703 7704 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7705 7706 /* Restrict work on active processes only */ 7707 PetscCall(PetscMPIIntCast(im_active, &color)); 7708 if (void_procs) { 7709 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7710 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7711 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7712 subcomm = PetscSubcommChild(psubcomm); 7713 } else { 7714 psubcomm = NULL; 7715 subcomm = PetscObjectComm((PetscObject)mat); 7716 } 7717 7718 v_wgt = NULL; 7719 if (!color) { 7720 PetscCall(PetscFree(xadj)); 7721 PetscCall(PetscFree(adjncy)); 7722 PetscCall(PetscFree(adjncy_wgt)); 7723 } else { 7724 Mat subdomain_adj; 7725 IS new_ranks, new_ranks_contig; 7726 MatPartitioning partitioner; 7727 PetscInt rstart, rend; 7728 PetscMPIInt irstart = 0, irend = 0; 7729 PetscInt *is_indices, *oldranks; 7730 PetscMPIInt size; 7731 PetscBool aggregate; 7732 7733 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7734 if (void_procs) { 7735 PetscInt prank = rank; 7736 PetscCall(PetscMalloc1(size, &oldranks)); 7737 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7738 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); 7739 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7740 } else { 7741 oldranks = NULL; 7742 } 7743 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7744 if (aggregate) { /* TODO: all this part could be made more efficient */ 7745 PetscInt lrows, row, ncols, *cols; 7746 PetscMPIInt nrank; 7747 PetscScalar *vals; 7748 7749 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7750 lrows = 0; 7751 if (nrank < redprocs) { 7752 lrows = size / redprocs; 7753 if (nrank < size % redprocs) lrows++; 7754 } 7755 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7756 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7757 PetscCall(PetscMPIIntCast(rstart, &irstart)); 7758 PetscCall(PetscMPIIntCast(rend, &irend)); 7759 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7760 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7761 row = nrank; 7762 ncols = xadj[1] - xadj[0]; 7763 cols = adjncy; 7764 PetscCall(PetscMalloc1(ncols, &vals)); 7765 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7766 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7767 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7768 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7769 PetscCall(PetscFree(xadj)); 7770 PetscCall(PetscFree(adjncy)); 7771 PetscCall(PetscFree(adjncy_wgt)); 7772 PetscCall(PetscFree(vals)); 7773 if (use_vwgt) { 7774 Vec v; 7775 const PetscScalar *array; 7776 PetscInt nl; 7777 7778 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7779 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7780 PetscCall(VecAssemblyBegin(v)); 7781 PetscCall(VecAssemblyEnd(v)); 7782 PetscCall(VecGetLocalSize(v, &nl)); 7783 PetscCall(VecGetArrayRead(v, &array)); 7784 PetscCall(PetscMalloc1(nl, &v_wgt)); 7785 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7786 PetscCall(VecRestoreArrayRead(v, &array)); 7787 PetscCall(VecDestroy(&v)); 7788 } 7789 } else { 7790 PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7791 if (use_vwgt) { 7792 PetscCall(PetscMalloc1(1, &v_wgt)); 7793 v_wgt[0] = n; 7794 } 7795 } 7796 /* PetscCall(MatView(subdomain_adj,0)); */ 7797 7798 /* Partition */ 7799 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7800 #if defined(PETSC_HAVE_PTSCOTCH) 7801 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7802 #elif defined(PETSC_HAVE_PARMETIS) 7803 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7804 #else 7805 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7806 #endif 7807 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7808 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7809 *n_subdomains = PetscMin(size, *n_subdomains); 7810 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7811 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7812 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7813 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7814 7815 /* renumber new_ranks to avoid "holes" in new set of processors */ 7816 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7817 PetscCall(ISDestroy(&new_ranks)); 7818 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7819 if (!aggregate) { 7820 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7821 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7822 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7823 } else if (oldranks) { 7824 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7825 } else { 7826 ranks_send_to_idx[0] = is_indices[0]; 7827 } 7828 } else { 7829 PetscInt idx = 0; 7830 PetscMPIInt tag; 7831 MPI_Request *reqs; 7832 7833 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7834 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7835 for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); 7836 PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7837 PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE)); 7838 PetscCall(PetscFree(reqs)); 7839 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7840 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7841 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7842 } else if (oldranks) { 7843 ranks_send_to_idx[0] = oldranks[idx]; 7844 } else { 7845 ranks_send_to_idx[0] = idx; 7846 } 7847 } 7848 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7849 /* clean up */ 7850 PetscCall(PetscFree(oldranks)); 7851 PetscCall(ISDestroy(&new_ranks_contig)); 7852 PetscCall(MatDestroy(&subdomain_adj)); 7853 PetscCall(MatPartitioningDestroy(&partitioner)); 7854 } 7855 PetscCall(PetscSubcommDestroy(&psubcomm)); 7856 PetscCall(PetscFree(procs_candidates)); 7857 7858 /* assemble parallel IS for sends */ 7859 i = 1; 7860 if (!color) i = 0; 7861 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 7862 PetscFunctionReturn(PETSC_SUCCESS); 7863 } 7864 7865 typedef enum { 7866 MATDENSE_PRIVATE = 0, 7867 MATAIJ_PRIVATE, 7868 MATBAIJ_PRIVATE, 7869 MATSBAIJ_PRIVATE 7870 } MatTypePrivate; 7871 7872 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7873 { 7874 Mat local_mat; 7875 IS is_sends_internal; 7876 PetscInt rows, cols, new_local_rows; 7877 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 7878 PetscBool ismatis, isdense, newisdense, destroy_mat; 7879 ISLocalToGlobalMapping l2gmap; 7880 PetscInt *l2gmap_indices; 7881 const PetscInt *is_indices; 7882 MatType new_local_type; 7883 /* buffers */ 7884 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 7885 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 7886 PetscInt *recv_buffer_idxs_local; 7887 PetscScalar *ptr_vals, *recv_buffer_vals; 7888 const PetscScalar *send_buffer_vals; 7889 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 7890 /* MPI */ 7891 MPI_Comm comm, comm_n; 7892 PetscSubcomm subcomm; 7893 PetscMPIInt n_sends, n_recvs, size; 7894 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 7895 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 7896 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 7897 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 7898 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 7899 7900 PetscFunctionBegin; 7901 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7902 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7903 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7904 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 7905 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 7906 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 7907 PetscValidLogicalCollectiveBool(mat, reuse, 6); 7908 PetscValidLogicalCollectiveInt(mat, nis, 8); 7909 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 7910 if (nvecs) { 7911 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 7912 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 7913 } 7914 /* further checks */ 7915 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7916 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7917 /* XXX hack for multi_element */ 7918 if (!isdense) PetscCall(MatConvert(local_mat, MATDENSE, MAT_INPLACE_MATRIX, &local_mat)); 7919 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7920 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7921 7922 PetscCall(MatGetSize(local_mat, &rows, &cols)); 7923 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 7924 if (reuse && *mat_n) { 7925 PetscInt mrows, mcols, mnrows, mncols; 7926 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 7927 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 7928 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 7929 PetscCall(MatGetSize(mat, &mrows, &mcols)); 7930 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 7931 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 7932 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 7933 } 7934 PetscCall(MatGetBlockSize(local_mat, &bs)); 7935 PetscValidLogicalCollectiveInt(mat, bs, 1); 7936 7937 /* prepare IS for sending if not provided */ 7938 if (!is_sends) { 7939 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 7940 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 7941 } else { 7942 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7943 is_sends_internal = is_sends; 7944 } 7945 7946 /* get comm */ 7947 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 7948 7949 /* compute number of sends */ 7950 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 7951 PetscCall(PetscMPIIntCast(i, &n_sends)); 7952 7953 /* compute number of receives */ 7954 PetscCallMPI(MPI_Comm_size(comm, &size)); 7955 PetscCall(PetscMalloc1(size, &iflags)); 7956 PetscCall(PetscArrayzero(iflags, size)); 7957 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 7958 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 7959 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 7960 PetscCall(PetscFree(iflags)); 7961 7962 /* restrict comm if requested */ 7963 subcomm = NULL; 7964 destroy_mat = PETSC_FALSE; 7965 if (restrict_comm) { 7966 PetscMPIInt color, subcommsize; 7967 7968 color = 0; 7969 if (restrict_full) { 7970 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7971 } else { 7972 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7973 } 7974 PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 7975 subcommsize = size - subcommsize; 7976 /* check if reuse has been requested */ 7977 if (reuse) { 7978 if (*mat_n) { 7979 PetscMPIInt subcommsize2; 7980 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 7981 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 7982 comm_n = PetscObjectComm((PetscObject)*mat_n); 7983 } else { 7984 comm_n = PETSC_COMM_SELF; 7985 } 7986 } else { /* MAT_INITIAL_MATRIX */ 7987 PetscMPIInt rank; 7988 7989 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7990 PetscCall(PetscSubcommCreate(comm, &subcomm)); 7991 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 7992 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 7993 comm_n = PetscSubcommChild(subcomm); 7994 } 7995 /* flag to destroy *mat_n if not significative */ 7996 if (color) destroy_mat = PETSC_TRUE; 7997 } else { 7998 comm_n = comm; 7999 } 8000 8001 /* prepare send/receive buffers */ 8002 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 8003 PetscCall(PetscArrayzero(ilengths_idxs, size)); 8004 PetscCall(PetscMalloc1(size, &ilengths_vals)); 8005 PetscCall(PetscArrayzero(ilengths_vals, size)); 8006 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); 8007 8008 /* Get data from local matrices */ 8009 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 8010 /* TODO: See below some guidelines on how to prepare the local buffers */ 8011 /* 8012 send_buffer_vals should contain the raw values of the local matrix 8013 send_buffer_idxs should contain: 8014 - MatType_PRIVATE type 8015 - PetscInt size_of_l2gmap 8016 - PetscInt global_row_indices[size_of_l2gmap] 8017 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 8018 */ 8019 { 8020 ISLocalToGlobalMapping mapping; 8021 8022 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 8023 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 8024 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 8025 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 8026 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 8027 send_buffer_idxs[1] = i; 8028 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 8029 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 8030 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 8031 PetscCall(PetscMPIIntCast(i, &len)); 8032 for (i = 0; i < n_sends; i++) { 8033 ilengths_vals[is_indices[i]] = len * len; 8034 ilengths_idxs[is_indices[i]] = len + 2; 8035 } 8036 } 8037 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 8038 /* additional is (if any) */ 8039 if (nis) { 8040 PetscMPIInt psum; 8041 PetscInt j; 8042 for (j = 0, psum = 0; j < nis; j++) { 8043 PetscInt plen; 8044 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8045 PetscCall(PetscMPIIntCast(plen, &len)); 8046 psum += len + 1; /* indices + length */ 8047 } 8048 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 8049 for (j = 0, psum = 0; j < nis; j++) { 8050 PetscInt plen; 8051 const PetscInt *is_array_idxs; 8052 PetscCall(ISGetLocalSize(isarray[j], &plen)); 8053 send_buffer_idxs_is[psum] = plen; 8054 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 8055 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 8056 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 8057 psum += plen + 1; /* indices + length */ 8058 } 8059 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum; 8060 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 8061 } 8062 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8063 8064 buf_size_idxs = 0; 8065 buf_size_vals = 0; 8066 buf_size_idxs_is = 0; 8067 buf_size_vecs = 0; 8068 for (i = 0; i < n_recvs; i++) { 8069 buf_size_idxs += olengths_idxs[i]; 8070 buf_size_vals += olengths_vals[i]; 8071 if (nis) buf_size_idxs_is += olengths_idxs_is[i]; 8072 if (nvecs) buf_size_vecs += olengths_idxs[i]; 8073 } 8074 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 8075 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 8076 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 8077 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 8078 8079 /* get new tags for clean communications */ 8080 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 8081 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 8082 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 8083 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 8084 8085 /* allocate for requests */ 8086 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 8087 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 8088 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 8089 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 8090 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 8091 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 8092 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 8093 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 8094 8095 /* communications */ 8096 ptr_idxs = recv_buffer_idxs; 8097 ptr_vals = recv_buffer_vals; 8098 ptr_idxs_is = recv_buffer_idxs_is; 8099 ptr_vecs = recv_buffer_vecs; 8100 for (i = 0; i < n_recvs; i++) { 8101 PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i])); 8102 PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i])); 8103 ptr_idxs += olengths_idxs[i]; 8104 ptr_vals += olengths_vals[i]; 8105 if (nis) { 8106 PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i])); 8107 ptr_idxs_is += olengths_idxs_is[i]; 8108 } 8109 if (nvecs) { 8110 PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i])); 8111 ptr_vecs += olengths_idxs[i] - 2; 8112 } 8113 } 8114 for (i = 0; i < n_sends; i++) { 8115 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 8116 PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 8117 PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 8118 if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); 8119 if (nvecs) { 8120 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8121 PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 8122 } 8123 } 8124 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 8125 PetscCall(ISDestroy(&is_sends_internal)); 8126 8127 /* assemble new l2g map */ 8128 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 8129 ptr_idxs = recv_buffer_idxs; 8130 new_local_rows = 0; 8131 for (i = 0; i < n_recvs; i++) { 8132 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8133 ptr_idxs += olengths_idxs[i]; 8134 } 8135 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 8136 ptr_idxs = recv_buffer_idxs; 8137 new_local_rows = 0; 8138 for (i = 0; i < n_recvs; i++) { 8139 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 8140 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8141 ptr_idxs += olengths_idxs[i]; 8142 } 8143 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 8144 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 8145 PetscCall(PetscFree(l2gmap_indices)); 8146 8147 /* infer new local matrix type from received local matrices type */ 8148 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 8149 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 8150 if (n_recvs) { 8151 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 8152 ptr_idxs = recv_buffer_idxs; 8153 for (i = 0; i < n_recvs; i++) { 8154 if ((PetscInt)new_local_type_private != *ptr_idxs) { 8155 new_local_type_private = MATAIJ_PRIVATE; 8156 break; 8157 } 8158 ptr_idxs += olengths_idxs[i]; 8159 } 8160 switch (new_local_type_private) { 8161 case MATDENSE_PRIVATE: 8162 new_local_type = MATSEQAIJ; 8163 bs = 1; 8164 break; 8165 case MATAIJ_PRIVATE: 8166 new_local_type = MATSEQAIJ; 8167 bs = 1; 8168 break; 8169 case MATBAIJ_PRIVATE: 8170 new_local_type = MATSEQBAIJ; 8171 break; 8172 case MATSBAIJ_PRIVATE: 8173 new_local_type = MATSEQSBAIJ; 8174 break; 8175 default: 8176 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 8177 } 8178 } else { /* by default, new_local_type is seqaij */ 8179 new_local_type = MATSEQAIJ; 8180 bs = 1; 8181 } 8182 8183 /* create MATIS object if needed */ 8184 if (!reuse) { 8185 PetscCall(MatGetSize(mat, &rows, &cols)); 8186 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8187 } else { 8188 /* it also destroys the local matrices */ 8189 if (*mat_n) { 8190 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 8191 } else { /* this is a fake object */ 8192 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8193 } 8194 } 8195 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 8196 PetscCall(MatSetType(local_mat, new_local_type)); 8197 8198 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 8199 8200 /* Global to local map of received indices */ 8201 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 8202 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 8203 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 8204 8205 /* restore attributes -> type of incoming data and its size */ 8206 buf_size_idxs = 0; 8207 for (i = 0; i < n_recvs; i++) { 8208 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 8209 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 8210 buf_size_idxs += olengths_idxs[i]; 8211 } 8212 PetscCall(PetscFree(recv_buffer_idxs)); 8213 8214 /* set preallocation */ 8215 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 8216 if (!newisdense) { 8217 PetscInt *new_local_nnz = NULL; 8218 8219 ptr_idxs = recv_buffer_idxs_local; 8220 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); 8221 for (i = 0; i < n_recvs; i++) { 8222 PetscInt j; 8223 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 8224 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); 8225 } else { 8226 /* TODO */ 8227 } 8228 ptr_idxs += olengths_idxs[i]; 8229 } 8230 if (new_local_nnz) { 8231 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 8232 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 8233 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 8234 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8235 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 8236 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8237 } else { 8238 PetscCall(MatSetUp(local_mat)); 8239 } 8240 PetscCall(PetscFree(new_local_nnz)); 8241 } else { 8242 PetscCall(MatSetUp(local_mat)); 8243 } 8244 8245 /* set values */ 8246 ptr_vals = recv_buffer_vals; 8247 ptr_idxs = recv_buffer_idxs_local; 8248 for (i = 0; i < n_recvs; i++) { 8249 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 8250 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 8251 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 8252 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 8253 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 8254 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 8255 } else { 8256 /* TODO */ 8257 } 8258 ptr_idxs += olengths_idxs[i]; 8259 ptr_vals += olengths_vals[i]; 8260 } 8261 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 8262 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 8263 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 8264 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 8265 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 8266 PetscCall(PetscFree(recv_buffer_vals)); 8267 8268 #if 0 8269 if (!restrict_comm) { /* check */ 8270 Vec lvec,rvec; 8271 PetscReal infty_error; 8272 8273 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 8274 PetscCall(VecSetRandom(rvec,NULL)); 8275 PetscCall(MatMult(mat,rvec,lvec)); 8276 PetscCall(VecScale(lvec,-1.0)); 8277 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 8278 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 8279 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 8280 PetscCall(VecDestroy(&rvec)); 8281 PetscCall(VecDestroy(&lvec)); 8282 } 8283 #endif 8284 8285 /* assemble new additional is (if any) */ 8286 if (nis) { 8287 PetscInt **temp_idxs, *count_is, j, psum; 8288 8289 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 8290 PetscCall(PetscCalloc1(nis, &count_is)); 8291 ptr_idxs = recv_buffer_idxs_is; 8292 psum = 0; 8293 for (i = 0; i < n_recvs; i++) { 8294 for (j = 0; j < nis; j++) { 8295 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8296 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8297 psum += plen; 8298 ptr_idxs += plen + 1; /* shift pointer to received data */ 8299 } 8300 } 8301 PetscCall(PetscMalloc1(nis, &temp_idxs)); 8302 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 8303 for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]); 8304 PetscCall(PetscArrayzero(count_is, nis)); 8305 ptr_idxs = recv_buffer_idxs_is; 8306 for (i = 0; i < n_recvs; i++) { 8307 for (j = 0; j < nis; j++) { 8308 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8309 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 8310 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8311 ptr_idxs += plen + 1; /* shift pointer to received data */ 8312 } 8313 } 8314 for (i = 0; i < nis; i++) { 8315 PetscCall(ISDestroy(&isarray[i])); 8316 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 8317 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 8318 } 8319 PetscCall(PetscFree(count_is)); 8320 PetscCall(PetscFree(temp_idxs[0])); 8321 PetscCall(PetscFree(temp_idxs)); 8322 } 8323 /* free workspace */ 8324 PetscCall(PetscFree(recv_buffer_idxs_is)); 8325 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 8326 PetscCall(PetscFree(send_buffer_idxs)); 8327 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 8328 if (isdense) { 8329 PetscCall(MatISGetLocalMat(mat, &local_mat)); 8330 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 8331 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8332 } else { 8333 /* PetscCall(PetscFree(send_buffer_vals)); */ 8334 } 8335 if (nis) { 8336 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 8337 PetscCall(PetscFree(send_buffer_idxs_is)); 8338 } 8339 8340 if (nvecs) { 8341 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 8342 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 8343 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8344 PetscCall(VecDestroy(&nnsp_vec[0])); 8345 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 8346 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 8347 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 8348 /* set values */ 8349 ptr_vals = recv_buffer_vecs; 8350 ptr_idxs = recv_buffer_idxs_local; 8351 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8352 for (i = 0; i < n_recvs; i++) { 8353 PetscInt j; 8354 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); 8355 ptr_idxs += olengths_idxs[i]; 8356 ptr_vals += olengths_idxs[i] - 2; 8357 } 8358 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8359 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8360 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8361 } 8362 8363 PetscCall(PetscFree(recv_buffer_vecs)); 8364 PetscCall(PetscFree(recv_buffer_idxs_local)); 8365 PetscCall(PetscFree(recv_req_idxs)); 8366 PetscCall(PetscFree(recv_req_vals)); 8367 PetscCall(PetscFree(recv_req_vecs)); 8368 PetscCall(PetscFree(recv_req_idxs_is)); 8369 PetscCall(PetscFree(send_req_idxs)); 8370 PetscCall(PetscFree(send_req_vals)); 8371 PetscCall(PetscFree(send_req_vecs)); 8372 PetscCall(PetscFree(send_req_idxs_is)); 8373 PetscCall(PetscFree(ilengths_vals)); 8374 PetscCall(PetscFree(ilengths_idxs)); 8375 PetscCall(PetscFree(olengths_vals)); 8376 PetscCall(PetscFree(olengths_idxs)); 8377 PetscCall(PetscFree(onodes)); 8378 if (nis) { 8379 PetscCall(PetscFree(ilengths_idxs_is)); 8380 PetscCall(PetscFree(olengths_idxs_is)); 8381 PetscCall(PetscFree(onodes_is)); 8382 } 8383 PetscCall(PetscSubcommDestroy(&subcomm)); 8384 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 8385 PetscCall(MatDestroy(mat_n)); 8386 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i])); 8387 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8388 PetscCall(VecDestroy(&nnsp_vec[0])); 8389 } 8390 *mat_n = NULL; 8391 } 8392 PetscFunctionReturn(PETSC_SUCCESS); 8393 } 8394 8395 /* temporary hack into ksp private data structure */ 8396 #include <petsc/private/kspimpl.h> 8397 8398 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat) 8399 { 8400 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8401 PC_IS *pcis = (PC_IS *)pc->data; 8402 PCBDDCGraph graph = pcbddc->mat_graph; 8403 Mat coarse_mat, coarse_mat_is; 8404 Mat coarsedivudotp = NULL; 8405 Mat coarseG, t_coarse_mat_is; 8406 MatNullSpace CoarseNullSpace = NULL; 8407 ISLocalToGlobalMapping coarse_islg; 8408 IS coarse_is, *isarray, corners; 8409 PetscInt i, im_active = -1, active_procs = -1; 8410 PetscInt nis, nisdofs, nisneu, nisvert; 8411 PetscInt coarse_eqs_per_proc, coarsening_ratio; 8412 PC pc_temp; 8413 PCType coarse_pc_type; 8414 KSPType coarse_ksp_type; 8415 PetscBool multilevel_requested, multilevel_allowed; 8416 PetscBool coarse_reuse, multi_element = graph->multi_element; 8417 PetscInt ncoarse, nedcfield; 8418 PetscBool compute_vecs = PETSC_FALSE; 8419 PetscScalar *array; 8420 MatReuse coarse_mat_reuse; 8421 PetscBool restr, full_restr, have_void; 8422 PetscMPIInt size; 8423 8424 PetscFunctionBegin; 8425 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8426 /* Assign global numbering to coarse dofs */ 8427 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8428 PetscInt ocoarse_size; 8429 compute_vecs = PETSC_TRUE; 8430 8431 pcbddc->new_primal_space = PETSC_TRUE; 8432 ocoarse_size = pcbddc->coarse_size; 8433 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8434 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 8435 /* see if we can avoid some work */ 8436 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8437 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8438 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8439 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8440 coarse_reuse = PETSC_FALSE; 8441 } else { /* we can safely reuse already computed coarse matrix */ 8442 coarse_reuse = PETSC_TRUE; 8443 } 8444 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8445 coarse_reuse = PETSC_FALSE; 8446 } 8447 /* reset any subassembling information */ 8448 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8449 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8450 coarse_reuse = PETSC_TRUE; 8451 } 8452 if (coarse_reuse && pcbddc->coarse_ksp) { 8453 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 8454 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8455 coarse_mat_reuse = MAT_REUSE_MATRIX; 8456 } else { 8457 coarse_mat = NULL; 8458 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8459 } 8460 8461 /* creates temporary l2gmap and IS for coarse indexes */ 8462 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 8463 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 8464 8465 /* creates temporary MATIS object for coarse matrix */ 8466 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is)); 8467 PetscCall(MatSetType(t_coarse_mat_is, MATIS)); 8468 PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size)); 8469 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_TRUE)); 8470 PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg)); 8471 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat)); 8472 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8473 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8474 PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view")); 8475 8476 /* count "active" (i.e. with positive local size) and "void" processes */ 8477 im_active = !!pcis->n; 8478 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8479 8480 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8481 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8482 /* full_restr : just use the receivers from the subassembling pattern */ 8483 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 8484 coarse_mat_is = NULL; 8485 multilevel_allowed = PETSC_FALSE; 8486 multilevel_requested = PETSC_FALSE; 8487 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 8488 if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1); 8489 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8490 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8491 coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio; 8492 if (multilevel_requested) { 8493 ncoarse = active_procs / coarsening_ratio; 8494 restr = PETSC_FALSE; 8495 full_restr = PETSC_FALSE; 8496 } else { 8497 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 8498 restr = PETSC_TRUE; 8499 full_restr = PETSC_TRUE; 8500 } 8501 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8502 ncoarse = PetscMax(1, ncoarse); 8503 if (!pcbddc->coarse_subassembling) { 8504 if (coarsening_ratio > 1) { 8505 if (multilevel_requested) { 8506 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8507 } else { 8508 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8509 } 8510 } else { 8511 PetscMPIInt rank; 8512 8513 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 8514 have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE; 8515 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 8516 } 8517 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8518 PetscInt psum; 8519 if (pcbddc->coarse_ksp) psum = 1; 8520 else psum = 0; 8521 PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8522 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8523 } 8524 /* determine if we can go multilevel */ 8525 if (multilevel_requested) { 8526 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8527 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8528 } 8529 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8530 8531 /* dump subassembling pattern */ 8532 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); 8533 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8534 nedcfield = -1; 8535 corners = NULL; 8536 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8537 PetscInt *tidxs, *tidxs2, nout, tsize, i; 8538 const PetscInt *idxs; 8539 ISLocalToGlobalMapping tmap; 8540 8541 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8542 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 8543 /* allocate space for temporary storage */ 8544 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 8545 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 8546 /* allocate for IS array */ 8547 nisdofs = pcbddc->n_ISForDofsLocal; 8548 if (pcbddc->nedclocal) { 8549 if (pcbddc->nedfield > -1) { 8550 nedcfield = pcbddc->nedfield; 8551 } else { 8552 nedcfield = 0; 8553 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 8554 nisdofs = 1; 8555 } 8556 } 8557 nisneu = !!pcbddc->NeumannBoundariesLocal; 8558 nisvert = 0; /* nisvert is not used */ 8559 nis = nisdofs + nisneu + nisvert; 8560 PetscCall(PetscMalloc1(nis, &isarray)); 8561 /* dofs splitting */ 8562 for (i = 0; i < nisdofs; i++) { 8563 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8564 if (nedcfield != i) { 8565 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 8566 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8567 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8568 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8569 } else { 8570 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 8571 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 8572 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8573 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8574 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 8575 } 8576 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8577 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 8578 /* PetscCall(ISView(isarray[i],0)); */ 8579 } 8580 /* neumann boundaries */ 8581 if (pcbddc->NeumannBoundariesLocal) { 8582 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8583 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 8584 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8585 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8586 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8587 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8588 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 8589 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8590 } 8591 /* coordinates */ 8592 if (pcbddc->corner_selected) { 8593 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8594 PetscCall(ISGetLocalSize(corners, &tsize)); 8595 PetscCall(ISGetIndices(corners, &idxs)); 8596 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8597 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8598 PetscCall(ISRestoreIndices(corners, &idxs)); 8599 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8600 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8601 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 8602 } 8603 PetscCall(PetscFree(tidxs)); 8604 PetscCall(PetscFree(tidxs2)); 8605 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8606 } else { 8607 nis = 0; 8608 nisdofs = 0; 8609 nisneu = 0; 8610 nisvert = 0; 8611 isarray = NULL; 8612 } 8613 /* destroy no longer needed map */ 8614 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8615 8616 /* subassemble */ 8617 if (multilevel_allowed) { 8618 Vec vp[1]; 8619 PetscInt nvecs = 0; 8620 PetscBool reuse; 8621 8622 vp[0] = NULL; 8623 /* XXX HDIV also */ 8624 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8625 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 8626 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 8627 PetscCall(VecSetType(vp[0], VECSTANDARD)); 8628 nvecs = 1; 8629 8630 if (pcbddc->divudotp) { 8631 Mat B, loc_divudotp; 8632 Vec v, p; 8633 IS dummy; 8634 PetscInt np; 8635 8636 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 8637 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 8638 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 8639 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 8640 PetscCall(MatCreateVecs(B, &v, &p)); 8641 PetscCall(VecSet(p, 1.)); 8642 PetscCall(MatMultTranspose(B, p, v)); 8643 PetscCall(VecDestroy(&p)); 8644 PetscCall(MatDestroy(&B)); 8645 PetscCall(VecGetArray(vp[0], &array)); 8646 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 8647 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 8648 PetscCall(VecResetArray(pcbddc->vec1_P)); 8649 PetscCall(VecRestoreArray(vp[0], &array)); 8650 PetscCall(ISDestroy(&dummy)); 8651 PetscCall(VecDestroy(&v)); 8652 } 8653 } 8654 if (coarse_mat) reuse = PETSC_TRUE; 8655 else reuse = PETSC_FALSE; 8656 if (multi_element) { 8657 /* XXX divudotp */ 8658 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_FALSE)); 8659 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8660 coarse_mat_is = t_coarse_mat_is; 8661 } else { 8662 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8663 if (reuse) { 8664 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 8665 } else { 8666 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 8667 } 8668 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8669 PetscScalar *arraym; 8670 const PetscScalar *arrayv; 8671 PetscInt nl; 8672 PetscCall(VecGetLocalSize(vp[0], &nl)); 8673 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 8674 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 8675 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 8676 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 8677 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 8678 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 8679 PetscCall(VecDestroy(&vp[0])); 8680 } else { 8681 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 8682 } 8683 } 8684 } else { 8685 if (ncoarse != size) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 8686 else { 8687 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8688 coarse_mat_is = t_coarse_mat_is; 8689 } 8690 } 8691 if (coarse_mat_is || coarse_mat) { 8692 if (!multilevel_allowed) { 8693 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 8694 } else { 8695 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8696 if (coarse_mat_is) { 8697 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8698 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8699 coarse_mat = coarse_mat_is; 8700 } 8701 } 8702 } 8703 PetscCall(MatDestroy(&t_coarse_mat_is)); 8704 PetscCall(MatDestroy(&coarse_mat_is)); 8705 8706 /* create local to global scatters for coarse problem */ 8707 if (compute_vecs) { 8708 PetscInt lrows; 8709 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8710 if (coarse_mat) { 8711 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8712 } else { 8713 lrows = 0; 8714 } 8715 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8716 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8717 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8718 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8719 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8720 } 8721 PetscCall(ISDestroy(&coarse_is)); 8722 8723 /* set defaults for coarse KSP and PC */ 8724 if (multilevel_allowed) { 8725 coarse_ksp_type = KSPRICHARDSON; 8726 coarse_pc_type = PCBDDC; 8727 } else { 8728 coarse_ksp_type = KSPPREONLY; 8729 coarse_pc_type = PCREDUNDANT; 8730 } 8731 8732 /* print some info if requested */ 8733 if (pcbddc->dbg_flag) { 8734 if (!multilevel_allowed) { 8735 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8736 if (multilevel_requested) { 8737 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio)); 8738 } else if (pcbddc->max_levels) { 8739 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8740 } 8741 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8742 } 8743 } 8744 8745 /* communicate coarse discrete gradient */ 8746 coarseG = NULL; 8747 if (pcbddc->nedcG && multilevel_allowed) { 8748 MPI_Comm ccomm; 8749 if (coarse_mat) { 8750 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8751 } else { 8752 ccomm = MPI_COMM_NULL; 8753 } 8754 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8755 } 8756 8757 /* create the coarse KSP object only once with defaults */ 8758 if (coarse_mat) { 8759 PetscBool isredundant, isbddc, force, valid; 8760 PetscViewer dbg_viewer = NULL; 8761 PetscBool isset, issym, isher, isspd; 8762 8763 if (pcbddc->dbg_flag) { 8764 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8765 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8766 } 8767 if (!pcbddc->coarse_ksp) { 8768 char prefix[256], str_level[16]; 8769 size_t len; 8770 8771 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8772 PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel)); 8773 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8774 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8775 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1)); 8776 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8777 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8778 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8779 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8780 /* TODO is this logic correct? should check for coarse_mat type */ 8781 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8782 /* prefix */ 8783 PetscCall(PetscStrncpy(prefix, "", sizeof(prefix))); 8784 PetscCall(PetscStrncpy(str_level, "", sizeof(str_level))); 8785 if (!pcbddc->current_level) { 8786 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8787 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8788 } else { 8789 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8790 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8791 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8792 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8793 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8794 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level)); 8795 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8796 } 8797 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8798 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8799 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8800 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8801 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8802 /* allow user customization */ 8803 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8804 /* get some info after set from options */ 8805 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8806 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8807 force = PETSC_FALSE; 8808 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8809 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8810 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8811 if (multilevel_allowed && !force && !valid) { 8812 isbddc = PETSC_TRUE; 8813 PetscCall(PCSetType(pc_temp, PCBDDC)); 8814 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8815 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8816 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8817 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8818 PetscObjectOptionsBegin((PetscObject)pc_temp); 8819 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8820 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8821 PetscOptionsEnd(); 8822 pc_temp->setfromoptionscalled++; 8823 } 8824 } 8825 } 8826 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8827 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8828 if (nisdofs) { 8829 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8830 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i])); 8831 } 8832 if (nisneu) { 8833 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8834 PetscCall(ISDestroy(&isarray[nisdofs])); 8835 } 8836 if (nisvert) { 8837 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8838 PetscCall(ISDestroy(&isarray[nis - 1])); 8839 } 8840 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8841 8842 /* get some info after set from options */ 8843 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8844 8845 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8846 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8847 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8848 force = PETSC_FALSE; 8849 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8850 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8851 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC)); 8852 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 8853 if (isredundant) { 8854 KSP inner_ksp; 8855 PC inner_pc; 8856 8857 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 8858 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 8859 } 8860 8861 /* parameters which miss an API */ 8862 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8863 if (isbddc) { 8864 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 8865 8866 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8867 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8868 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8869 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8870 if (pcbddc_coarse->benign_saddle_point) { 8871 Mat coarsedivudotp_is; 8872 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 8873 IS row, col; 8874 const PetscInt *gidxs; 8875 PetscInt n, st, M, N; 8876 8877 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 8878 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 8879 st = st - n; 8880 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 8881 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 8882 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 8883 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 8884 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 8885 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 8886 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 8887 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 8888 PetscCall(ISGetSize(row, &M)); 8889 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 8890 PetscCall(ISDestroy(&row)); 8891 PetscCall(ISDestroy(&col)); 8892 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 8893 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 8894 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 8895 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 8896 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8897 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8898 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 8899 PetscCall(MatDestroy(&coarsedivudotp)); 8900 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 8901 PetscCall(MatDestroy(&coarsedivudotp_is)); 8902 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8903 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8904 } 8905 } 8906 8907 /* propagate symmetry info of coarse matrix */ 8908 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 8909 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 8910 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 8911 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 8912 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 8913 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 8914 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 8915 8916 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); 8917 /* set operators */ 8918 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 8919 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 8920 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8921 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 8922 } 8923 PetscCall(MatDestroy(&coarseG)); 8924 PetscCall(PetscFree(isarray)); 8925 #if 0 8926 { 8927 PetscViewer viewer; 8928 char filename[256]; 8929 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level)); 8930 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8931 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8932 PetscCall(MatView(coarse_mat,viewer)); 8933 PetscCall(PetscViewerPopFormat(viewer)); 8934 PetscCall(PetscViewerDestroy(&viewer)); 8935 } 8936 #endif 8937 8938 if (corners) { 8939 Vec gv; 8940 IS is; 8941 const PetscInt *idxs; 8942 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 8943 PetscScalar *coords; 8944 8945 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 8946 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 8947 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 8948 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 8949 PetscCall(VecSetBlockSize(gv, cdim)); 8950 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 8951 PetscCall(VecSetType(gv, VECSTANDARD)); 8952 PetscCall(VecSetFromOptions(gv)); 8953 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8954 8955 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8956 PetscCall(ISGetLocalSize(is, &n)); 8957 PetscCall(ISGetIndices(is, &idxs)); 8958 PetscCall(PetscMalloc1(n * cdim, &coords)); 8959 for (i = 0; i < n; i++) { 8960 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; 8961 } 8962 PetscCall(ISRestoreIndices(is, &idxs)); 8963 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8964 8965 PetscCall(ISGetLocalSize(corners, &n)); 8966 PetscCall(ISGetIndices(corners, &idxs)); 8967 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 8968 PetscCall(ISRestoreIndices(corners, &idxs)); 8969 PetscCall(PetscFree(coords)); 8970 PetscCall(VecAssemblyBegin(gv)); 8971 PetscCall(VecAssemblyEnd(gv)); 8972 PetscCall(VecGetArray(gv, &coords)); 8973 if (pcbddc->coarse_ksp) { 8974 PC coarse_pc; 8975 PetscBool isbddc; 8976 8977 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 8978 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 8979 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8980 PetscReal *realcoords; 8981 8982 PetscCall(VecGetLocalSize(gv, &n)); 8983 #if defined(PETSC_USE_COMPLEX) 8984 PetscCall(PetscMalloc1(n, &realcoords)); 8985 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 8986 #else 8987 realcoords = coords; 8988 #endif 8989 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 8990 #if defined(PETSC_USE_COMPLEX) 8991 PetscCall(PetscFree(realcoords)); 8992 #endif 8993 } 8994 } 8995 PetscCall(VecRestoreArray(gv, &coords)); 8996 PetscCall(VecDestroy(&gv)); 8997 } 8998 PetscCall(ISDestroy(&corners)); 8999 9000 if (pcbddc->coarse_ksp) { 9001 Vec crhs, csol; 9002 9003 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 9004 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 9005 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL)); 9006 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs)); 9007 } 9008 PetscCall(MatDestroy(&coarsedivudotp)); 9009 9010 /* compute null space for coarse solver if the benign trick has been requested */ 9011 if (pcbddc->benign_null) { 9012 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 9013 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); 9014 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 9015 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 9016 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9017 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 9018 if (coarse_mat) { 9019 Vec nullv; 9020 PetscScalar *array, *array2; 9021 PetscInt nl; 9022 9023 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 9024 PetscCall(VecGetLocalSize(nullv, &nl)); 9025 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9026 PetscCall(VecGetArray(nullv, &array2)); 9027 PetscCall(PetscArraycpy(array2, array, nl)); 9028 PetscCall(VecRestoreArray(nullv, &array2)); 9029 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 9030 PetscCall(VecNormalize(nullv, NULL)); 9031 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 9032 PetscCall(VecDestroy(&nullv)); 9033 } 9034 } 9035 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 9036 9037 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9038 if (pcbddc->coarse_ksp) { 9039 PetscBool ispreonly; 9040 9041 if (CoarseNullSpace) { 9042 PetscBool isnull; 9043 9044 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 9045 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 9046 /* TODO: add local nullspaces (if any) */ 9047 } 9048 /* setup coarse ksp */ 9049 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 9050 /* Check coarse problem if in debug mode or if solving with an iterative method */ 9051 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 9052 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 9053 KSP check_ksp; 9054 KSPType check_ksp_type; 9055 PC check_pc; 9056 Vec check_vec, coarse_vec; 9057 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 9058 PetscInt its; 9059 PetscBool compute_eigs; 9060 PetscReal *eigs_r, *eigs_c; 9061 PetscInt neigs; 9062 const char *prefix; 9063 9064 /* Create ksp object suitable for estimation of extreme eigenvalues */ 9065 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 9066 PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel)); 9067 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 9068 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 9069 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 9070 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size)); 9071 /* prevent from setup unneeded object */ 9072 PetscCall(KSPGetPC(check_ksp, &check_pc)); 9073 PetscCall(PCSetType(check_pc, PCNONE)); 9074 if (ispreonly) { 9075 check_ksp_type = KSPPREONLY; 9076 compute_eigs = PETSC_FALSE; 9077 } else { 9078 check_ksp_type = KSPGMRES; 9079 compute_eigs = PETSC_TRUE; 9080 } 9081 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 9082 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 9083 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 9084 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 9085 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 9086 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 9087 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 9088 PetscCall(KSPSetFromOptions(check_ksp)); 9089 PetscCall(KSPSetUp(check_ksp)); 9090 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 9091 PetscCall(KSPSetPC(check_ksp, check_pc)); 9092 /* create random vec */ 9093 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 9094 PetscCall(VecSetRandom(check_vec, NULL)); 9095 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9096 /* solve coarse problem */ 9097 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 9098 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 9099 /* set eigenvalue estimation if preonly has not been requested */ 9100 if (compute_eigs) { 9101 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 9102 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 9103 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 9104 if (neigs) { 9105 lambda_max = eigs_r[neigs - 1]; 9106 lambda_min = eigs_r[0]; 9107 if (pcbddc->use_coarse_estimates) { 9108 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 9109 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 9110 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 9111 } 9112 } 9113 } 9114 } 9115 9116 /* check coarse problem residual error */ 9117 if (pcbddc->dbg_flag) { 9118 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 9119 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9120 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 9121 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 9122 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9123 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 9124 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 9125 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer)); 9126 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer)); 9127 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 9128 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 9129 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); 9130 if (compute_eigs) { 9131 PetscReal lambda_max_s, lambda_min_s; 9132 KSPConvergedReason reason; 9133 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 9134 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 9135 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 9136 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 9137 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 9138 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); 9139 } 9140 PetscCall(PetscViewerFlush(dbg_viewer)); 9141 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9142 } 9143 PetscCall(VecDestroy(&check_vec)); 9144 PetscCall(VecDestroy(&coarse_vec)); 9145 PetscCall(KSPDestroy(&check_ksp)); 9146 if (compute_eigs) { 9147 PetscCall(PetscFree(eigs_r)); 9148 PetscCall(PetscFree(eigs_c)); 9149 } 9150 } 9151 } 9152 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 9153 /* print additional info */ 9154 if (pcbddc->dbg_flag) { 9155 /* waits until all processes reaches this point */ 9156 PetscCall(PetscBarrier((PetscObject)pc)); 9157 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 9158 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9159 } 9160 9161 /* free memory */ 9162 PetscCall(MatDestroy(&coarse_mat)); 9163 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9164 PetscFunctionReturn(PETSC_SUCCESS); 9165 } 9166 9167 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) 9168 { 9169 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9170 PC_IS *pcis = (PC_IS *)pc->data; 9171 IS subset, subset_mult, subset_n; 9172 PetscInt local_size, coarse_size = 0; 9173 PetscInt *local_primal_indices = NULL; 9174 const PetscInt *t_local_primal_indices; 9175 9176 PetscFunctionBegin; 9177 /* Compute global number of coarse dofs */ 9178 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 9179 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 9180 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 9181 PetscCall(ISDestroy(&subset_n)); 9182 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 9183 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 9184 PetscCall(ISDestroy(&subset)); 9185 PetscCall(ISDestroy(&subset_mult)); 9186 PetscCall(ISGetLocalSize(subset_n, &local_size)); 9187 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 9188 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 9189 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 9190 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 9191 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 9192 PetscCall(ISDestroy(&subset_n)); 9193 9194 if (pcbddc->dbg_flag) { 9195 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9196 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 9197 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size)); 9198 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9199 } 9200 9201 /* get back data */ 9202 *coarse_size_n = coarse_size; 9203 *local_primal_indices_n = local_primal_indices; 9204 PetscFunctionReturn(PETSC_SUCCESS); 9205 } 9206 9207 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) 9208 { 9209 IS localis_t; 9210 PetscInt i, lsize, *idxs, n; 9211 PetscScalar *vals; 9212 9213 PetscFunctionBegin; 9214 /* get indices in local ordering exploiting local to global map */ 9215 PetscCall(ISGetLocalSize(globalis, &lsize)); 9216 PetscCall(PetscMalloc1(lsize, &vals)); 9217 for (i = 0; i < lsize; i++) vals[i] = 1.0; 9218 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 9219 PetscCall(VecSet(gwork, 0.0)); 9220 PetscCall(VecSet(lwork, 0.0)); 9221 if (idxs) { /* multilevel guard */ 9222 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 9223 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 9224 } 9225 PetscCall(VecAssemblyBegin(gwork)); 9226 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 9227 PetscCall(PetscFree(vals)); 9228 PetscCall(VecAssemblyEnd(gwork)); 9229 /* now compute set in local ordering */ 9230 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9231 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9232 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 9233 PetscCall(VecGetSize(lwork, &n)); 9234 for (i = 0, lsize = 0; i < n; i++) { 9235 if (PetscRealPart(vals[i]) > 0.5) lsize++; 9236 } 9237 PetscCall(PetscMalloc1(lsize, &idxs)); 9238 for (i = 0, lsize = 0; i < n; i++) { 9239 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i; 9240 } 9241 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 9242 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 9243 *localis = localis_t; 9244 PetscFunctionReturn(PETSC_SUCCESS); 9245 } 9246 9247 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 9248 { 9249 PC_IS *pcis = (PC_IS *)pc->data; 9250 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9251 PC_IS *pcisf; 9252 PC_BDDC *pcbddcf; 9253 PC pcf; 9254 9255 PetscFunctionBegin; 9256 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 9257 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 9258 PetscCall(PCSetType(pcf, PCBDDC)); 9259 9260 pcisf = (PC_IS *)pcf->data; 9261 pcbddcf = (PC_BDDC *)pcf->data; 9262 9263 pcisf->is_B_local = pcis->is_B_local; 9264 pcisf->vec1_N = pcis->vec1_N; 9265 pcisf->BtoNmap = pcis->BtoNmap; 9266 pcisf->n = pcis->n; 9267 pcisf->n_B = pcis->n_B; 9268 9269 PetscCall(PetscFree(pcbddcf->mat_graph)); 9270 PetscCall(PetscFree(pcbddcf->sub_schurs)); 9271 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 9272 pcbddcf->sub_schurs = schurs; 9273 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 9274 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 9275 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 9276 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 9277 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 9278 pcbddcf->use_faces = PETSC_TRUE; 9279 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 9280 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 9281 pcbddcf->use_qr_single = (PetscBool)!constraints; 9282 pcbddcf->fake_change = PETSC_TRUE; 9283 pcbddcf->dbg_flag = pcbddc->dbg_flag; 9284 9285 PetscCall(PCBDDCAdaptiveSelection(pcf)); 9286 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9287 9288 *change = pcbddcf->ConstraintMatrix; 9289 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 9290 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 9291 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 9292 9293 if (schurs) pcbddcf->sub_schurs = NULL; 9294 pcbddcf->ConstraintMatrix = NULL; 9295 pcbddcf->mat_graph = NULL; 9296 pcisf->is_B_local = NULL; 9297 pcisf->vec1_N = NULL; 9298 pcisf->BtoNmap = NULL; 9299 PetscCall(PCDestroy(&pcf)); 9300 PetscFunctionReturn(PETSC_SUCCESS); 9301 } 9302 9303 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9304 { 9305 PC_IS *pcis = (PC_IS *)pc->data; 9306 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9307 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 9308 Mat S_j; 9309 PetscInt *used_xadj, *used_adjncy; 9310 PetscBool free_used_adj; 9311 9312 PetscFunctionBegin; 9313 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9314 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9315 free_used_adj = PETSC_FALSE; 9316 if (pcbddc->sub_schurs_layers == -1) { 9317 used_xadj = NULL; 9318 used_adjncy = NULL; 9319 } else { 9320 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9321 used_xadj = pcbddc->mat_graph->xadj; 9322 used_adjncy = pcbddc->mat_graph->adjncy; 9323 } else if (pcbddc->computed_rowadj) { 9324 used_xadj = pcbddc->mat_graph->xadj; 9325 used_adjncy = pcbddc->mat_graph->adjncy; 9326 } else { 9327 PetscBool flg_row = PETSC_FALSE; 9328 const PetscInt *xadj, *adjncy; 9329 PetscInt nvtxs; 9330 9331 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9332 if (flg_row) { 9333 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 9334 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 9335 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 9336 free_used_adj = PETSC_TRUE; 9337 } else { 9338 pcbddc->sub_schurs_layers = -1; 9339 used_xadj = NULL; 9340 used_adjncy = NULL; 9341 } 9342 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9343 } 9344 } 9345 9346 /* setup sub_schurs data */ 9347 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 9348 if (!sub_schurs->schur_explicit) { 9349 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9350 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 9351 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 9352 } else { 9353 Mat change = NULL; 9354 Vec scaling = NULL; 9355 IS change_primal = NULL, iP; 9356 PetscInt benign_n; 9357 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9358 PetscBool need_change = PETSC_FALSE; 9359 PetscBool discrete_harmonic = PETSC_FALSE; 9360 9361 if (!pcbddc->use_vertices && reuse_solvers) { 9362 PetscInt n_vertices; 9363 9364 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 9365 reuse_solvers = (PetscBool)!n_vertices; 9366 } 9367 if (!pcbddc->benign_change_explicit) { 9368 benign_n = pcbddc->benign_n; 9369 } else { 9370 benign_n = 0; 9371 } 9372 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9373 We need a global reduction to avoid possible deadlocks. 9374 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9375 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9376 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9377 PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 9378 need_change = (PetscBool)(!need_change); 9379 } 9380 /* If the user defines additional constraints, we import them here */ 9381 if (need_change) { 9382 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 9383 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 9384 } 9385 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9386 9387 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 9388 if (iP) { 9389 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 9390 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 9391 PetscOptionsEnd(); 9392 } 9393 if (discrete_harmonic) { 9394 Mat A; 9395 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 9396 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 9397 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 9398 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 9399 pcbddc->benign_zerodiag_subs, change, change_primal)); 9400 PetscCall(MatDestroy(&A)); 9401 } else { 9402 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 9403 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 9404 } 9405 PetscCall(MatDestroy(&change)); 9406 PetscCall(ISDestroy(&change_primal)); 9407 } 9408 PetscCall(MatDestroy(&S_j)); 9409 9410 /* free adjacency */ 9411 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 9412 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9413 PetscFunctionReturn(PETSC_SUCCESS); 9414 } 9415 9416 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9417 { 9418 PC_IS *pcis = (PC_IS *)pc->data; 9419 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9420 PCBDDCGraph graph; 9421 9422 PetscFunctionBegin; 9423 /* attach interface graph for determining subsets */ 9424 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9425 IS verticesIS, verticescomm; 9426 PetscInt vsize, *idxs; 9427 9428 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9429 PetscCall(ISGetSize(verticesIS, &vsize)); 9430 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 9431 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 9432 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 9433 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9434 PetscCall(PCBDDCGraphCreate(&graph)); 9435 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 9436 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 9437 PetscCall(ISDestroy(&verticescomm)); 9438 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9439 } else { 9440 graph = pcbddc->mat_graph; 9441 } 9442 /* print some info */ 9443 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9444 IS vertices; 9445 PetscInt nv, nedges, nfaces; 9446 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 9447 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9448 PetscCall(ISGetSize(vertices, &nv)); 9449 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9450 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 9451 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 9452 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 9453 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 9454 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9455 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9456 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9457 } 9458 9459 /* sub_schurs init */ 9460 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9461 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 9462 9463 /* free graph struct */ 9464 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph)); 9465 PetscFunctionReturn(PETSC_SUCCESS); 9466 } 9467 9468 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer) 9469 { 9470 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 9471 PetscInt n = pc->pmat->rmap->n, ln, ni, st; 9472 const PetscInt *idxs; 9473 IS gis; 9474 9475 PetscFunctionBegin; 9476 if (!is) PetscFunctionReturn(PETSC_SUCCESS); 9477 PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL)); 9478 PetscCall(MatGetLocalSize(matis->A, NULL, &ln)); 9479 PetscCall(PetscArrayzero(matis->sf_leafdata, ln)); 9480 PetscCall(PetscArrayzero(matis->sf_rootdata, n)); 9481 PetscCall(ISGetLocalSize(is, &ni)); 9482 PetscCall(ISGetIndices(is, &idxs)); 9483 for (PetscInt i = 0; i < ni; i++) { 9484 if (idxs[i] < 0 || idxs[i] >= ln) continue; 9485 matis->sf_leafdata[idxs[i]] = 1; 9486 } 9487 PetscCall(ISRestoreIndices(is, &idxs)); 9488 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9489 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9490 ln = 0; 9491 for (PetscInt i = 0; i < n; i++) { 9492 if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st; 9493 } 9494 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis)); 9495 PetscCall(ISView(gis, viewer)); 9496 PetscCall(ISDestroy(&gis)); 9497 PetscFunctionReturn(PETSC_SUCCESS); 9498 } 9499 9500 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile) 9501 { 9502 PetscInt header[11]; 9503 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9504 PetscViewer viewer; 9505 MPI_Comm comm = PetscObjectComm((PetscObject)pc); 9506 9507 PetscFunctionBegin; 9508 PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer)); 9509 if (load) { 9510 IS is; 9511 Mat A; 9512 9513 PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT)); 9514 PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9515 PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9516 PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9517 PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9518 PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9519 PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9520 PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9521 PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9522 PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9523 PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9524 if (header[0]) { 9525 PetscCall(ISCreate(comm, &is)); 9526 PetscCall(ISLoad(is, viewer)); 9527 PetscCall(PCBDDCSetDirichletBoundaries(pc, is)); 9528 PetscCall(ISDestroy(&is)); 9529 } 9530 if (header[1]) { 9531 PetscCall(ISCreate(comm, &is)); 9532 PetscCall(ISLoad(is, viewer)); 9533 PetscCall(PCBDDCSetNeumannBoundaries(pc, is)); 9534 PetscCall(ISDestroy(&is)); 9535 } 9536 if (header[2]) { 9537 IS *isarray; 9538 9539 PetscCall(PetscMalloc1(header[2], &isarray)); 9540 for (PetscInt i = 0; i < header[2]; i++) { 9541 PetscCall(ISCreate(comm, &isarray[i])); 9542 PetscCall(ISLoad(isarray[i], viewer)); 9543 } 9544 PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray)); 9545 for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i])); 9546 PetscCall(PetscFree(isarray)); 9547 } 9548 if (header[3]) { 9549 PetscCall(ISCreate(comm, &is)); 9550 PetscCall(ISLoad(is, viewer)); 9551 PetscCall(PCBDDCSetPrimalVerticesIS(pc, is)); 9552 PetscCall(ISDestroy(&is)); 9553 } 9554 if (header[4]) { 9555 PetscCall(MatCreate(comm, &A)); 9556 PetscCall(MatSetType(A, MATAIJ)); 9557 PetscCall(MatLoad(A, viewer)); 9558 PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8])); 9559 PetscCall(MatDestroy(&A)); 9560 } 9561 if (header[9]) { 9562 PetscCall(MatCreate(comm, &A)); 9563 PetscCall(MatSetType(A, MATIS)); 9564 PetscCall(MatLoad(A, viewer)); 9565 PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL)); 9566 PetscCall(MatDestroy(&A)); 9567 } 9568 } else { 9569 header[0] = (PetscInt)!!pcbddc->DirichletBoundariesLocal; 9570 header[1] = (PetscInt)!!pcbddc->NeumannBoundariesLocal; 9571 header[2] = pcbddc->n_ISForDofsLocal; 9572 header[3] = (PetscInt)!!pcbddc->user_primal_vertices_local; 9573 header[4] = (PetscInt)!!pcbddc->discretegradient; 9574 header[5] = pcbddc->nedorder; 9575 header[6] = pcbddc->nedfield; 9576 header[7] = (PetscInt)pcbddc->nedglobal; 9577 header[8] = (PetscInt)pcbddc->conforming; 9578 header[9] = (PetscInt)!!pcbddc->divudotp; 9579 header[10] = (PetscInt)pcbddc->divudotp_trans; 9580 if (header[4]) header[3] = 0; 9581 9582 PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT)); 9583 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer)); 9584 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer)); 9585 for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer)); 9586 if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer)); 9587 if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer)); 9588 if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer)); 9589 } 9590 PetscCall(PetscViewerDestroy(&viewer)); 9591 PetscFunctionReturn(PETSC_SUCCESS); 9592 } 9593 9594 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9595 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9596 { 9597 Mat At; 9598 IS rows; 9599 PetscInt rst, ren; 9600 PetscLayout rmap; 9601 9602 PetscFunctionBegin; 9603 rst = ren = 0; 9604 if (ccomm != MPI_COMM_NULL) { 9605 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 9606 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 9607 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 9608 PetscCall(PetscLayoutSetUp(rmap)); 9609 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 9610 } 9611 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 9612 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 9613 PetscCall(ISDestroy(&rows)); 9614 9615 if (ccomm != MPI_COMM_NULL) { 9616 Mat_MPIAIJ *a, *b; 9617 IS from, to; 9618 Vec gvec; 9619 PetscInt lsize; 9620 9621 PetscCall(MatCreate(ccomm, B)); 9622 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 9623 PetscCall(MatSetType(*B, MATAIJ)); 9624 PetscCall(PetscLayoutDestroy(&(*B)->rmap)); 9625 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9626 a = (Mat_MPIAIJ *)At->data; 9627 b = (Mat_MPIAIJ *)(*B)->data; 9628 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 9629 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 9630 PetscCall(PetscObjectReference((PetscObject)a->A)); 9631 PetscCall(PetscObjectReference((PetscObject)a->B)); 9632 b->A = a->A; 9633 b->B = a->B; 9634 9635 b->donotstash = a->donotstash; 9636 b->roworiented = a->roworiented; 9637 b->rowindices = NULL; 9638 b->rowvalues = NULL; 9639 b->getrowactive = PETSC_FALSE; 9640 9641 (*B)->rmap = rmap; 9642 (*B)->factortype = A->factortype; 9643 (*B)->assembled = PETSC_TRUE; 9644 (*B)->insertmode = NOT_SET_VALUES; 9645 (*B)->preallocated = PETSC_TRUE; 9646 9647 if (a->colmap) { 9648 #if defined(PETSC_USE_CTABLE) 9649 PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap)); 9650 #else 9651 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 9652 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 9653 #endif 9654 } else b->colmap = NULL; 9655 if (a->garray) { 9656 PetscInt len; 9657 len = a->B->cmap->n; 9658 PetscCall(PetscMalloc1(len + 1, &b->garray)); 9659 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 9660 } else b->garray = NULL; 9661 9662 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9663 b->lvec = a->lvec; 9664 9665 /* cannot use VecScatterCopy */ 9666 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 9667 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 9668 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 9669 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 9670 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 9671 PetscCall(ISDestroy(&from)); 9672 PetscCall(ISDestroy(&to)); 9673 PetscCall(VecDestroy(&gvec)); 9674 } 9675 PetscCall(MatDestroy(&At)); 9676 PetscFunctionReturn(PETSC_SUCCESS); 9677 } 9678 9679 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */ 9680 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA) 9681 { 9682 PetscBool isaij; 9683 MPI_Comm comm; 9684 9685 PetscFunctionBegin; 9686 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9687 PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, "")); 9688 PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented"); 9689 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij)); 9690 if (isaij) { /* SeqAIJ supports repeated rows */ 9691 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA)); 9692 } else { 9693 Mat A_loc; 9694 Mat_SeqAIJ *da; 9695 PetscSF sf; 9696 PetscInt ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata; 9697 PetscScalar *daa; 9698 const PetscInt *idxs; 9699 const PetscSFNode *iremotes; 9700 PetscSFNode *remotes; 9701 9702 /* SF for incoming rows */ 9703 PetscCall(PetscSFCreate(comm, &sf)); 9704 PetscCall(ISGetLocalSize(rows, &ni)); 9705 PetscCall(ISGetIndices(rows, &idxs)); 9706 PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs)); 9707 PetscCall(ISRestoreIndices(rows, &idxs)); 9708 9709 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc)); 9710 da = (Mat_SeqAIJ *)A_loc->data; 9711 PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata)); 9712 for (PetscInt i = 0; i < m; i++) { 9713 rdata[2 * i + 0] = da->i[i + 1] - da->i[i]; 9714 rdata[2 * i + 1] = da->i[i]; 9715 } 9716 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9717 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9718 PetscCall(PetscMalloc1(ni + 1, &di)); 9719 di[0] = 0; 9720 for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0]; 9721 PetscCall(PetscMalloc1(di[ni], &dj)); 9722 PetscCall(PetscMalloc1(di[ni], &daa)); 9723 PetscCall(PetscMalloc1(di[ni], &remotes)); 9724 9725 PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes)); 9726 9727 /* SF graph for nonzeros */ 9728 c = 0; 9729 for (PetscInt i = 0; i < ni; i++) { 9730 const PetscInt rank = iremotes[i].rank; 9731 const PetscInt rsize = ldata[2 * i]; 9732 for (PetscInt j = 0; j < rsize; j++) { 9733 remotes[c].rank = rank; 9734 remotes[c].index = ldata[2 * i + 1] + j; 9735 c++; 9736 } 9737 } 9738 PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]); 9739 PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER)); 9740 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9741 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9742 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9743 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9744 9745 PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA)); 9746 PetscCall(MatDestroy(&A_loc)); 9747 PetscCall(PetscSFDestroy(&sf)); 9748 PetscCall(PetscFree(di)); 9749 PetscCall(PetscFree(dj)); 9750 PetscCall(PetscFree(daa)); 9751 PetscCall(PetscFree(remotes)); 9752 PetscCall(PetscFree2(ldata, rdata)); 9753 } 9754 PetscFunctionReturn(PETSC_SUCCESS); 9755 } 9756