1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <petsc/private/pcbddcimpl.h> 3 #include <petsc/private/pcbddcprivateimpl.h> 4 #include <petsc/private/kernels/blockinvert.h> 5 #include <../src/mat/impls/dense/seq/dense.h> 6 #include <petscdmplex.h> 7 #include <petscblaslapack.h> 8 #include <petsc/private/sfimpl.h> 9 #include <petsc/private/dmpleximpl.h> 10 #include <petscdmda.h> 11 12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *); 13 14 /* if range is true, it returns B s.t. span{B} = range(A) 15 if range is false, it returns B s.t. range(B) _|_ range(A) */ 16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 17 { 18 PetscScalar *uwork, *data, *U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM, bN, lwork, lierr, di = 1; 21 PetscInt ulw, i, nr, nc, n; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork2; 24 #endif 25 26 PetscFunctionBegin; 27 PetscCall(MatGetSize(A, &nr, &nc)); 28 if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc)); 33 PetscCall(PetscMalloc1(ulw, &uwork)); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr, nc); 39 if (!rwork) { 40 PetscCall(PetscMalloc1(n, &sing)); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 PetscCall(PetscMalloc1(nr * nr, &U)); 47 PetscCall(PetscBLASIntCast(nr, &bM)); 48 PetscCall(PetscBLASIntCast(nc, &bN)); 49 PetscCall(PetscBLASIntCast(ulw, &lwork)); 50 PetscCall(MatDenseGetArray(A, &data)); 51 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 52 #if !defined(PETSC_USE_COMPLEX) 53 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr)); 54 #else 55 PetscCall(PetscMalloc1(5 * n, &rwork2)); 56 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr)); 57 PetscCall(PetscFree(rwork2)); 58 #endif 59 PetscCall(PetscFPTrapPop()); 60 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 61 PetscCall(MatDenseRestoreArray(A, &data)); 62 for (i = 0; i < n; i++) 63 if (sing[i] < PETSC_SMALL) break; 64 if (!rwork) PetscCall(PetscFree(sing)); 65 if (!work) PetscCall(PetscFree(uwork)); 66 /* create B */ 67 if (!range) { 68 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B)); 69 PetscCall(MatDenseGetArray(*B, &data)); 70 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr)); 71 } else { 72 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B)); 73 PetscCall(MatDenseGetArray(*B, &data)); 74 PetscCall(PetscArraycpy(data, U, i * nr)); 75 } 76 PetscCall(MatDenseRestoreArray(*B, &data)); 77 PetscCall(PetscFree(U)); 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 /* TODO REMOVE */ 82 #if defined(PRINT_GDET) 83 static int inc = 0; 84 static int lev = 0; 85 #endif 86 87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 88 { 89 Mat GE, GEd; 90 PetscInt rsize, csize, esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 PetscCall(ISGetSize(edge, &esize)); 95 if (!esize) PetscFunctionReturn(PETSC_SUCCESS); 96 PetscCall(ISGetSize(extrow, &rsize)); 97 PetscCall(ISGetSize(extcol, &csize)); 98 99 /* gradients */ 100 ptr = work + 5 * esize; 101 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE)); 102 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins)); 103 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins)); 104 PetscCall(MatDestroy(&GE)); 105 106 /* constants */ 107 ptr += rsize * csize; 108 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd)); 109 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE)); 110 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd)); 111 PetscCall(MatDestroy(&GE)); 112 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins)); 113 PetscCall(MatDestroy(&GEd)); 114 115 if (corners) { 116 Mat GEc; 117 const PetscScalar *vals; 118 PetscScalar v; 119 120 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc)); 121 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd)); 122 PetscCall(MatDenseGetArrayRead(GEd, &vals)); 123 /* v = PetscAbsScalar(vals[0]); */ 124 v = 1.; 125 cvals[0] = vals[0] / v; 126 cvals[1] = vals[1] / v; 127 PetscCall(MatDenseRestoreArrayRead(GEd, &vals)); 128 PetscCall(MatScale(*GKins, 1. / v)); 129 #if defined(PRINT_GDET) 130 { 131 PetscViewer viewer; 132 char filename[256]; 133 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++)); 134 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer)); 135 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB)); 136 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc")); 137 PetscCall(MatView(GEc, viewer)); 138 PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK")); 139 PetscCall(MatView(*GKins, viewer)); 140 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj")); 141 PetscCall(MatView(GEd, viewer)); 142 PetscCall(PetscViewerDestroy(&viewer)); 143 } 144 #endif 145 PetscCall(MatDestroy(&GEd)); 146 PetscCall(MatDestroy(&GEc)); 147 } 148 PetscFunctionReturn(PETSC_SUCCESS); 149 } 150 151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *); 152 153 PetscErrorCode PCBDDCNedelecSupport(PC pc) 154 { 155 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 156 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 157 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit; 158 PetscSF sfv; 159 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g; 160 MPI_Comm comm; 161 IS lned, primals, allprimals, nedfieldlocal; 162 IS *eedges, *extrows, *extcols, *alleedges; 163 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter; 164 PetscScalar *vals, *work; 165 PetscReal *rwork; 166 const PetscInt *idxs, *ii, *jj, *iit, *jjt; 167 PetscInt ne, nv, Lv, order, n, field; 168 PetscInt i, j, extmem, cum, maxsize, nee; 169 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs; 170 PetscInt *sfvleaves, *sfvroots; 171 PetscInt *corners, *cedges; 172 PetscInt *ecount, **eneighs, *vcount, **vneighs; 173 PetscInt *emarks; 174 PetscBool print, eerr, done, lrc[2], conforming, global, singular, setprimal; 175 176 PetscFunctionBegin; 177 /* If the discrete gradient is defined for a subset of dofs and global is true, 178 it assumes G is given in global ordering for all the dofs. 179 Otherwise, the ordering is global for the Nedelec field */ 180 order = pcbddc->nedorder; 181 conforming = pcbddc->conforming; 182 field = pcbddc->nedfield; 183 global = pcbddc->nedglobal; 184 setprimal = PETSC_FALSE; 185 print = PETSC_FALSE; 186 singular = PETSC_FALSE; 187 188 /* Command line customization */ 189 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC"); 190 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL)); 191 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular", "Infer nullspace from discrete gradient", NULL, singular, &singular, NULL)); 192 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL)); 193 /* print debug info TODO: to be removed */ 194 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL)); 195 PetscOptionsEnd(); 196 197 /* Return if there are no edges in the decomposition and the problem is not singular */ 198 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL)); 199 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n)); 200 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm)); 201 if (!singular) { 202 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 203 lrc[0] = PETSC_FALSE; 204 for (i = 0; i < n; i++) { 205 if (PetscRealPart(vals[i]) > 2.) { 206 lrc[0] = PETSC_TRUE; 207 break; 208 } 209 } 210 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 211 PetscCall(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm)); 212 if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS); 213 } 214 215 /* Get Nedelec field */ 216 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal); 217 if (pcbddc->n_ISForDofsLocal && field >= 0) { 218 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field])); 219 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 220 PetscCall(ISGetLocalSize(nedfieldlocal, &ne)); 221 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 222 ne = n; 223 nedfieldlocal = NULL; 224 global = PETSC_TRUE; 225 } else if (field == PETSC_DECIDE) { 226 PetscInt rst, ren, *idx; 227 228 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 229 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 230 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren)); 231 for (i = rst; i < ren; i++) { 232 PetscInt nc; 233 234 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 235 if (nc > 1) matis->sf_rootdata[i - rst] = 1; 236 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL)); 237 } 238 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 239 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 240 PetscCall(PetscMalloc1(n, &idx)); 241 for (i = 0, ne = 0; i < n; i++) 242 if (matis->sf_leafdata[i]) idx[ne++] = i; 243 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal)); 244 } else { 245 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified"); 246 } 247 248 /* Sanity checks */ 249 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time"); 250 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis"); 251 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order); 252 253 /* Just set primal dofs and return */ 254 if (setprimal) { 255 IS enedfieldlocal; 256 PetscInt *eidxs; 257 258 PetscCall(PetscMalloc1(ne, &eidxs)); 259 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals)); 260 if (nedfieldlocal) { 261 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 262 for (i = 0, cum = 0; i < ne; i++) { 263 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i]; 264 } 265 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 266 } else { 267 for (i = 0, cum = 0; i < ne; i++) { 268 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i; 269 } 270 } 271 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals)); 272 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal)); 273 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal)); 274 PetscCall(PetscFree(eidxs)); 275 PetscCall(ISDestroy(&nedfieldlocal)); 276 PetscCall(ISDestroy(&enedfieldlocal)); 277 PetscFunctionReturn(PETSC_SUCCESS); 278 } 279 280 /* Compute some l2g maps */ 281 if (nedfieldlocal) { 282 IS is; 283 284 /* need to map from the local Nedelec field to local numbering */ 285 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g)); 286 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 287 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is)); 288 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g)); 289 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 290 if (global) { 291 PetscCall(PetscObjectReference((PetscObject)al2g)); 292 el2g = al2g; 293 } else { 294 IS gis; 295 296 PetscCall(ISRenumber(is, NULL, NULL, &gis)); 297 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g)); 298 PetscCall(ISDestroy(&gis)); 299 } 300 PetscCall(ISDestroy(&is)); 301 } else { 302 /* one ref for the destruction of al2g, one for el2g */ 303 PetscCall(PetscObjectReference((PetscObject)al2g)); 304 PetscCall(PetscObjectReference((PetscObject)al2g)); 305 el2g = al2g; 306 fl2g = NULL; 307 } 308 309 /* Start communication to drop connections for interior edges (for cc analysis only) */ 310 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 311 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 312 if (nedfieldlocal) { 313 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 314 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1; 315 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 316 } else { 317 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1; 318 } 319 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 320 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 321 322 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 323 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G)); 324 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 325 if (global) { 326 PetscInt rst; 327 328 PetscCall(MatGetOwnershipRange(G, &rst, NULL)); 329 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) { 330 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst; 331 } 332 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); 333 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL)); 334 } else { 335 PetscInt *tbz; 336 337 PetscCall(PetscMalloc1(ne, &tbz)); 338 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 339 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 340 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 341 for (i = 0, cum = 0; i < ne; i++) 342 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i; 343 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 344 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz)); 345 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL)); 346 PetscCall(PetscFree(tbz)); 347 } 348 } else { /* we need the entire G to infer the nullspace */ 349 PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient)); 350 G = pcbddc->discretegradient; 351 } 352 353 /* Extract subdomain relevant rows of G */ 354 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs)); 355 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned)); 356 PetscCall(MatAIJExtractRows(G, lned, &lGall)); 357 /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */ 358 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs)); 359 PetscCall(ISDestroy(&lned)); 360 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis)); 361 PetscCall(MatDestroy(&lGall)); 362 PetscCall(MatISGetLocalMat(lGis, &lG)); 363 364 if (matis->allow_repeated) { /* multi-element support */ 365 Mat *lGn, B; 366 IS *is_rows, *tcols, tmap, nmap; 367 PetscInt subnv; 368 const PetscInt *subvidxs; 369 ISLocalToGlobalMapping mapn; 370 371 PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn)); 372 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows)); 373 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols)); 374 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 375 if (fl2g) { 376 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i])); 377 } else { 378 PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i])); 379 is_rows[i] = pcbddc->local_subs[i]; 380 } 381 PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)])); 382 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn)); 383 PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv)); 384 PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs)); 385 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i])); 386 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs)); 387 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 388 } 389 390 /* Create new MATIS with repeated vertices */ 391 PetscCall(MatCreate(comm, &B)); 392 PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N)); 393 PetscCall(MatSetType(B, MATIS)); 394 PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE)); 395 PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap)); 396 PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap)); 397 PetscCall(ISDestroy(&tmap)); 398 PetscCall(ISGetLocalSize(nmap, &subnv)); 399 PetscCall(ISGetIndices(nmap, &subvidxs)); 400 PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap)); 401 PetscCall(ISRestoreIndices(nmap, &subvidxs)); 402 PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn)); 403 PetscCall(ISDestroy(&tmap)); 404 PetscCall(ISDestroy(&nmap)); 405 PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn)); 406 PetscCall(ISLocalToGlobalMappingDestroy(&mapn)); 407 PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG)); 408 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) { 409 PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)])); 410 PetscCall(ISDestroy(&is_rows[i])); 411 PetscCall(ISDestroy(&tcols[i])); 412 } 413 PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG)); 414 PetscCall(PetscFree(lGn)); 415 PetscCall(PetscFree(is_rows)); 416 PetscCall(PetscFree(tcols)); 417 PetscCall(MatISSetLocalMat(B, lG)); 418 PetscCall(MatDestroy(&lG)); 419 420 PetscCall(MatDestroy(&lGis)); 421 lGis = B; 422 } 423 424 /* SF for nodal dofs communications */ 425 PetscCall(MatGetLocalSize(G, NULL, &Lv)); 426 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g)); 427 PetscCall(PetscObjectReference((PetscObject)vl2g)); 428 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv)); 429 PetscCall(PetscSFCreate(comm, &sfv)); 430 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs)); 431 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs)); 432 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs)); 433 i = singular ? 2 : 1; 434 PetscCall(PetscMalloc2(i * nv, &sfvleaves, i * Lv, &sfvroots)); 435 436 /* Destroy temporary G created in MATIS format and modified G */ 437 PetscCall(MatISGetLocalMat(lGis, &lG)); 438 PetscCall(PetscObjectReference((PetscObject)lG)); 439 PetscCall(MatDestroy(&lGis)); 440 PetscCall(MatDestroy(&G)); 441 442 if (print) { 443 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG")); 444 PetscCall(MatView(lG, NULL)); 445 } 446 447 /* Save lG for values insertion in change of basis */ 448 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit)); 449 450 /* Analyze the edge-nodes connections (duplicate lG) */ 451 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe)); 452 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 453 PetscCall(PetscBTCreate(nv, &btv)); 454 PetscCall(PetscBTCreate(ne, &bte)); 455 PetscCall(PetscBTCreate(ne, &btb)); 456 PetscCall(PetscBTCreate(ne, &btbd)); 457 PetscCall(PetscBTCreate(nv, &btvcand)); 458 /* need to import the boundary specification to ensure the 459 proper detection of coarse edges' endpoints */ 460 if (pcbddc->DirichletBoundariesLocal) { 461 IS is; 462 463 if (fl2g) { 464 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is)); 465 } else { 466 is = pcbddc->DirichletBoundariesLocal; 467 } 468 PetscCall(ISGetLocalSize(is, &cum)); 469 PetscCall(ISGetIndices(is, &idxs)); 470 for (i = 0; i < cum; i++) { 471 if (idxs[i] >= 0 && idxs[i] < ne) { 472 PetscCall(PetscBTSet(btb, idxs[i])); 473 PetscCall(PetscBTSet(btbd, idxs[i])); 474 } 475 } 476 PetscCall(ISRestoreIndices(is, &idxs)); 477 if (fl2g) PetscCall(ISDestroy(&is)); 478 } 479 if (pcbddc->NeumannBoundariesLocal) { 480 IS is; 481 482 if (fl2g) { 483 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is)); 484 } else { 485 is = pcbddc->NeumannBoundariesLocal; 486 } 487 PetscCall(ISGetLocalSize(is, &cum)); 488 PetscCall(ISGetIndices(is, &idxs)); 489 for (i = 0; i < cum; i++) { 490 if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i])); 491 } 492 PetscCall(ISRestoreIndices(is, &idxs)); 493 if (fl2g) PetscCall(ISDestroy(&is)); 494 } 495 496 /* Count neighs per dof */ 497 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL)); 498 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL)); 499 500 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 501 for proper detection of coarse edges' endpoints */ 502 PetscCall(PetscBTCreate(ne, &btee)); 503 for (i = 0; i < ne; i++) { 504 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i)); 505 } 506 PetscCall(PetscMalloc1(ne, &marks)); 507 if (!conforming) { 508 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 509 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 510 } 511 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 512 PetscCall(MatSeqAIJGetArray(lGe, &vals)); 513 cum = 0; 514 for (i = 0; i < ne; i++) { 515 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 516 if (!PetscBTLookup(btee, i)) { 517 marks[cum++] = i; 518 continue; 519 } 520 /* set badly connected edge dofs as primal */ 521 if (!conforming) { 522 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 523 marks[cum++] = i; 524 PetscCall(PetscBTSet(bte, i)); 525 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 526 } else { 527 /* every edge dofs should be connected through a certain number of nodal dofs 528 to other edge dofs belonging to coarse edges 529 - at most 2 endpoints 530 - order-1 interior nodal dofs 531 - no undefined nodal dofs (nconn < order) 532 */ 533 PetscInt ends = 0, ints = 0, undef = 0; 534 for (j = ii[i]; j < ii[i + 1]; j++) { 535 PetscInt v = jj[j], k; 536 PetscInt nconn = iit[v + 1] - iit[v]; 537 for (k = iit[v]; k < iit[v + 1]; k++) 538 if (!PetscBTLookup(btee, jjt[k])) nconn--; 539 if (nconn > order) ends++; 540 else if (nconn == order) ints++; 541 else undef++; 542 } 543 if (undef || ends > 2 || ints != order - 1) { 544 marks[cum++] = i; 545 PetscCall(PetscBTSet(bte, i)); 546 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j])); 547 } 548 } 549 } 550 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 551 if (!order && ii[i + 1] != ii[i]) { 552 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1); 553 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val; 554 } 555 } 556 PetscCall(PetscBTDestroy(&btee)); 557 PetscCall(MatSeqAIJRestoreArray(lGe, &vals)); 558 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 559 if (!conforming) { 560 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 561 PetscCall(MatDestroy(&lGt)); 562 } 563 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL)); 564 565 if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */ 566 PetscSF emlsf, vmlsf; 567 PetscInt *eleaves, *vleaves, *meleaves, *mvleaves; 568 PetscInt cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl; 569 570 PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs)); 571 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 572 PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs)); 573 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded"); 574 575 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf)); 576 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf)); 577 578 PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL)); 579 for (i = 0, j = 0; i < ne; i++) j += ecount[i]; 580 PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne); 581 PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j); 582 583 PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL)); 584 for (i = 0, j = 0; i < nv; i++) j += vcount[i]; 585 PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv); 586 PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j); 587 588 PetscCall(PetscMalloc1(ne, &eleaves)); 589 PetscCall(PetscMalloc1(nv, &vleaves)); 590 for (i = 0; i < ne; i++) eleaves[i] = -1; 591 for (i = 0; i < nv; i++) vleaves[i] = -1; 592 PetscCall(PetscMalloc1(emnl, &meleaves)); 593 PetscCall(PetscMalloc1(vmnl, &mvleaves)); 594 595 PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm)); 596 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 597 for (i = 0; i < n_subs; i++) { 598 const PetscInt *idxs; 599 const PetscInt subid = cum_subs + i; 600 PetscInt ns; 601 602 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns)); 603 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 604 for (j = 0; j < ns; j++) { 605 const PetscInt e = idxs[j]; 606 607 eleaves[e] = subid; 608 for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid; 609 } 610 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 611 } 612 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 613 PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 614 PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE)); 615 PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 616 PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE)); 617 PetscCall(PetscFree(eleaves)); 618 PetscCall(PetscFree(vleaves)); 619 620 PetscCall(PetscMalloc1(ne + 1, &eneighs)); 621 eneighs[0] = meleaves; 622 for (i = 1; i < ne; i++) { 623 PetscCall(PetscSortInt(ecount[i - 1], eneighs[i - 1])); 624 eneighs[i] = eneighs[i - 1] + ecount[i - 1]; 625 } 626 PetscCall(PetscMalloc1(nv + 1, &vneighs)); 627 vneighs[0] = mvleaves; 628 for (i = 1; i < nv; i++) { 629 PetscCall(PetscSortInt(vcount[i - 1], vneighs[i - 1])); 630 vneighs[i] = vneighs[i - 1] + vcount[i - 1]; 631 } 632 } else { 633 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs)); 634 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs)); 635 } 636 637 /* identify splitpoints and corner candidates */ 638 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt)); 639 if (print) { 640 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG")); 641 PetscCall(MatView(lGe, NULL)); 642 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt")); 643 PetscCall(MatView(lGt, NULL)); 644 } 645 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 646 PetscCall(MatSeqAIJGetArray(lGt, &vals)); 647 for (i = 0; i < nv; i++) { 648 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i]; 649 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 650 if (!order) { /* variable order */ 651 PetscReal vorder = 0.; 652 653 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]); 654 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON); 655 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test); 656 ord = 1; 657 } 658 PetscAssert(test % ord == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT, test, i, ord); 659 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) { 660 const PetscInt e = jj[j]; 661 662 if (PetscBTLookup(btbd, e)) { 663 bdir = PETSC_TRUE; 664 break; 665 } 666 if (vc != ecount[e]) { 667 sneighs = PETSC_FALSE; 668 } else { 669 const PetscInt *vn = vneighs[i], *en = eneighs[e]; 670 671 for (PetscInt k = 0; k < vc; k++) { 672 if (vn[k] != en[k]) { 673 sneighs = PETSC_FALSE; 674 break; 675 } 676 } 677 } 678 } 679 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */ 680 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir])); 681 PetscCall(PetscBTSet(btv, i)); 682 } else if (test == ord) { 683 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) { 684 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i)); 685 PetscCall(PetscBTSet(btv, i)); 686 } else { 687 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i)); 688 PetscCall(PetscBTSet(btvcand, i)); 689 } 690 } 691 } 692 PetscCall(PetscBTDestroy(&btbd)); 693 694 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 695 if (order != 1) { 696 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n")); 697 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 698 for (i = 0; i < nv; i++) { 699 if (PetscBTLookup(btvcand, i)) { 700 PetscBool found = PETSC_FALSE; 701 for (j = ii[i]; j < ii[i + 1] && !found; j++) { 702 PetscInt k, e = jj[j]; 703 if (PetscBTLookup(bte, e)) continue; 704 for (k = iit[e]; k < iit[e + 1]; k++) { 705 PetscInt v = jjt[k]; 706 if (v != i && PetscBTLookup(btvcand, v)) { 707 found = PETSC_TRUE; 708 break; 709 } 710 } 711 } 712 if (!found) { 713 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i)); 714 PetscCall(PetscBTClear(btvcand, i)); 715 } else { 716 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i)); 717 } 718 } 719 } 720 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 721 } 722 PetscCall(MatSeqAIJRestoreArray(lGt, &vals)); 723 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 724 PetscCall(MatDestroy(&lGe)); 725 726 /* Get the local G^T explicitly */ 727 PetscCall(MatDestroy(&lGt)); 728 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 729 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 730 731 /* Mark shared nodal dofs */ 732 PetscCall(PetscBTCreate(nv, &btvi)); 733 for (i = 0; i < nv; i++) { 734 if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i)); 735 } 736 737 if (matis->allow_repeated) { 738 PetscCall(PetscFree(eneighs[0])); 739 PetscCall(PetscFree(vneighs[0])); 740 PetscCall(PetscFree(eneighs)); 741 PetscCall(PetscFree(vneighs)); 742 } 743 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs)); 744 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs)); 745 746 /* communicate corners and splitpoints */ 747 PetscCall(PetscMalloc1(nv, &vmarks)); 748 PetscCall(PetscArrayzero(sfvleaves, nv)); 749 PetscCall(PetscArrayzero(sfvroots, Lv)); 750 for (i = 0; i < nv; i++) 751 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1; 752 753 if (print) { 754 IS tbz; 755 756 cum = 0; 757 for (i = 0; i < nv; i++) 758 if (sfvleaves[i]) vmarks[cum++] = i; 759 760 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 761 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local")); 762 PetscCall(ISView(tbz, NULL)); 763 PetscCall(ISDestroy(&tbz)); 764 } 765 766 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 767 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM)); 768 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 769 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE)); 770 771 /* Zero rows of lGt corresponding to identified corners 772 and interior nodal dofs */ 773 cum = 0; 774 for (i = 0; i < nv; i++) { 775 if (sfvleaves[i]) { 776 vmarks[cum++] = i; 777 PetscCall(PetscBTSet(btv, i)); 778 } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i; 779 } 780 PetscCall(PetscBTDestroy(&btvi)); 781 if (print) { 782 IS tbz; 783 784 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz)); 785 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior")); 786 PetscCall(ISView(tbz, NULL)); 787 PetscCall(ISDestroy(&tbz)); 788 } 789 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL)); 790 PetscCall(PetscFree(vmarks)); 791 PetscCall(PetscSFDestroy(&sfv)); 792 PetscCall(PetscFree2(sfvleaves, sfvroots)); 793 794 /* Recompute G */ 795 PetscCall(MatDestroy(&lG)); 796 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG)); 797 if (print) { 798 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG")); 799 PetscCall(MatView(lG, NULL)); 800 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt")); 801 PetscCall(MatView(lGt, NULL)); 802 } 803 804 /* Get primal dofs (if any) */ 805 cum = 0; 806 for (i = 0; i < ne; i++) { 807 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i; 808 } 809 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks)); 810 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals)); 811 if (print) { 812 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs")); 813 PetscCall(ISView(primals, NULL)); 814 } 815 PetscCall(PetscBTDestroy(&bte)); 816 /* TODO: what if the user passed in some of them ? */ 817 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 818 PetscCall(ISDestroy(&primals)); 819 820 /* Compute edge connectivity */ 821 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_")); 822 823 /* Symbolic conn = lG*lGt */ 824 PetscCall(MatProductCreate(lG, lGt, NULL, &conn)); 825 PetscCall(MatProductSetType(conn, MATPRODUCT_AB)); 826 PetscCall(MatProductSetAlgorithm(conn, "default")); 827 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT)); 828 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_")); 829 PetscCall(MatProductSetFromOptions(conn)); 830 PetscCall(MatProductSymbolic(conn)); 831 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 832 if (fl2g) { 833 PetscBT btf; 834 PetscInt *iia, *jja, *iiu, *jju; 835 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE; 836 837 /* create CSR for all local dofs */ 838 PetscCall(PetscMalloc1(n + 1, &iia)); 839 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 840 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n); 841 iiu = pcbddc->mat_graph->xadj; 842 jju = pcbddc->mat_graph->adjncy; 843 } else if (pcbddc->use_local_adj) { 844 rest = PETSC_TRUE; 845 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 846 } else { 847 free = PETSC_TRUE; 848 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju)); 849 iiu[0] = 0; 850 for (i = 0; i < n; i++) { 851 iiu[i + 1] = i + 1; 852 jju[i] = -1; 853 } 854 } 855 856 /* import sizes of CSR */ 857 iia[0] = 0; 858 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i]; 859 860 /* overwrite entries corresponding to the Nedelec field */ 861 PetscCall(PetscBTCreate(n, &btf)); 862 PetscCall(ISGetIndices(nedfieldlocal, &idxs)); 863 for (i = 0; i < ne; i++) { 864 PetscCall(PetscBTSet(btf, idxs[i])); 865 iia[idxs[i] + 1] = ii[i + 1] - ii[i]; 866 } 867 868 /* iia in CSR */ 869 for (i = 0; i < n; i++) iia[i + 1] += iia[i]; 870 871 /* jja in CSR */ 872 PetscCall(PetscMalloc1(iia[n], &jja)); 873 for (i = 0; i < n; i++) 874 if (!PetscBTLookup(btf, i)) 875 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j]; 876 877 /* map edge dofs connectivity */ 878 if (jj) { 879 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj)); 880 for (i = 0; i < ne; i++) { 881 PetscInt e = idxs[i]; 882 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j]; 883 } 884 } 885 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs)); 886 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_OWN_POINTER)); 887 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done)); 888 if (free) PetscCall(PetscFree2(iiu, jju)); 889 PetscCall(PetscBTDestroy(&btf)); 890 } else { 891 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_USE_POINTER)); 892 } 893 894 /* Analyze interface for edge dofs */ 895 PetscCall(PCBDDCAnalyzeInterface(pc)); 896 pcbddc->mat_graph->twodim = PETSC_FALSE; 897 898 /* Get coarse edges in the edge space */ 899 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 900 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 901 902 if (fl2g) { 903 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 904 PetscCall(PetscMalloc1(nee, &eedges)); 905 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 906 } else { 907 eedges = alleedges; 908 primals = allprimals; 909 } 910 911 /* Mark fine edge dofs with their coarse edge id */ 912 PetscCall(PetscArrayzero(marks, ne)); 913 PetscCall(ISGetLocalSize(primals, &cum)); 914 PetscCall(ISGetIndices(primals, &idxs)); 915 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1; 916 PetscCall(ISRestoreIndices(primals, &idxs)); 917 if (print) { 918 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs")); 919 PetscCall(ISView(primals, NULL)); 920 } 921 922 maxsize = 0; 923 for (i = 0; i < nee; i++) { 924 PetscInt size, mark = i + 1; 925 926 PetscCall(ISGetLocalSize(eedges[i], &size)); 927 PetscCall(ISGetIndices(eedges[i], &idxs)); 928 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 929 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 930 maxsize = PetscMax(maxsize, size); 931 } 932 933 /* Find coarse edge endpoints */ 934 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 935 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 936 for (i = 0; i < nee; i++) { 937 PetscInt mark = i + 1, size; 938 939 PetscCall(ISGetLocalSize(eedges[i], &size)); 940 if (!size && nedfieldlocal) continue; 941 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 942 PetscCall(ISGetIndices(eedges[i], &idxs)); 943 if (print) { 944 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i)); 945 PetscCall(ISView(eedges[i], NULL)); 946 } 947 for (j = 0; j < size; j++) { 948 PetscInt k, ee = idxs[j]; 949 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee)); 950 for (k = ii[ee]; k < ii[ee + 1]; k++) { 951 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k])); 952 if (PetscBTLookup(btv, jj[k])) { 953 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k])); 954 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */ 955 PetscInt k2; 956 PetscBool corner = PETSC_FALSE; 957 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) { 958 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]))); 959 /* it's a corner if either is connected with an edge dof belonging to a different cc or 960 if the edge dof lie on the natural part of the boundary */ 961 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) { 962 corner = PETSC_TRUE; 963 break; 964 } 965 } 966 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 967 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k])); 968 PetscCall(PetscBTSet(btv, jj[k])); 969 } else { 970 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n")); 971 } 972 } 973 } 974 } 975 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 976 } 977 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 978 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 979 PetscCall(PetscBTDestroy(&btb)); 980 981 /* Reset marked primal dofs */ 982 PetscCall(ISGetLocalSize(primals, &cum)); 983 PetscCall(ISGetIndices(primals, &idxs)); 984 for (i = 0; i < cum; i++) marks[idxs[i]] = 0; 985 PetscCall(ISRestoreIndices(primals, &idxs)); 986 987 /* Now use the initial lG */ 988 PetscCall(MatDestroy(&lG)); 989 PetscCall(MatDestroy(&lGt)); 990 lG = lGinit; 991 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt)); 992 993 /* Compute extended cols indices */ 994 PetscCall(PetscBTCreate(nv, &btvc)); 995 PetscCall(PetscBTCreate(nee, &bter)); 996 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 997 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i)); 998 i *= maxsize; 999 PetscCall(PetscCalloc1(nee, &extcols)); 1000 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs)); 1001 eerr = PETSC_FALSE; 1002 for (i = 0; i < nee; i++) { 1003 PetscInt size, found = 0; 1004 1005 cum = 0; 1006 PetscCall(ISGetLocalSize(eedges[i], &size)); 1007 if (!size && nedfieldlocal) continue; 1008 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1009 PetscCall(ISGetIndices(eedges[i], &idxs)); 1010 PetscCall(PetscBTMemzero(nv, btvc)); 1011 for (j = 0; j < size; j++) { 1012 PetscInt k, ee = idxs[j]; 1013 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1014 PetscInt vv = jj[k]; 1015 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv; 1016 else if (!PetscBTLookupSet(btvc, vv)) found++; 1017 } 1018 } 1019 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1020 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1021 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1022 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1023 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1024 /* it may happen that endpoints are not defined at this point 1025 if it is the case, mark this edge for a second pass */ 1026 if (cum != size - 1 || found != 2) { 1027 PetscCall(PetscBTSet(bter, i)); 1028 if (print) { 1029 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge")); 1030 PetscCall(ISView(eedges[i], NULL)); 1031 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol")); 1032 PetscCall(ISView(extcols[i], NULL)); 1033 } 1034 eerr = PETSC_TRUE; 1035 } 1036 } 1037 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 1038 PetscCall(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm)); 1039 if (done) { 1040 PetscInt *newprimals; 1041 1042 PetscCall(PetscMalloc1(ne, &newprimals)); 1043 PetscCall(ISGetLocalSize(primals, &cum)); 1044 PetscCall(ISGetIndices(primals, &idxs)); 1045 PetscCall(PetscArraycpy(newprimals, idxs, cum)); 1046 PetscCall(ISRestoreIndices(primals, &idxs)); 1047 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1048 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr])); 1049 for (i = 0; i < nee; i++) { 1050 PetscBool has_candidates = PETSC_FALSE; 1051 if (PetscBTLookup(bter, i)) { 1052 PetscInt size, mark = i + 1; 1053 1054 PetscCall(ISGetLocalSize(eedges[i], &size)); 1055 PetscCall(ISGetIndices(eedges[i], &idxs)); 1056 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1057 for (j = 0; j < size; j++) { 1058 PetscInt k, ee = idxs[j]; 1059 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1])); 1060 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1061 /* set all candidates located on the edge as corners */ 1062 if (PetscBTLookup(btvcand, jj[k])) { 1063 PetscInt k2, vv = jj[k]; 1064 has_candidates = PETSC_TRUE; 1065 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv)); 1066 PetscCall(PetscBTSet(btv, vv)); 1067 /* set all edge dofs connected to candidate as primals */ 1068 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) { 1069 if (marks[jjt[k2]] == mark) { 1070 PetscInt k3, ee2 = jjt[k2]; 1071 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2)); 1072 newprimals[cum++] = ee2; 1073 /* finally set the new corners */ 1074 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) { 1075 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3])); 1076 PetscCall(PetscBTSet(btv, jj[k3])); 1077 } 1078 } 1079 } 1080 } else { 1081 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k])); 1082 } 1083 } 1084 } 1085 if (!has_candidates) { /* circular edge */ 1086 PetscInt k, ee = idxs[0], *tmarks; 1087 1088 PetscCall(PetscCalloc1(ne, &tmarks)); 1089 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i)); 1090 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1091 PetscInt k2; 1092 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k])); 1093 PetscCall(PetscBTSet(btv, jj[k])); 1094 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++; 1095 } 1096 for (j = 0; j < size; j++) { 1097 if (tmarks[idxs[j]] > 1) { 1098 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j])); 1099 newprimals[cum++] = idxs[j]; 1100 } 1101 } 1102 PetscCall(PetscFree(tmarks)); 1103 } 1104 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1105 } 1106 PetscCall(ISDestroy(&extcols[i])); 1107 } 1108 PetscCall(PetscFree(extcols)); 1109 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done)); 1110 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals)); 1111 if (fl2g) { 1112 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals)); 1113 PetscCall(ISDestroy(&primals)); 1114 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1115 PetscCall(PetscFree(eedges)); 1116 } 1117 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1118 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals)); 1119 PetscCall(PetscFree(newprimals)); 1120 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals)); 1121 PetscCall(ISDestroy(&primals)); 1122 PetscCall(PCBDDCAnalyzeInterface(pc)); 1123 pcbddc->mat_graph->twodim = PETSC_FALSE; 1124 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1125 if (fl2g) { 1126 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals)); 1127 PetscCall(PetscMalloc1(nee, &eedges)); 1128 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i])); 1129 } else { 1130 eedges = alleedges; 1131 primals = allprimals; 1132 } 1133 PetscCall(PetscCalloc1(nee, &extcols)); 1134 1135 /* Mark again */ 1136 PetscCall(PetscArrayzero(marks, ne)); 1137 for (i = 0; i < nee; i++) { 1138 PetscInt size, mark = i + 1; 1139 1140 PetscCall(ISGetLocalSize(eedges[i], &size)); 1141 PetscCall(ISGetIndices(eedges[i], &idxs)); 1142 for (j = 0; j < size; j++) marks[idxs[j]] = mark; 1143 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1144 } 1145 if (print) { 1146 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass")); 1147 PetscCall(ISView(primals, NULL)); 1148 } 1149 1150 /* Recompute extended cols */ 1151 eerr = PETSC_FALSE; 1152 for (i = 0; i < nee; i++) { 1153 PetscInt size; 1154 1155 cum = 0; 1156 PetscCall(ISGetLocalSize(eedges[i], &size)); 1157 if (!size && nedfieldlocal) continue; 1158 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1159 PetscCall(ISGetIndices(eedges[i], &idxs)); 1160 for (j = 0; j < size; j++) { 1161 PetscInt k, ee = idxs[j]; 1162 for (k = ii[ee]; k < ii[ee + 1]; k++) 1163 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k]; 1164 } 1165 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1166 PetscCall(PetscSortRemoveDupsInt(&cum, extrow)); 1167 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs)); 1168 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow)); 1169 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i])); 1170 if (cum != size - 1) { 1171 if (print) { 1172 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass")); 1173 PetscCall(ISView(eedges[i], NULL)); 1174 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass")); 1175 PetscCall(ISView(extcols[i], NULL)); 1176 } 1177 eerr = PETSC_TRUE; 1178 } 1179 } 1180 } 1181 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1182 PetscCall(PetscFree2(extrow, gidxs)); 1183 PetscCall(PetscBTDestroy(&bter)); 1184 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF)); 1185 /* an error should not occur at this point */ 1186 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1187 1188 /* Check the number of endpoints */ 1189 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1190 PetscCall(PetscMalloc1(2 * nee, &corners)); 1191 PetscCall(PetscMalloc1(nee, &cedges)); 1192 for (i = 0; i < nee; i++) { 1193 PetscInt size, found = 0, gc[2]; 1194 1195 /* init with defaults */ 1196 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1; 1197 PetscCall(ISGetLocalSize(eedges[i], &size)); 1198 if (!size && nedfieldlocal) continue; 1199 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i); 1200 PetscCall(ISGetIndices(eedges[i], &idxs)); 1201 PetscCall(PetscBTMemzero(nv, btvc)); 1202 for (j = 0; j < size; j++) { 1203 PetscInt k, ee = idxs[j]; 1204 for (k = ii[ee]; k < ii[ee + 1]; k++) { 1205 PetscInt vv = jj[k]; 1206 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) { 1207 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i); 1208 corners[i * 2 + found++] = vv; 1209 } 1210 } 1211 } 1212 if (found != 2) { 1213 PetscInt e; 1214 if (fl2g) { 1215 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e)); 1216 } else { 1217 e = idxs[0]; 1218 } 1219 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]); 1220 } 1221 1222 /* get primal dof index on this coarse edge */ 1223 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc)); 1224 if (gc[0] > gc[1]) { 1225 PetscInt swap = corners[2 * i]; 1226 corners[2 * i] = corners[2 * i + 1]; 1227 corners[2 * i + 1] = swap; 1228 } 1229 cedges[i] = idxs[size - 1]; 1230 PetscCall(ISRestoreIndices(eedges[i], &idxs)); 1231 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1])); 1232 } 1233 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1234 PetscCall(PetscBTDestroy(&btvc)); 1235 1236 if (PetscDefined(USE_DEBUG)) { 1237 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1238 not interfere with neighbouring coarse edges */ 1239 PetscCall(PetscMalloc1(nee + 1, &emarks)); 1240 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1241 for (i = 0; i < nv; i++) { 1242 PetscInt emax = 0, eemax = 0; 1243 1244 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1245 PetscCall(PetscArrayzero(emarks, nee + 1)); 1246 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++; 1247 for (j = 1; j < nee + 1; j++) { 1248 if (emax < emarks[j]) { 1249 emax = emarks[j]; 1250 eemax = j; 1251 } 1252 } 1253 /* not relevant for edges */ 1254 if (!eemax) continue; 1255 1256 for (j = ii[i]; j < ii[i + 1]; j++) { 1257 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]); 1258 } 1259 } 1260 PetscCall(PetscFree(emarks)); 1261 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1262 } 1263 1264 /* Compute extended rows indices for edge blocks of the change of basis */ 1265 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1266 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem)); 1267 extmem *= maxsize; 1268 PetscCall(PetscMalloc1(extmem * nee, &extrow)); 1269 PetscCall(PetscMalloc1(nee, &extrows)); 1270 PetscCall(PetscCalloc1(nee, &extrowcum)); 1271 for (i = 0; i < nv; i++) { 1272 PetscInt mark = 0, size, start; 1273 1274 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue; 1275 for (j = ii[i]; j < ii[i + 1]; j++) 1276 if (marks[jj[j]] && !mark) mark = marks[jj[j]]; 1277 1278 /* not relevant */ 1279 if (!mark) continue; 1280 1281 /* import extended row */ 1282 mark--; 1283 start = mark * extmem + extrowcum[mark]; 1284 size = ii[i + 1] - ii[i]; 1285 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem); 1286 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size)); 1287 extrowcum[mark] += size; 1288 } 1289 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done)); 1290 PetscCall(MatDestroy(&lGt)); 1291 PetscCall(PetscFree(marks)); 1292 1293 /* Compress extrows */ 1294 cum = 0; 1295 for (i = 0; i < nee; i++) { 1296 PetscInt size = extrowcum[i], *start = extrow + i * extmem; 1297 PetscCall(PetscSortRemoveDupsInt(&size, start)); 1298 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i])); 1299 cum = PetscMax(cum, size); 1300 } 1301 PetscCall(PetscFree(extrowcum)); 1302 PetscCall(PetscBTDestroy(&btv)); 1303 PetscCall(PetscBTDestroy(&btvcand)); 1304 1305 /* Workspace for lapack inner calls and VecSetValues */ 1306 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork)); 1307 1308 /* Create change of basis matrix (preallocation can be improved) */ 1309 PetscCall(MatCreate(comm, &T)); 1310 PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap)); 1311 PetscCall(MatSetType(T, MATAIJ)); 1312 PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL)); 1313 PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL)); 1314 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g)); 1315 PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1316 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE)); 1317 PetscCall(ISLocalToGlobalMappingDestroy(&al2g)); 1318 1319 /* Defaults to identity */ 1320 for (i = pc->mat->rmap->rstart; i < pc->mat->rmap->rend; i++) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES)); 1321 1322 /* Create discrete gradient for the coarser level if needed */ 1323 PetscCall(MatDestroy(&pcbddc->nedcG)); 1324 PetscCall(ISDestroy(&pcbddc->nedclocal)); 1325 if (pcbddc->current_level < pcbddc->max_levels) { 1326 ISLocalToGlobalMapping cel2g, cvl2g; 1327 IS wis, gwis; 1328 PetscInt cnv, cne; 1329 1330 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis)); 1331 if (fl2g) { 1332 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal)); 1333 } else { 1334 PetscCall(PetscObjectReference((PetscObject)wis)); 1335 pcbddc->nedclocal = wis; 1336 } 1337 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis)); 1338 PetscCall(ISDestroy(&wis)); 1339 PetscCall(ISRenumber(gwis, NULL, &cne, &wis)); 1340 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g)); 1341 PetscCall(ISDestroy(&wis)); 1342 PetscCall(ISDestroy(&gwis)); 1343 1344 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis)); 1345 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis)); 1346 PetscCall(ISDestroy(&wis)); 1347 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis)); 1348 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g)); 1349 PetscCall(ISDestroy(&wis)); 1350 PetscCall(ISDestroy(&gwis)); 1351 1352 PetscCall(MatCreate(comm, &pcbddc->nedcG)); 1353 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv)); 1354 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ)); 1355 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL)); 1356 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL)); 1357 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g)); 1358 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g)); 1359 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g)); 1360 } 1361 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g)); 1362 1363 #if defined(PRINT_GDET) 1364 inc = 0; 1365 lev = pcbddc->current_level; 1366 #endif 1367 1368 /* Insert values in the change of basis matrix */ 1369 for (i = 0; i < nee; i++) { 1370 Mat Gins = NULL, GKins = NULL; 1371 IS cornersis = NULL; 1372 PetscScalar cvals[2]; 1373 1374 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis)); 1375 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork)); 1376 if (Gins && GKins) { 1377 const PetscScalar *data; 1378 const PetscInt *rows, *cols; 1379 PetscInt nrh, nch, nrc, ncc; 1380 1381 PetscCall(ISGetIndices(eedges[i], &cols)); 1382 /* H1 */ 1383 PetscCall(ISGetIndices(extrows[i], &rows)); 1384 PetscCall(MatGetSize(Gins, &nrh, &nch)); 1385 PetscCall(MatDenseGetArrayRead(Gins, &data)); 1386 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES)); 1387 PetscCall(MatDenseRestoreArrayRead(Gins, &data)); 1388 PetscCall(ISRestoreIndices(extrows[i], &rows)); 1389 /* complement */ 1390 PetscCall(MatGetSize(GKins, &nrc, &ncc)); 1391 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i); 1392 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i); 1393 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc); 1394 PetscCall(MatDenseGetArrayRead(GKins, &data)); 1395 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES)); 1396 PetscCall(MatDenseRestoreArrayRead(GKins, &data)); 1397 1398 /* coarse discrete gradient */ 1399 if (pcbddc->nedcG) { 1400 PetscInt cols[2]; 1401 1402 cols[0] = 2 * i; 1403 cols[1] = 2 * i + 1; 1404 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES)); 1405 } 1406 PetscCall(ISRestoreIndices(eedges[i], &cols)); 1407 } 1408 PetscCall(ISDestroy(&extrows[i])); 1409 PetscCall(ISDestroy(&extcols[i])); 1410 PetscCall(ISDestroy(&cornersis)); 1411 PetscCall(MatDestroy(&Gins)); 1412 PetscCall(MatDestroy(&GKins)); 1413 } 1414 PetscCall(ISLocalToGlobalMappingDestroy(&el2g)); 1415 1416 /* Start assembling */ 1417 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY)); 1418 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1419 1420 /* Free */ 1421 if (fl2g) { 1422 PetscCall(ISDestroy(&primals)); 1423 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i])); 1424 PetscCall(PetscFree(eedges)); 1425 } 1426 1427 /* hack mat_graph with primal dofs on the coarse edges */ 1428 { 1429 PCBDDCGraph graph = pcbddc->mat_graph; 1430 PetscInt *oqueue = graph->queue; 1431 PetscInt *ocptr = graph->cptr; 1432 PetscInt ncc, *idxs; 1433 1434 /* find first primal edge */ 1435 if (pcbddc->nedclocal) { 1436 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1437 } else { 1438 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges)); 1439 idxs = cedges; 1440 } 1441 cum = 0; 1442 while (cum < nee && cedges[cum] < 0) cum++; 1443 1444 /* adapt connected components */ 1445 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue)); 1446 graph->cptr[0] = 0; 1447 for (i = 0, ncc = 0; i < graph->ncc; i++) { 1448 PetscInt lc = ocptr[i + 1] - ocptr[i]; 1449 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */ 1450 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1; 1451 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1452 ncc++; 1453 lc--; 1454 cum++; 1455 while (cum < nee && cedges[cum] < 0) cum++; 1456 } 1457 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc; 1458 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j]; 1459 ncc++; 1460 } 1461 graph->ncc = ncc; 1462 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs)); 1463 PetscCall(PetscFree2(ocptr, oqueue)); 1464 } 1465 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g)); 1466 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals)); 1467 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph)); 1468 PetscCall(MatDestroy(&conn)); 1469 1470 PetscCall(ISDestroy(&nedfieldlocal)); 1471 PetscCall(PetscFree(extrow)); 1472 PetscCall(PetscFree2(work, rwork)); 1473 PetscCall(PetscFree(corners)); 1474 PetscCall(PetscFree(cedges)); 1475 PetscCall(PetscFree(extrows)); 1476 PetscCall(PetscFree(extcols)); 1477 PetscCall(MatDestroy(&lG)); 1478 1479 /* Complete assembling */ 1480 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY)); 1481 PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view")); 1482 if (pcbddc->nedcG) { 1483 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY)); 1484 PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_hange_view")); 1485 } 1486 1487 /* set change of basis */ 1488 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, singular)); 1489 PetscCall(MatDestroy(&T)); 1490 PetscFunctionReturn(PETSC_SUCCESS); 1491 } 1492 1493 /* the near-null space of BDDC carries information on quadrature weights, 1494 and these can be collinear -> so cheat with MatNullSpaceCreate 1495 and create a suitable set of basis vectors first */ 1496 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1497 { 1498 PetscInt i; 1499 1500 PetscFunctionBegin; 1501 for (i = 0; i < nvecs; i++) { 1502 PetscInt first, last; 1503 1504 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1505 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); 1506 if (i >= first && i < last) { 1507 PetscScalar *data; 1508 PetscCall(VecGetArray(quad_vecs[i], &data)); 1509 if (!has_const) { 1510 data[i - first] = 1.; 1511 } else { 1512 data[2 * i - first] = 1. / PetscSqrtReal(2.); 1513 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.); 1514 } 1515 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1516 } 1517 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1518 } 1519 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp)); 1520 for (i = 0; i < nvecs; i++) { /* reset vectors */ 1521 PetscInt first, last; 1522 PetscCall(VecLockReadPop(quad_vecs[i])); 1523 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last)); 1524 if (i >= first && i < last) { 1525 PetscScalar *data; 1526 PetscCall(VecGetArray(quad_vecs[i], &data)); 1527 if (!has_const) { 1528 data[i - first] = 0.; 1529 } else { 1530 data[2 * i - first] = 0.; 1531 data[2 * i - first + 1] = 0.; 1532 } 1533 PetscCall(VecRestoreArray(quad_vecs[i], &data)); 1534 } 1535 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i])); 1536 PetscCall(VecLockReadPush(quad_vecs[i])); 1537 } 1538 PetscFunctionReturn(PETSC_SUCCESS); 1539 } 1540 1541 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1542 { 1543 Mat loc_divudotp; 1544 Vec p, v, quad_vec; 1545 ISLocalToGlobalMapping map; 1546 PetscScalar *array; 1547 1548 PetscFunctionBegin; 1549 PetscCall(MatCreateVecs(A, &quad_vec, NULL)); 1550 if (!transpose) { 1551 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL)); 1552 } else { 1553 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map)); 1554 } 1555 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp)); 1556 PetscCall(VecLockReadPop(quad_vec)); 1557 PetscCall(VecSetLocalToGlobalMapping(quad_vec, map)); 1558 1559 /* compute local quad vec */ 1560 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp)); 1561 if (!transpose) { 1562 PetscCall(MatCreateVecs(loc_divudotp, &v, &p)); 1563 } else { 1564 PetscCall(MatCreateVecs(loc_divudotp, &p, &v)); 1565 } 1566 /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */ 1567 PetscCall(VecSet(p, 1.)); 1568 if (!transpose) { 1569 PetscCall(MatMultTranspose(loc_divudotp, p, v)); 1570 } else { 1571 PetscCall(MatMult(loc_divudotp, p, v)); 1572 } 1573 PetscCall(VecDestroy(&p)); 1574 if (vl2l) { 1575 Mat lA; 1576 VecScatter sc; 1577 Vec vins; 1578 1579 PetscCall(MatISGetLocalMat(A, &lA)); 1580 PetscCall(MatCreateVecs(lA, &vins, NULL)); 1581 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc)); 1582 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1583 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD)); 1584 PetscCall(VecScatterDestroy(&sc)); 1585 PetscCall(VecDestroy(&v)); 1586 v = vins; 1587 } 1588 1589 /* mask summation of interface values */ 1590 PetscInt n, *mmask, *mask, *idxs, nmr, nr; 1591 const PetscInt *degree; 1592 PetscSF msf; 1593 1594 PetscCall(VecGetLocalSize(v, &n)); 1595 PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL)); 1596 PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf)); 1597 PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL)); 1598 PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs)); 1599 PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, °ree)); 1600 PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, °ree)); 1601 for (PetscInt i = 0, c = 0; i < nr; i++) { 1602 mmask[c] = 1; 1603 c += degree[i]; 1604 } 1605 PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1606 PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask)); 1607 PetscCall(VecGetArray(v, &array)); 1608 for (PetscInt i = 0; i < n; i++) { 1609 array[i] *= mask[i]; 1610 idxs[i] = i; 1611 } 1612 PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES)); 1613 PetscCall(VecRestoreArray(v, &array)); 1614 PetscCall(PetscFree3(mmask, mask, idxs)); 1615 PetscCall(VecDestroy(&v)); 1616 PetscCall(VecAssemblyBegin(quad_vec)); 1617 PetscCall(VecAssemblyEnd(quad_vec)); 1618 PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view")); 1619 PetscCall(VecLockReadPush(quad_vec)); 1620 PetscCall(VecDestroy(&quad_vec)); 1621 PetscFunctionReturn(PETSC_SUCCESS); 1622 } 1623 1624 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1625 { 1626 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1627 1628 PetscFunctionBegin; 1629 if (primalv) { 1630 if (pcbddc->user_primal_vertices_local) { 1631 IS list[2], newp; 1632 1633 list[0] = primalv; 1634 list[1] = pcbddc->user_primal_vertices_local; 1635 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp)); 1636 PetscCall(ISSortRemoveDups(newp)); 1637 PetscCall(ISDestroy(&list[1])); 1638 pcbddc->user_primal_vertices_local = newp; 1639 } else { 1640 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv)); 1641 } 1642 } 1643 PetscFunctionReturn(PETSC_SUCCESS); 1644 } 1645 1646 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1647 { 1648 PetscInt f, *comp = (PetscInt *)ctx; 1649 1650 PetscFunctionBegin; 1651 for (f = 0; f < Nf; f++) out[f] = X[*comp]; 1652 PetscFunctionReturn(PETSC_SUCCESS); 1653 } 1654 1655 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1656 { 1657 Vec local, global; 1658 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1659 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1660 PetscBool monolithic = PETSC_FALSE; 1661 1662 PetscFunctionBegin; 1663 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC"); 1664 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL)); 1665 PetscOptionsEnd(); 1666 /* need to convert from global to local topology information and remove references to information in global ordering */ 1667 PetscCall(MatCreateVecs(pc->pmat, &global, NULL)); 1668 PetscCall(MatCreateVecs(matis->A, &local, NULL)); 1669 PetscCall(VecBindToCPU(global, PETSC_TRUE)); 1670 PetscCall(VecBindToCPU(local, PETSC_TRUE)); 1671 if (monolithic) { /* just get block size to properly compute vertices */ 1672 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size)); 1673 goto boundary; 1674 } 1675 1676 if (pcbddc->user_provided_isfordofs) { 1677 if (pcbddc->n_ISForDofs) { 1678 PetscInt i; 1679 1680 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal)); 1681 for (i = 0; i < pcbddc->n_ISForDofs; i++) { 1682 PetscInt bs; 1683 1684 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i])); 1685 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs)); 1686 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1687 PetscCall(ISDestroy(&pcbddc->ISForDofs[i])); 1688 } 1689 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1690 pcbddc->n_ISForDofs = 0; 1691 PetscCall(PetscFree(pcbddc->ISForDofs)); 1692 } 1693 } else { 1694 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1695 DM dm; 1696 1697 PetscCall(MatGetDM(pc->pmat, &dm)); 1698 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1699 if (dm) { 1700 IS *fields; 1701 PetscInt nf, i; 1702 1703 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL)); 1704 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal)); 1705 for (i = 0; i < nf; i++) { 1706 PetscInt bs; 1707 1708 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i])); 1709 PetscCall(ISGetBlockSize(fields[i], &bs)); 1710 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs)); 1711 PetscCall(ISDestroy(&fields[i])); 1712 } 1713 PetscCall(PetscFree(fields)); 1714 pcbddc->n_ISForDofsLocal = nf; 1715 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1716 PetscContainer c; 1717 1718 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c)); 1719 if (c) { 1720 MatISLocalFields lf; 1721 PetscCall(PetscContainerGetPointer(c, (void **)&lf)); 1722 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf)); 1723 } else { /* fallback, create the default fields if bs > 1 */ 1724 PetscInt i, n = matis->A->rmap->n; 1725 PetscCall(MatGetBlockSize(pc->pmat, &i)); 1726 if (i > 1) { 1727 pcbddc->n_ISForDofsLocal = i; 1728 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal)); 1729 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i])); 1730 } 1731 } 1732 } 1733 } else { 1734 PetscInt i; 1735 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i])); 1736 } 1737 } 1738 1739 boundary: 1740 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1741 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal)); 1742 } else if (pcbddc->DirichletBoundariesLocal) { 1743 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal)); 1744 } 1745 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1746 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal)); 1747 } else if (pcbddc->NeumannBoundariesLocal) { 1748 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal)); 1749 } 1750 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local)); 1751 PetscCall(VecDestroy(&global)); 1752 PetscCall(VecDestroy(&local)); 1753 /* detect local disconnected subdomains if requested or needed */ 1754 if (pcbddc->detect_disconnected || matis->allow_repeated) { 1755 IS primalv = NULL; 1756 PetscInt i; 1757 PetscBool filter = pcbddc->detect_disconnected_filter; 1758 1759 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 1760 PetscCall(PetscFree(pcbddc->local_subs)); 1761 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv)); 1762 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv)); 1763 PetscCall(ISDestroy(&primalv)); 1764 } 1765 /* early stage corner detection */ 1766 { 1767 DM dm; 1768 1769 PetscCall(MatGetDM(pc->pmat, &dm)); 1770 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1771 if (dm) { 1772 PetscBool isda; 1773 1774 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda)); 1775 if (isda) { 1776 ISLocalToGlobalMapping l2l; 1777 IS corners; 1778 Mat lA; 1779 PetscBool gl, lo; 1780 1781 { 1782 Vec cvec; 1783 const PetscScalar *coords; 1784 PetscInt dof, n, cdim; 1785 PetscBool memc = PETSC_TRUE; 1786 1787 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1788 PetscCall(DMGetCoordinates(dm, &cvec)); 1789 PetscCall(VecGetLocalSize(cvec, &n)); 1790 PetscCall(VecGetBlockSize(cvec, &cdim)); 1791 n /= cdim; 1792 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 1793 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords)); 1794 PetscCall(VecGetArrayRead(cvec, &coords)); 1795 #if defined(PETSC_USE_COMPLEX) 1796 memc = PETSC_FALSE; 1797 #endif 1798 if (dof != 1) memc = PETSC_FALSE; 1799 if (memc) { 1800 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof)); 1801 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */ 1802 PetscReal *bcoords = pcbddc->mat_graph->coords; 1803 PetscInt i, b, d; 1804 1805 for (i = 0; i < n; i++) { 1806 for (b = 0; b < dof; b++) { 1807 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]); 1808 } 1809 } 1810 } 1811 PetscCall(VecRestoreArrayRead(cvec, &coords)); 1812 pcbddc->mat_graph->cdim = cdim; 1813 pcbddc->mat_graph->cnloc = dof * n; 1814 pcbddc->mat_graph->cloc = PETSC_FALSE; 1815 } 1816 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners)); 1817 PetscCall(MatISGetLocalMat(pc->pmat, &lA)); 1818 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL)); 1819 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA)); 1820 lo = (PetscBool)(l2l && corners); 1821 PetscCall(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 1822 if (gl) { /* From PETSc's DMDA */ 1823 const PetscInt *idx; 1824 PetscInt dof, bs, *idxout, n; 1825 1826 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL)); 1827 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs)); 1828 PetscCall(ISGetLocalSize(corners, &n)); 1829 PetscCall(ISGetIndices(corners, &idx)); 1830 if (bs == dof) { 1831 PetscCall(PetscMalloc1(n, &idxout)); 1832 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout)); 1833 } else { /* the original DMDA local-to-local map have been modified */ 1834 PetscInt i, d; 1835 1836 PetscCall(PetscMalloc1(dof * n, &idxout)); 1837 for (i = 0; i < n; i++) 1838 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d; 1839 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout)); 1840 1841 bs = 1; 1842 n *= dof; 1843 } 1844 PetscCall(ISRestoreIndices(corners, &idx)); 1845 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1846 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners)); 1847 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners)); 1848 PetscCall(ISDestroy(&corners)); 1849 pcbddc->corner_selected = PETSC_TRUE; 1850 pcbddc->corner_selection = PETSC_TRUE; 1851 } 1852 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners)); 1853 } 1854 } 1855 } 1856 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1857 DM dm; 1858 1859 PetscCall(MatGetDM(pc->pmat, &dm)); 1860 if (!dm) PetscCall(PCGetDM(pc, &dm)); 1861 if (dm) { /* this can get very expensive, I need to find a faster alternative */ 1862 Vec vcoords; 1863 PetscSection section; 1864 PetscReal *coords; 1865 PetscInt d, cdim, nl, nf, **ctxs; 1866 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1867 /* debug coordinates */ 1868 PetscViewer viewer; 1869 PetscBool flg; 1870 PetscViewerFormat format; 1871 const char *prefix; 1872 1873 PetscCall(DMGetCoordinateDim(dm, &cdim)); 1874 PetscCall(DMGetLocalSection(dm, §ion)); 1875 PetscCall(PetscSectionGetNumFields(section, &nf)); 1876 PetscCall(DMCreateGlobalVector(dm, &vcoords)); 1877 PetscCall(VecGetLocalSize(vcoords, &nl)); 1878 PetscCall(PetscMalloc1(nl * cdim, &coords)); 1879 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs)); 1880 PetscCall(PetscMalloc1(nf, &ctxs[0])); 1881 for (d = 0; d < nf; d++) funcs[d] = func_coords_private; 1882 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1; 1883 1884 /* debug coordinates */ 1885 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 1886 PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg)); 1887 if (flg) PetscCall(PetscViewerPushFormat(viewer, format)); 1888 for (d = 0; d < cdim; d++) { 1889 PetscInt i; 1890 const PetscScalar *v; 1891 char name[16]; 1892 1893 for (i = 0; i < nf; i++) ctxs[i][0] = d; 1894 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d)); 1895 PetscCall(PetscObjectSetName((PetscObject)vcoords, name)); 1896 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords)); 1897 if (flg) PetscCall(VecView(vcoords, viewer)); 1898 PetscCall(VecGetArrayRead(vcoords, &v)); 1899 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]); 1900 PetscCall(VecRestoreArrayRead(vcoords, &v)); 1901 } 1902 PetscCall(VecDestroy(&vcoords)); 1903 PetscCall(PCSetCoordinates(pc, cdim, nl, coords)); 1904 PetscCall(PetscFree(coords)); 1905 PetscCall(PetscFree(ctxs[0])); 1906 PetscCall(PetscFree2(funcs, ctxs)); 1907 if (flg) { 1908 PetscCall(PetscViewerPopFormat(viewer)); 1909 PetscCall(PetscOptionsRestoreViewer(&viewer)); 1910 } 1911 } 1912 } 1913 PetscFunctionReturn(PETSC_SUCCESS); 1914 } 1915 1916 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1917 { 1918 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 1919 IS nis; 1920 const PetscInt *idxs; 1921 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd; 1922 1923 PetscFunctionBegin; 1924 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)(pc)), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR"); 1925 if (mop == MPI_LAND) { 1926 /* init rootdata with true */ 1927 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1; 1928 } else { 1929 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n)); 1930 } 1931 PetscCall(PetscArrayzero(matis->sf_leafdata, n)); 1932 PetscCall(ISGetLocalSize(*is, &nd)); 1933 PetscCall(ISGetIndices(*is, &idxs)); 1934 for (i = 0; i < nd; i++) 1935 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1; 1936 PetscCall(ISRestoreIndices(*is, &idxs)); 1937 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 1938 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop)); 1939 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 1940 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE)); 1941 if (mop == MPI_LAND) { 1942 PetscCall(PetscMalloc1(nd, &nidxs)); 1943 } else { 1944 PetscCall(PetscMalloc1(n, &nidxs)); 1945 } 1946 for (i = 0, nnd = 0; i < n; i++) 1947 if (matis->sf_leafdata[i]) nidxs[nnd++] = i; 1948 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis)); 1949 PetscCall(ISDestroy(is)); 1950 *is = nis; 1951 PetscFunctionReturn(PETSC_SUCCESS); 1952 } 1953 1954 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z) 1955 { 1956 PC_IS *pcis = (PC_IS *)pc->data; 1957 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 1958 1959 PetscFunctionBegin; 1960 if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS); 1961 if (pcbddc->ChangeOfBasisMatrix) { 1962 Vec swap; 1963 1964 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change)); 1965 swap = pcbddc->work_change; 1966 pcbddc->work_change = r; 1967 r = swap; 1968 } 1969 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 1970 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD)); 1971 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 1972 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D)); 1973 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0)); 1974 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 1975 PetscCall(VecSet(z, 0.)); 1976 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 1977 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE)); 1978 if (pcbddc->ChangeOfBasisMatrix) { 1979 pcbddc->work_change = r; 1980 PetscCall(VecCopy(z, pcbddc->work_change)); 1981 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z)); 1982 } 1983 PetscFunctionReturn(PETSC_SUCCESS); 1984 } 1985 1986 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1987 { 1988 PCBDDCBenignMatMult_ctx ctx; 1989 PetscBool apply_right, apply_left, reset_x; 1990 1991 PetscFunctionBegin; 1992 PetscCall(MatShellGetContext(A, &ctx)); 1993 if (transpose) { 1994 apply_right = ctx->apply_left; 1995 apply_left = ctx->apply_right; 1996 } else { 1997 apply_right = ctx->apply_right; 1998 apply_left = ctx->apply_left; 1999 } 2000 reset_x = PETSC_FALSE; 2001 if (apply_right) { 2002 const PetscScalar *ax; 2003 PetscInt nl, i; 2004 2005 PetscCall(VecGetLocalSize(x, &nl)); 2006 PetscCall(VecGetArrayRead(x, &ax)); 2007 PetscCall(PetscArraycpy(ctx->work, ax, nl)); 2008 PetscCall(VecRestoreArrayRead(x, &ax)); 2009 for (i = 0; i < ctx->benign_n; i++) { 2010 PetscScalar sum, val; 2011 const PetscInt *idxs; 2012 PetscInt nz, j; 2013 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2014 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2015 sum = 0.; 2016 if (ctx->apply_p0) { 2017 val = ctx->work[idxs[nz - 1]]; 2018 for (j = 0; j < nz - 1; j++) { 2019 sum += ctx->work[idxs[j]]; 2020 ctx->work[idxs[j]] += val; 2021 } 2022 } else { 2023 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]]; 2024 } 2025 ctx->work[idxs[nz - 1]] -= sum; 2026 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2027 } 2028 PetscCall(VecPlaceArray(x, ctx->work)); 2029 reset_x = PETSC_TRUE; 2030 } 2031 if (transpose) { 2032 PetscCall(MatMultTranspose(ctx->A, x, y)); 2033 } else { 2034 PetscCall(MatMult(ctx->A, x, y)); 2035 } 2036 if (reset_x) PetscCall(VecResetArray(x)); 2037 if (apply_left) { 2038 PetscScalar *ay; 2039 PetscInt i; 2040 2041 PetscCall(VecGetArray(y, &ay)); 2042 for (i = 0; i < ctx->benign_n; i++) { 2043 PetscScalar sum, val; 2044 const PetscInt *idxs; 2045 PetscInt nz, j; 2046 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz)); 2047 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2048 val = -ay[idxs[nz - 1]]; 2049 if (ctx->apply_p0) { 2050 sum = 0.; 2051 for (j = 0; j < nz - 1; j++) { 2052 sum += ay[idxs[j]]; 2053 ay[idxs[j]] += val; 2054 } 2055 ay[idxs[nz - 1]] += sum; 2056 } else { 2057 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val; 2058 ay[idxs[nz - 1]] = 0.; 2059 } 2060 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs)); 2061 } 2062 PetscCall(VecRestoreArray(y, &ay)); 2063 } 2064 PetscFunctionReturn(PETSC_SUCCESS); 2065 } 2066 2067 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2068 { 2069 PetscFunctionBegin; 2070 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE)); 2071 PetscFunctionReturn(PETSC_SUCCESS); 2072 } 2073 2074 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2075 { 2076 PetscFunctionBegin; 2077 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE)); 2078 PetscFunctionReturn(PETSC_SUCCESS); 2079 } 2080 2081 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2082 { 2083 PC_IS *pcis = (PC_IS *)pc->data; 2084 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2085 PCBDDCBenignMatMult_ctx ctx; 2086 2087 PetscFunctionBegin; 2088 if (!restore) { 2089 Mat A_IB, A_BI; 2090 PetscScalar *work; 2091 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2092 2093 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored"); 2094 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS); 2095 PetscCall(PetscMalloc1(pcis->n, &work)); 2096 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB)); 2097 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE)); 2098 PetscCall(MatSetType(A_IB, MATSHELL)); 2099 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private)); 2100 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private)); 2101 PetscCall(PetscNew(&ctx)); 2102 PetscCall(MatShellSetContext(A_IB, ctx)); 2103 ctx->apply_left = PETSC_TRUE; 2104 ctx->apply_right = PETSC_FALSE; 2105 ctx->apply_p0 = PETSC_FALSE; 2106 ctx->benign_n = pcbddc->benign_n; 2107 if (reuse) { 2108 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2109 ctx->free = PETSC_FALSE; 2110 } else { /* TODO: could be optimized for successive solves */ 2111 ISLocalToGlobalMapping N_to_D; 2112 PetscInt i; 2113 2114 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D)); 2115 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs)); 2116 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i])); 2117 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D)); 2118 ctx->free = PETSC_TRUE; 2119 } 2120 ctx->A = pcis->A_IB; 2121 ctx->work = work; 2122 PetscCall(MatSetUp(A_IB)); 2123 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY)); 2124 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY)); 2125 pcis->A_IB = A_IB; 2126 2127 /* A_BI as A_IB^T */ 2128 PetscCall(MatCreateTranspose(A_IB, &A_BI)); 2129 pcbddc->benign_original_mat = pcis->A_BI; 2130 pcis->A_BI = A_BI; 2131 } else { 2132 if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS); 2133 PetscCall(MatShellGetContext(pcis->A_IB, &ctx)); 2134 PetscCall(MatDestroy(&pcis->A_IB)); 2135 pcis->A_IB = ctx->A; 2136 ctx->A = NULL; 2137 PetscCall(MatDestroy(&pcis->A_BI)); 2138 pcis->A_BI = pcbddc->benign_original_mat; 2139 pcbddc->benign_original_mat = NULL; 2140 if (ctx->free) { 2141 PetscInt i; 2142 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i])); 2143 PetscCall(PetscFree(ctx->benign_zerodiag_subs)); 2144 } 2145 PetscCall(PetscFree(ctx->work)); 2146 PetscCall(PetscFree(ctx)); 2147 } 2148 PetscFunctionReturn(PETSC_SUCCESS); 2149 } 2150 2151 /* used just in bddc debug mode */ 2152 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2153 { 2154 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2155 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2156 Mat An; 2157 2158 PetscFunctionBegin; 2159 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An)); 2160 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL)); 2161 if (is1) { 2162 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B)); 2163 PetscCall(MatDestroy(&An)); 2164 } else { 2165 *B = An; 2166 } 2167 PetscFunctionReturn(PETSC_SUCCESS); 2168 } 2169 2170 /* TODO: add reuse flag */ 2171 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2172 { 2173 Mat Bt; 2174 PetscScalar *a, *bdata; 2175 const PetscInt *ii, *ij; 2176 PetscInt m, n, i, nnz, *bii, *bij; 2177 PetscBool flg_row; 2178 2179 PetscFunctionBegin; 2180 PetscCall(MatGetSize(A, &n, &m)); 2181 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2182 PetscCall(MatSeqAIJGetArray(A, &a)); 2183 nnz = n; 2184 for (i = 0; i < ii[n]; i++) { 2185 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2186 } 2187 PetscCall(PetscMalloc1(n + 1, &bii)); 2188 PetscCall(PetscMalloc1(nnz, &bij)); 2189 PetscCall(PetscMalloc1(nnz, &bdata)); 2190 nnz = 0; 2191 bii[0] = 0; 2192 for (i = 0; i < n; i++) { 2193 PetscInt j; 2194 for (j = ii[i]; j < ii[i + 1]; j++) { 2195 PetscScalar entry = a[j]; 2196 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2197 bij[nnz] = ij[j]; 2198 bdata[nnz] = entry; 2199 nnz++; 2200 } 2201 } 2202 bii[i + 1] = nnz; 2203 } 2204 PetscCall(MatSeqAIJRestoreArray(A, &a)); 2205 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt)); 2206 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row)); 2207 { 2208 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data; 2209 b->free_a = PETSC_TRUE; 2210 b->free_ij = PETSC_TRUE; 2211 } 2212 if (*B == A) PetscCall(MatDestroy(&A)); 2213 *B = Bt; 2214 PetscFunctionReturn(PETSC_SUCCESS); 2215 } 2216 2217 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv) 2218 { 2219 Mat B = NULL; 2220 DM dm; 2221 IS is_dummy, *cc_n; 2222 ISLocalToGlobalMapping l2gmap_dummy; 2223 PCBDDCGraph graph; 2224 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL; 2225 PetscInt i, n; 2226 PetscInt *xadj, *adjncy; 2227 PetscBool isplex = PETSC_FALSE; 2228 2229 PetscFunctionBegin; 2230 if (ncc) *ncc = 0; 2231 if (cc) *cc = NULL; 2232 if (primalv) *primalv = NULL; 2233 PetscCall(PCBDDCGraphCreate(&graph)); 2234 PetscCall(MatGetDM(pc->pmat, &dm)); 2235 if (!dm) PetscCall(PCGetDM(pc, &dm)); 2236 if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, "")); 2237 if (filter) isplex = PETSC_FALSE; 2238 2239 if (isplex) { /* this code has been modified from plexpartition.c */ 2240 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2241 PetscInt *adj = NULL; 2242 IS cellNumbering; 2243 const PetscInt *cellNum; 2244 PetscBool useCone, useClosure; 2245 PetscSection section; 2246 PetscSegBuffer adjBuffer; 2247 PetscSF sfPoint; 2248 2249 PetscCall(DMConvert(dm, DMPLEX, &dm)); 2250 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd)); 2251 PetscCall(DMGetPointSF(dm, &sfPoint)); 2252 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL)); 2253 /* Build adjacency graph via a section/segbuffer */ 2254 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion)); 2255 PetscCall(PetscSectionSetChart(section, pStart, pEnd)); 2256 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer)); 2257 /* Always use FVM adjacency to create partitioner graph */ 2258 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure)); 2259 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE)); 2260 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering)); 2261 PetscCall(ISGetIndices(cellNumbering, &cellNum)); 2262 for (n = 0, p = pStart; p < pEnd; p++) { 2263 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2264 if (nroots > 0) { 2265 if (cellNum[p] < 0) continue; 2266 } 2267 adjSize = PETSC_DETERMINE; 2268 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj)); 2269 for (a = 0; a < adjSize; ++a) { 2270 const PetscInt point = adj[a]; 2271 if (pStart <= point && point < pEnd) { 2272 PetscInt *PETSC_RESTRICT pBuf; 2273 PetscCall(PetscSectionAddDof(section, p, 1)); 2274 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf)); 2275 *pBuf = point; 2276 } 2277 } 2278 n++; 2279 } 2280 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure)); 2281 /* Derive CSR graph from section/segbuffer */ 2282 PetscCall(PetscSectionSetUp(section)); 2283 PetscCall(PetscSectionGetStorageSize(section, &size)); 2284 PetscCall(PetscMalloc1(n + 1, &xadj)); 2285 for (idx = 0, p = pStart; p < pEnd; p++) { 2286 if (nroots > 0) { 2287 if (cellNum[p] < 0) continue; 2288 } 2289 PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++])); 2290 } 2291 xadj[n] = size; 2292 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy)); 2293 /* Clean up */ 2294 PetscCall(PetscSegBufferDestroy(&adjBuffer)); 2295 PetscCall(PetscSectionDestroy(§ion)); 2296 PetscCall(PetscFree(adj)); 2297 graph->xadj = xadj; 2298 graph->adjncy = adjncy; 2299 } else { 2300 Mat A; 2301 PetscBool isseqaij, flg_row; 2302 2303 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2304 if (!A->rmap->N || !A->cmap->N) { 2305 PetscCall(PCBDDCGraphDestroy(&graph)); 2306 PetscFunctionReturn(PETSC_SUCCESS); 2307 } 2308 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 2309 if (!isseqaij && filter) { 2310 PetscBool isseqdense; 2311 2312 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense)); 2313 if (!isseqdense) { 2314 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2315 } else { /* TODO: rectangular case and LDA */ 2316 PetscScalar *array; 2317 PetscReal chop = 1.e-6; 2318 2319 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 2320 PetscCall(MatDenseGetArray(B, &array)); 2321 PetscCall(MatGetSize(B, &n, NULL)); 2322 for (i = 0; i < n; i++) { 2323 PetscInt j; 2324 for (j = i + 1; j < n; j++) { 2325 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)])); 2326 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.; 2327 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.; 2328 } 2329 } 2330 PetscCall(MatDenseRestoreArray(B, &array)); 2331 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B)); 2332 } 2333 } else { 2334 PetscCall(PetscObjectReference((PetscObject)A)); 2335 B = A; 2336 } 2337 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2338 2339 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2340 if (filter) { 2341 PetscScalar *data; 2342 PetscInt j, cum; 2343 2344 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered)); 2345 PetscCall(MatSeqAIJGetArray(B, &data)); 2346 cum = 0; 2347 for (i = 0; i < n; i++) { 2348 PetscInt t; 2349 2350 for (j = xadj[i]; j < xadj[i + 1]; j++) { 2351 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue; 2352 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j]; 2353 } 2354 t = xadj_filtered[i]; 2355 xadj_filtered[i] = cum; 2356 cum += t; 2357 } 2358 PetscCall(MatSeqAIJRestoreArray(B, &data)); 2359 graph->xadj = xadj_filtered; 2360 graph->adjncy = adjncy_filtered; 2361 } else { 2362 graph->xadj = xadj; 2363 graph->adjncy = adjncy; 2364 } 2365 } 2366 /* compute local connected components using PCBDDCGraph */ 2367 graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */ 2368 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy)); 2369 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy)); 2370 PetscCall(ISDestroy(&is_dummy)); 2371 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_MAX_INT)); 2372 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy)); 2373 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL)); 2374 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 2375 2376 /* partial clean up */ 2377 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered)); 2378 if (B) { 2379 PetscBool flg_row; 2380 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 2381 PetscCall(MatDestroy(&B)); 2382 } 2383 if (isplex) { 2384 PetscCall(PetscFree(xadj)); 2385 PetscCall(PetscFree(adjncy)); 2386 } 2387 2388 /* get back data */ 2389 if (isplex) { 2390 if (ncc) *ncc = graph->ncc; 2391 if (cc || primalv) { 2392 Mat A; 2393 PetscBT btv, btvt, btvc; 2394 PetscSection subSection; 2395 PetscInt *ids, cum, cump, *cids, *pids; 2396 PetscInt dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd; 2397 2398 PetscCall(DMGetDimension(dm, &dim)); 2399 PetscCall(DMPlexGetSubdomainSection(dm, &subSection)); 2400 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd)); 2401 PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd)); 2402 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd)); 2403 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd)); 2404 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2405 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids)); 2406 PetscCall(PetscBTCreate(A->rmap->n, &btv)); 2407 PetscCall(PetscBTCreate(A->rmap->n, &btvt)); 2408 PetscCall(PetscBTCreate(pEnd - pStart, &btvc)); 2409 2410 /* First see if we find corners for the subdomains, i.e. a vertex 2411 shared by at least dim subdomain boundary faces. This does not 2412 cover all the possible cases with simplices but it is enough 2413 for tensor cells */ 2414 if (vStart != fStart && dim <= 3) { 2415 for (PetscInt c = cStart; c < cEnd; c++) { 2416 PetscInt nf, cnt = 0, mcnt = dim, *cfaces; 2417 const PetscInt *faces; 2418 2419 PetscCall(DMPlexGetConeSize(dm, c, &nf)); 2420 PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces)); 2421 PetscCall(DMPlexGetCone(dm, c, &faces)); 2422 for (PetscInt f = 0; f < nf; f++) { 2423 PetscInt nc, ff; 2424 2425 PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc)); 2426 PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL)); 2427 if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f]; 2428 } 2429 if (cnt >= mcnt) { 2430 PetscInt size, *closure = NULL; 2431 2432 PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2433 for (PetscInt k = 0; k < 2 * size; k += 2) { 2434 PetscInt v = closure[k]; 2435 if (v >= vStart && v < vEnd) { 2436 PetscInt vsize, *vclosure = NULL; 2437 2438 cnt = 0; 2439 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2440 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) { 2441 PetscInt f = vclosure[vk]; 2442 if (f >= fStart && f < fEnd) { 2443 PetscInt nc, ff; 2444 PetscBool valid = PETSC_FALSE; 2445 2446 for (PetscInt fk = 0; fk < nf; fk++) 2447 if (f == cfaces[fk]) valid = PETSC_TRUE; 2448 if (!valid) continue; 2449 PetscCall(DMPlexGetSupportSize(dm, f, &nc)); 2450 PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL)); 2451 if (nc == 1 && f == ff) cnt++; 2452 } 2453 } 2454 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart)); 2455 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure)); 2456 } 2457 } 2458 PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure)); 2459 } 2460 PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces)); 2461 } 2462 } 2463 2464 cids[0] = 0; 2465 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2466 PetscInt j; 2467 2468 PetscCall(PetscBTMemzero(A->rmap->n, btvt)); 2469 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) { 2470 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2471 2472 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2473 for (k = 0; k < 2 * size; k += 2) { 2474 PetscInt s, pp, p = closure[k], off, dof, cdof; 2475 2476 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof)); 2477 PetscCall(PetscSectionGetOffset(subSection, p, &off)); 2478 PetscCall(PetscSectionGetDof(subSection, p, &dof)); 2479 for (s = 0; s < dof - cdof; s++) { 2480 if (PetscBTLookupSet(btvt, off + s)) continue; 2481 if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2482 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2483 else pids[cump++] = off + s; /* cross-vertex */ 2484 } 2485 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL)); 2486 if (pp != p) { 2487 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof)); 2488 PetscCall(PetscSectionGetOffset(subSection, pp, &off)); 2489 PetscCall(PetscSectionGetDof(subSection, pp, &dof)); 2490 for (s = 0; s < dof - cdof; s++) { 2491 if (PetscBTLookupSet(btvt, off + s)) continue; 2492 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */ 2493 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s; 2494 else pids[cump++] = off + s; /* cross-vertex */ 2495 } 2496 } 2497 } 2498 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure)); 2499 } 2500 cids[i + 1] = cum; 2501 /* mark dofs as already assigned */ 2502 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j])); 2503 } 2504 if (cc) { 2505 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2506 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i])); 2507 *cc = cc_n; 2508 } 2509 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv)); 2510 PetscCall(PetscFree3(ids, cids, pids)); 2511 PetscCall(PetscBTDestroy(&btv)); 2512 PetscCall(PetscBTDestroy(&btvt)); 2513 PetscCall(PetscBTDestroy(&btvc)); 2514 PetscCall(DMDestroy(&dm)); 2515 } 2516 } else { 2517 if (ncc) *ncc = graph->ncc; 2518 if (cc) { 2519 PetscCall(PetscMalloc1(graph->ncc, &cc_n)); 2520 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i])); 2521 *cc = cc_n; 2522 } 2523 } 2524 /* clean up graph */ 2525 graph->xadj = NULL; 2526 graph->adjncy = NULL; 2527 PetscCall(PCBDDCGraphDestroy(&graph)); 2528 PetscFunctionReturn(PETSC_SUCCESS); 2529 } 2530 2531 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2532 { 2533 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2534 PC_IS *pcis = (PC_IS *)pc->data; 2535 IS dirIS = NULL; 2536 PetscInt i; 2537 2538 PetscFunctionBegin; 2539 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS)); 2540 if (zerodiag) { 2541 Mat A; 2542 Vec vec3_N; 2543 PetscScalar *vals; 2544 const PetscInt *idxs; 2545 PetscInt nz, *count; 2546 2547 /* p0 */ 2548 PetscCall(VecSet(pcis->vec1_N, 0.)); 2549 PetscCall(PetscMalloc1(pcis->n, &vals)); 2550 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2551 PetscCall(ISGetIndices(zerodiag, &idxs)); 2552 for (i = 0; i < nz; i++) vals[i] = 1.; 2553 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES)); 2554 PetscCall(VecAssemblyBegin(pcis->vec1_N)); 2555 PetscCall(VecAssemblyEnd(pcis->vec1_N)); 2556 /* v_I */ 2557 PetscCall(VecSetRandom(pcis->vec2_N, NULL)); 2558 for (i = 0; i < nz; i++) vals[i] = 0.; 2559 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES)); 2560 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2561 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2562 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.; 2563 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES)); 2564 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2565 if (dirIS) { 2566 PetscInt n; 2567 2568 PetscCall(ISGetLocalSize(dirIS, &n)); 2569 PetscCall(ISGetIndices(dirIS, &idxs)); 2570 for (i = 0; i < n; i++) vals[i] = 0.; 2571 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES)); 2572 PetscCall(ISRestoreIndices(dirIS, &idxs)); 2573 } 2574 PetscCall(VecAssemblyBegin(pcis->vec2_N)); 2575 PetscCall(VecAssemblyEnd(pcis->vec2_N)); 2576 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N)); 2577 PetscCall(VecSet(vec3_N, 0.)); 2578 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2579 PetscCall(MatMult(A, pcis->vec1_N, vec3_N)); 2580 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0])); 2581 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0])); 2582 PetscCall(PetscFree(vals)); 2583 PetscCall(VecDestroy(&vec3_N)); 2584 2585 /* there should not be any pressure dofs lying on the interface */ 2586 PetscCall(PetscCalloc1(pcis->n, &count)); 2587 PetscCall(ISGetIndices(pcis->is_B_local, &idxs)); 2588 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++; 2589 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs)); 2590 PetscCall(ISGetIndices(zerodiag, &idxs)); 2591 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]); 2592 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2593 PetscCall(PetscFree(count)); 2594 } 2595 PetscCall(ISDestroy(&dirIS)); 2596 2597 /* check PCBDDCBenignGetOrSetP0 */ 2598 PetscCall(VecSetRandom(pcis->vec1_global, NULL)); 2599 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i; 2600 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE)); 2601 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1; 2602 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE)); 2603 for (i = 0; i < pcbddc->benign_n; i++) { 2604 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2605 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i)); 2606 } 2607 PetscFunctionReturn(PETSC_SUCCESS); 2608 } 2609 2610 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2611 { 2612 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 2613 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 2614 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs; 2615 PetscInt nz, n, benign_n, bsp = 1; 2616 PetscInt *interior_dofs, n_interior_dofs, nneu; 2617 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb; 2618 2619 PetscFunctionBegin; 2620 if (reuse) goto project_b0; 2621 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 2622 PetscCall(MatDestroy(&pcbddc->benign_B0)); 2623 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n])); 2624 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 2625 has_null_pressures = PETSC_TRUE; 2626 have_null = PETSC_TRUE; 2627 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2628 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2629 Checks if all the pressure dofs in each subdomain have a zero diagonal 2630 If not, a change of basis on pressures is not needed 2631 since the local Schur complements are already SPD 2632 */ 2633 if (pcbddc->n_ISForDofsLocal) { 2634 IS iP = NULL; 2635 PetscInt p, *pp; 2636 PetscBool flg; 2637 2638 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp)); 2639 n = pcbddc->n_ISForDofsLocal; 2640 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC"); 2641 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg)); 2642 PetscOptionsEnd(); 2643 if (!flg) { 2644 n = 1; 2645 pp[0] = pcbddc->n_ISForDofsLocal - 1; 2646 } 2647 2648 bsp = 0; 2649 for (p = 0; p < n; p++) { 2650 PetscInt bs; 2651 2652 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]); 2653 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2654 bsp += bs; 2655 } 2656 PetscCall(PetscMalloc1(bsp, &bzerodiag)); 2657 bsp = 0; 2658 for (p = 0; p < n; p++) { 2659 const PetscInt *idxs; 2660 PetscInt b, bs, npl, *bidxs; 2661 2662 PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs)); 2663 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl)); 2664 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2665 PetscCall(PetscMalloc1(npl / bs, &bidxs)); 2666 for (b = 0; b < bs; b++) { 2667 PetscInt i; 2668 2669 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b]; 2670 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp])); 2671 bsp++; 2672 } 2673 PetscCall(PetscFree(bidxs)); 2674 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs)); 2675 } 2676 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures)); 2677 2678 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2679 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP)); 2680 if (iP) { 2681 IS newpressures; 2682 2683 PetscCall(ISDifference(pressures, iP, &newpressures)); 2684 PetscCall(ISDestroy(&pressures)); 2685 pressures = newpressures; 2686 } 2687 PetscCall(ISSorted(pressures, &sorted)); 2688 if (!sorted) PetscCall(ISSort(pressures)); 2689 PetscCall(PetscFree(pp)); 2690 } 2691 2692 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2693 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2694 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2695 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag)); 2696 PetscCall(ISSorted(zerodiag, &sorted)); 2697 if (!sorted) PetscCall(ISSort(zerodiag)); 2698 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2699 zerodiag_save = zerodiag; 2700 PetscCall(ISGetLocalSize(zerodiag, &nz)); 2701 if (!nz) { 2702 if (n) have_null = PETSC_FALSE; 2703 has_null_pressures = PETSC_FALSE; 2704 PetscCall(ISDestroy(&zerodiag)); 2705 } 2706 recompute_zerodiag = PETSC_FALSE; 2707 2708 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2709 zerodiag_subs = NULL; 2710 benign_n = 0; 2711 n_interior_dofs = 0; 2712 interior_dofs = NULL; 2713 nneu = 0; 2714 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu)); 2715 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2716 if (checkb) { /* need to compute interior nodes */ 2717 PetscInt n, i; 2718 PetscInt *count; 2719 ISLocalToGlobalMapping mapping; 2720 2721 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL)); 2722 PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL)); 2723 PetscCall(PetscMalloc1(n, &interior_dofs)); 2724 for (i = 0; i < n; i++) 2725 if (count[i] < 2) interior_dofs[n_interior_dofs++] = i; 2726 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL)); 2727 } 2728 if (has_null_pressures) { 2729 IS *subs; 2730 PetscInt nsubs, i, j, nl; 2731 const PetscInt *idxs; 2732 PetscScalar *array; 2733 Vec *work; 2734 2735 subs = pcbddc->local_subs; 2736 nsubs = pcbddc->n_local_subs; 2737 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2738 if (checkb) { 2739 PetscCall(VecDuplicateVecs(matis->y, 2, &work)); 2740 PetscCall(ISGetLocalSize(zerodiag, &nl)); 2741 PetscCall(ISGetIndices(zerodiag, &idxs)); 2742 /* work[0] = 1_p */ 2743 PetscCall(VecSet(work[0], 0.)); 2744 PetscCall(VecGetArray(work[0], &array)); 2745 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2746 PetscCall(VecRestoreArray(work[0], &array)); 2747 /* work[0] = 1_v */ 2748 PetscCall(VecSet(work[1], 1.)); 2749 PetscCall(VecGetArray(work[1], &array)); 2750 for (j = 0; j < nl; j++) array[idxs[j]] = 0.; 2751 PetscCall(VecRestoreArray(work[1], &array)); 2752 PetscCall(ISRestoreIndices(zerodiag, &idxs)); 2753 } 2754 2755 if (nsubs > 1 || bsp > 1) { 2756 IS *is; 2757 PetscInt b, totb; 2758 2759 totb = bsp; 2760 is = bsp > 1 ? bzerodiag : &zerodiag; 2761 nsubs = PetscMax(nsubs, 1); 2762 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs)); 2763 for (b = 0; b < totb; b++) { 2764 for (i = 0; i < nsubs; i++) { 2765 ISLocalToGlobalMapping l2g; 2766 IS t_zerodiag_subs; 2767 PetscInt nl; 2768 2769 if (subs) { 2770 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g)); 2771 } else { 2772 IS tis; 2773 2774 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL)); 2775 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis)); 2776 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g)); 2777 PetscCall(ISDestroy(&tis)); 2778 } 2779 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs)); 2780 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl)); 2781 if (nl) { 2782 PetscBool valid = PETSC_TRUE; 2783 2784 if (checkb) { 2785 PetscCall(VecSet(matis->x, 0)); 2786 PetscCall(ISGetLocalSize(subs[i], &nl)); 2787 PetscCall(ISGetIndices(subs[i], &idxs)); 2788 PetscCall(VecGetArray(matis->x, &array)); 2789 for (j = 0; j < nl; j++) array[idxs[j]] = 1.; 2790 PetscCall(VecRestoreArray(matis->x, &array)); 2791 PetscCall(ISRestoreIndices(subs[i], &idxs)); 2792 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x)); 2793 PetscCall(MatMult(matis->A, matis->x, matis->y)); 2794 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y)); 2795 PetscCall(VecGetArray(matis->y, &array)); 2796 for (j = 0; j < n_interior_dofs; j++) { 2797 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2798 valid = PETSC_FALSE; 2799 break; 2800 } 2801 } 2802 PetscCall(VecRestoreArray(matis->y, &array)); 2803 } 2804 if (valid && nneu) { 2805 const PetscInt *idxs; 2806 PetscInt nzb; 2807 2808 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2809 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL)); 2810 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 2811 if (nzb) valid = PETSC_FALSE; 2812 } 2813 if (valid && pressures) { 2814 IS t_pressure_subs, tmp; 2815 PetscInt i1, i2; 2816 2817 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs)); 2818 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp)); 2819 PetscCall(ISGetLocalSize(tmp, &i1)); 2820 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2)); 2821 if (i2 != i1) valid = PETSC_FALSE; 2822 PetscCall(ISDestroy(&t_pressure_subs)); 2823 PetscCall(ISDestroy(&tmp)); 2824 } 2825 if (valid) { 2826 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n])); 2827 benign_n++; 2828 } else recompute_zerodiag = PETSC_TRUE; 2829 } 2830 PetscCall(ISDestroy(&t_zerodiag_subs)); 2831 PetscCall(ISLocalToGlobalMappingDestroy(&l2g)); 2832 } 2833 } 2834 } else { /* there's just one subdomain (or zero if they have not been detected */ 2835 PetscBool valid = PETSC_TRUE; 2836 2837 if (nneu) valid = PETSC_FALSE; 2838 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid)); 2839 if (valid && checkb) { 2840 PetscCall(MatMult(matis->A, work[0], matis->x)); 2841 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x)); 2842 PetscCall(VecGetArray(matis->x, &array)); 2843 for (j = 0; j < n_interior_dofs; j++) { 2844 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2845 valid = PETSC_FALSE; 2846 break; 2847 } 2848 } 2849 PetscCall(VecRestoreArray(matis->x, &array)); 2850 } 2851 if (valid) { 2852 benign_n = 1; 2853 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs)); 2854 PetscCall(PetscObjectReference((PetscObject)zerodiag)); 2855 zerodiag_subs[0] = zerodiag; 2856 } 2857 } 2858 if (checkb) PetscCall(VecDestroyVecs(2, &work)); 2859 } 2860 PetscCall(PetscFree(interior_dofs)); 2861 2862 if (!benign_n) { 2863 PetscInt n; 2864 2865 PetscCall(ISDestroy(&zerodiag)); 2866 recompute_zerodiag = PETSC_FALSE; 2867 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2868 if (n) have_null = PETSC_FALSE; 2869 } 2870 2871 /* final check for null pressures */ 2872 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null)); 2873 2874 if (recompute_zerodiag) { 2875 PetscCall(ISDestroy(&zerodiag)); 2876 if (benign_n == 1) { 2877 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0])); 2878 zerodiag = zerodiag_subs[0]; 2879 } else { 2880 PetscInt i, nzn, *new_idxs; 2881 2882 nzn = 0; 2883 for (i = 0; i < benign_n; i++) { 2884 PetscInt ns; 2885 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2886 nzn += ns; 2887 } 2888 PetscCall(PetscMalloc1(nzn, &new_idxs)); 2889 nzn = 0; 2890 for (i = 0; i < benign_n; i++) { 2891 PetscInt ns, *idxs; 2892 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns)); 2893 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2894 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns)); 2895 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs)); 2896 nzn += ns; 2897 } 2898 PetscCall(PetscSortInt(nzn, new_idxs)); 2899 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag)); 2900 } 2901 have_null = PETSC_FALSE; 2902 } 2903 2904 /* determines if the coarse solver will be singular or not */ 2905 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc))); 2906 2907 /* Prepare matrix to compute no-net-flux */ 2908 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2909 Mat A, loc_divudotp; 2910 ISLocalToGlobalMapping rl2g, cl2g, l2gmap; 2911 IS row, col, isused = NULL; 2912 PetscInt M, N, n, st, n_isused; 2913 2914 if (pressures) { 2915 isused = pressures; 2916 } else { 2917 isused = zerodiag_save; 2918 } 2919 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL)); 2920 PetscCall(MatISGetLocalMat(pc->pmat, &A)); 2921 PetscCall(MatGetLocalSize(A, &n, NULL)); 2922 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field"); 2923 n_isused = 0; 2924 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused)); 2925 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 2926 st = st - n_isused; 2927 if (n) { 2928 const PetscInt *gidxs; 2929 2930 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp)); 2931 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 2932 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2933 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2934 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col)); 2935 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 2936 } else { 2937 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp)); 2938 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row)); 2939 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col)); 2940 } 2941 PetscCall(MatGetSize(pc->pmat, NULL, &N)); 2942 PetscCall(ISGetSize(row, &M)); 2943 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 2944 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 2945 PetscCall(ISDestroy(&row)); 2946 PetscCall(ISDestroy(&col)); 2947 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp)); 2948 PetscCall(MatSetType(pcbddc->divudotp, MATIS)); 2949 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N)); 2950 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g)); 2951 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 2952 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 2953 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp)); 2954 PetscCall(MatDestroy(&loc_divudotp)); 2955 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 2956 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY)); 2957 } 2958 PetscCall(ISDestroy(&zerodiag_save)); 2959 PetscCall(ISDestroy(&pressures)); 2960 if (bzerodiag) { 2961 PetscInt i; 2962 2963 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i])); 2964 PetscCall(PetscFree(bzerodiag)); 2965 } 2966 pcbddc->benign_n = benign_n; 2967 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2968 2969 /* determines if the problem has subdomains with 0 pressure block */ 2970 have_null = (PetscBool)(!!pcbddc->benign_n); 2971 PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 2972 2973 project_b0: 2974 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL)); 2975 /* change of basis and p0 dofs */ 2976 if (pcbddc->benign_n) { 2977 PetscInt i, s, *nnz; 2978 2979 /* local change of basis for pressures */ 2980 PetscCall(MatDestroy(&pcbddc->benign_change)); 2981 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change)); 2982 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ)); 2983 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE)); 2984 PetscCall(PetscMalloc1(n, &nnz)); 2985 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */ 2986 for (i = 0; i < pcbddc->benign_n; i++) { 2987 const PetscInt *idxs; 2988 PetscInt nzs, j; 2989 2990 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs)); 2991 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 2992 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */ 2993 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */ 2994 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs)); 2995 } 2996 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz)); 2997 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 2998 PetscCall(PetscFree(nnz)); 2999 /* set identity by default */ 3000 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES)); 3001 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3002 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0)); 3003 /* set change on pressures */ 3004 for (s = 0; s < pcbddc->benign_n; s++) { 3005 PetscScalar *array; 3006 const PetscInt *idxs; 3007 PetscInt nzs; 3008 3009 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs)); 3010 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3011 for (i = 0; i < nzs - 1; i++) { 3012 PetscScalar vals[2]; 3013 PetscInt cols[2]; 3014 3015 cols[0] = idxs[i]; 3016 cols[1] = idxs[nzs - 1]; 3017 vals[0] = 1.; 3018 vals[1] = 1.; 3019 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES)); 3020 } 3021 PetscCall(PetscMalloc1(nzs, &array)); 3022 for (i = 0; i < nzs - 1; i++) array[i] = -1.; 3023 array[nzs - 1] = 1.; 3024 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES)); 3025 /* store local idxs for p0 */ 3026 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1]; 3027 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs)); 3028 PetscCall(PetscFree(array)); 3029 } 3030 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3031 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY)); 3032 3033 /* project if needed */ 3034 if (pcbddc->benign_change_explicit) { 3035 Mat M; 3036 3037 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M)); 3038 PetscCall(MatDestroy(&pcbddc->local_mat)); 3039 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat)); 3040 PetscCall(MatDestroy(&M)); 3041 } 3042 /* store global idxs for p0 */ 3043 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx)); 3044 } 3045 *zerodiaglocal = zerodiag; 3046 PetscFunctionReturn(PETSC_SUCCESS); 3047 } 3048 3049 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 3050 { 3051 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3052 PetscScalar *array; 3053 3054 PetscFunctionBegin; 3055 if (!pcbddc->benign_sf) { 3056 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf)); 3057 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx)); 3058 } 3059 if (get) { 3060 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 3061 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3062 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE)); 3063 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 3064 } else { 3065 PetscCall(VecGetArray(v, &array)); 3066 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3067 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE)); 3068 PetscCall(VecRestoreArray(v, &array)); 3069 } 3070 PetscFunctionReturn(PETSC_SUCCESS); 3071 } 3072 3073 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 3074 { 3075 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3076 3077 PetscFunctionBegin; 3078 /* TODO: add error checking 3079 - avoid nested pop (or push) calls. 3080 - cannot push before pop. 3081 - cannot call this if pcbddc->local_mat is NULL 3082 */ 3083 if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 3084 if (pop) { 3085 if (pcbddc->benign_change_explicit) { 3086 IS is_p0; 3087 MatReuse reuse; 3088 3089 /* extract B_0 */ 3090 reuse = MAT_INITIAL_MATRIX; 3091 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX; 3092 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 3093 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0)); 3094 /* remove rows and cols from local problem */ 3095 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE)); 3096 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 3097 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL)); 3098 PetscCall(ISDestroy(&is_p0)); 3099 } else { 3100 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 3101 PetscScalar *vals; 3102 PetscInt i, n, *idxs_ins; 3103 3104 PetscCall(VecGetLocalSize(matis->y, &n)); 3105 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals)); 3106 if (!pcbddc->benign_B0) { 3107 PetscInt *nnz; 3108 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0)); 3109 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ)); 3110 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE)); 3111 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz)); 3112 for (i = 0; i < pcbddc->benign_n; i++) { 3113 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i])); 3114 nnz[i] = n - nnz[i]; 3115 } 3116 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz)); 3117 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3118 PetscCall(PetscFree(nnz)); 3119 } 3120 3121 for (i = 0; i < pcbddc->benign_n; i++) { 3122 PetscScalar *array; 3123 PetscInt *idxs, j, nz, cum; 3124 3125 PetscCall(VecSet(matis->x, 0.)); 3126 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz)); 3127 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3128 for (j = 0; j < nz; j++) vals[j] = 1.; 3129 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES)); 3130 PetscCall(VecAssemblyBegin(matis->x)); 3131 PetscCall(VecAssemblyEnd(matis->x)); 3132 PetscCall(VecSet(matis->y, 0.)); 3133 PetscCall(MatMult(matis->A, matis->x, matis->y)); 3134 PetscCall(VecGetArray(matis->y, &array)); 3135 cum = 0; 3136 for (j = 0; j < n; j++) { 3137 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3138 vals[cum] = array[j]; 3139 idxs_ins[cum] = j; 3140 cum++; 3141 } 3142 } 3143 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES)); 3144 PetscCall(VecRestoreArray(matis->y, &array)); 3145 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs)); 3146 } 3147 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3148 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY)); 3149 PetscCall(PetscFree2(idxs_ins, vals)); 3150 } 3151 } else { /* push */ 3152 3153 PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!"); 3154 for (PetscInt i = 0; i < pcbddc->benign_n; i++) { 3155 PetscScalar *B0_vals; 3156 PetscInt *B0_cols, B0_ncol; 3157 3158 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3159 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES)); 3160 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES)); 3161 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES)); 3162 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals)); 3163 } 3164 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3165 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY)); 3166 } 3167 PetscFunctionReturn(PETSC_SUCCESS); 3168 } 3169 3170 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3171 { 3172 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3173 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3174 PetscBLASInt B_dummyint, B_neigs, B_ierr, B_lwork; 3175 PetscBLASInt *B_iwork, *B_ifail; 3176 PetscScalar *work, lwork; 3177 PetscScalar *St, *S, *eigv; 3178 PetscScalar *Sarray, *Starray; 3179 PetscReal *eigs, thresh, lthresh, uthresh; 3180 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs; 3181 PetscBool allocated_S_St, upart; 3182 #if defined(PETSC_USE_COMPLEX) 3183 PetscReal *rwork; 3184 #endif 3185 3186 PetscFunctionBegin; 3187 if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS); 3188 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data"); 3189 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3190 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric, 3191 sub_schurs->is_posdef); 3192 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3193 3194 if (pcbddc->dbg_flag) { 3195 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 3196 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3197 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 3198 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n")); 3199 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 3200 } 3201 3202 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef)); 3203 3204 /* max size of subsets */ 3205 mss = 0; 3206 for (i = 0; i < sub_schurs->n_subs; i++) { 3207 PetscInt subset_size; 3208 3209 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3210 mss = PetscMax(mss, subset_size); 3211 } 3212 3213 /* min/max and threshold */ 3214 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3215 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3216 nmax = PetscMax(nmin, nmax); 3217 allocated_S_St = PETSC_FALSE; 3218 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3219 allocated_S_St = PETSC_TRUE; 3220 } 3221 3222 /* allocate lapack workspace */ 3223 cum = cum2 = 0; 3224 maxneigs = 0; 3225 for (i = 0; i < sub_schurs->n_subs; i++) { 3226 PetscInt n, subset_size; 3227 3228 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3229 n = PetscMin(subset_size, nmax); 3230 cum += subset_size; 3231 cum2 += subset_size * n; 3232 maxneigs = PetscMax(maxneigs, n); 3233 } 3234 lwork = 0; 3235 if (mss) { 3236 PetscScalar sdummy = 0.; 3237 PetscBLASInt B_itype = 1; 3238 PetscBLASInt B_N = mss, idummy = 0; 3239 PetscReal rdummy = 0., zero = 0.0; 3240 PetscReal eps = 0.0; /* dlamch? */ 3241 3242 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3243 B_lwork = -1; 3244 /* some implementations may complain about NULL pointers, even if we are querying */ 3245 S = &sdummy; 3246 St = &sdummy; 3247 eigs = &rdummy; 3248 eigv = &sdummy; 3249 B_iwork = &idummy; 3250 B_ifail = &idummy; 3251 #if defined(PETSC_USE_COMPLEX) 3252 rwork = &rdummy; 3253 #endif 3254 thresh = 1.0; 3255 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3256 #if defined(PETSC_USE_COMPLEX) 3257 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3258 #else 3259 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3260 #endif 3261 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %d", (int)B_ierr); 3262 PetscCall(PetscFPTrapPop()); 3263 } 3264 3265 nv = 0; 3266 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3267 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 3268 } 3269 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 3270 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St)); 3271 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail)); 3272 #if defined(PETSC_USE_COMPLEX) 3273 PetscCall(PetscMalloc1(7 * mss, &rwork)); 3274 #endif 3275 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2, 3276 &pcbddc->adaptive_constraints_data)); 3277 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs)); 3278 3279 maxneigs = 0; 3280 cum = cumarray = 0; 3281 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3282 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3283 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3284 const PetscInt *idxs; 3285 3286 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 3287 for (cum = 0; cum < nv; cum++) { 3288 pcbddc->adaptive_constraints_n[cum] = 1; 3289 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3290 pcbddc->adaptive_constraints_data[cum] = 1.0; 3291 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1; 3292 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1; 3293 } 3294 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 3295 } 3296 3297 if (mss) { /* multilevel */ 3298 if (sub_schurs->gdsw) { 3299 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3300 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3301 } else { 3302 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3303 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3304 } 3305 } 3306 3307 lthresh = pcbddc->adaptive_threshold[0]; 3308 uthresh = pcbddc->adaptive_threshold[1]; 3309 upart = pcbddc->use_deluxe_scaling; 3310 for (i = 0; i < sub_schurs->n_subs; i++) { 3311 const PetscInt *idxs; 3312 PetscReal upper, lower; 3313 PetscInt j, subset_size, eigs_start = 0; 3314 PetscBLASInt B_N; 3315 PetscBool same_data = PETSC_FALSE; 3316 PetscBool scal = PETSC_FALSE; 3317 3318 if (upart) { 3319 upper = PETSC_MAX_REAL; 3320 lower = uthresh; 3321 } else { 3322 if (sub_schurs->gdsw) { 3323 upper = uthresh; 3324 lower = PETSC_MIN_REAL; 3325 } else { 3326 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling"); 3327 upper = 1. / uthresh; 3328 lower = 0.; 3329 } 3330 } 3331 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 3332 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 3333 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 3334 /* this is experimental: we assume the dofs have been properly grouped to have 3335 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3336 if (!sub_schurs->is_posdef) { 3337 Mat T; 3338 3339 for (j = 0; j < subset_size; j++) { 3340 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) { 3341 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T)); 3342 PetscCall(MatScale(T, -1.0)); 3343 PetscCall(MatDestroy(&T)); 3344 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T)); 3345 PetscCall(MatScale(T, -1.0)); 3346 PetscCall(MatDestroy(&T)); 3347 if (sub_schurs->change_primal_sub) { 3348 PetscInt nz, k; 3349 const PetscInt *idxs; 3350 3351 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz)); 3352 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs)); 3353 for (k = 0; k < nz; k++) { 3354 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0; 3355 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0; 3356 } 3357 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs)); 3358 } 3359 scal = PETSC_TRUE; 3360 break; 3361 } 3362 } 3363 } 3364 3365 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3366 if (sub_schurs->is_symmetric) { 3367 PetscInt j, k; 3368 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */ 3369 PetscCall(PetscArrayzero(S, subset_size * subset_size)); 3370 PetscCall(PetscArrayzero(St, subset_size * subset_size)); 3371 } 3372 for (j = 0; j < subset_size; j++) { 3373 for (k = j; k < subset_size; k++) { 3374 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3375 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3376 } 3377 } 3378 } else { 3379 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3380 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3381 } 3382 } else { 3383 S = Sarray + cumarray; 3384 St = Starray + cumarray; 3385 } 3386 /* see if we can save some work */ 3387 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data)); 3388 3389 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3390 B_neigs = 0; 3391 } else { 3392 PetscBLASInt B_itype = 1; 3393 PetscBLASInt B_IL, B_IU; 3394 PetscReal eps = -1.0; /* dlamch? */ 3395 PetscInt nmin_s; 3396 PetscBool compute_range; 3397 3398 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 3399 B_neigs = 0; 3400 compute_range = (PetscBool)!same_data; 3401 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3402 3403 if (pcbddc->dbg_flag) { 3404 PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count + 1, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof; 3405 3406 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc)); 3407 PetscCall( 3408 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc)); 3409 } 3410 3411 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3412 if (compute_range) { 3413 /* ask for eigenvalues larger than thresh */ 3414 if (sub_schurs->is_posdef) { 3415 #if defined(PETSC_USE_COMPLEX) 3416 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3417 #else 3418 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3419 #endif 3420 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3421 } else { /* no theory so far, but it works nicely */ 3422 PetscInt recipe = 0, recipe_m = 1; 3423 PetscReal bb[2]; 3424 3425 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL)); 3426 switch (recipe) { 3427 case 0: 3428 if (scal) { 3429 bb[0] = PETSC_MIN_REAL; 3430 bb[1] = lthresh; 3431 } else { 3432 bb[0] = uthresh; 3433 bb[1] = PETSC_MAX_REAL; 3434 } 3435 #if defined(PETSC_USE_COMPLEX) 3436 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3437 #else 3438 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3439 #endif 3440 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3441 break; 3442 case 1: 3443 bb[0] = PETSC_MIN_REAL; 3444 bb[1] = lthresh * lthresh; 3445 #if defined(PETSC_USE_COMPLEX) 3446 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3447 #else 3448 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3449 #endif 3450 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3451 if (!scal) { 3452 PetscBLASInt B_neigs2 = 0; 3453 3454 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3455 bb[1] = PETSC_MAX_REAL; 3456 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3457 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3458 #if defined(PETSC_USE_COMPLEX) 3459 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3460 #else 3461 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3462 #endif 3463 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3464 B_neigs += B_neigs2; 3465 } 3466 break; 3467 case 2: 3468 if (scal) { 3469 bb[0] = PETSC_MIN_REAL; 3470 bb[1] = 0; 3471 #if defined(PETSC_USE_COMPLEX) 3472 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3473 #else 3474 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3475 #endif 3476 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3477 } else { 3478 PetscBLASInt B_neigs2 = 0; 3479 PetscBool do_copy = PETSC_FALSE; 3480 3481 lthresh = PetscMax(lthresh, 0.0); 3482 if (lthresh > 0.0) { 3483 bb[0] = PETSC_MIN_REAL; 3484 bb[1] = lthresh * lthresh; 3485 3486 do_copy = PETSC_TRUE; 3487 #if defined(PETSC_USE_COMPLEX) 3488 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3489 #else 3490 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3491 #endif 3492 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3493 } 3494 bb[0] = PetscMax(lthresh * lthresh, uthresh); 3495 bb[1] = PETSC_MAX_REAL; 3496 if (do_copy) { 3497 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3498 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3499 } 3500 #if defined(PETSC_USE_COMPLEX) 3501 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3502 #else 3503 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3504 #endif 3505 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3506 B_neigs += B_neigs2; 3507 } 3508 break; 3509 case 3: 3510 if (scal) { 3511 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL)); 3512 } else { 3513 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL)); 3514 } 3515 if (!scal) { 3516 bb[0] = uthresh; 3517 bb[1] = PETSC_MAX_REAL; 3518 #if defined(PETSC_USE_COMPLEX) 3519 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3520 #else 3521 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3522 #endif 3523 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3524 } 3525 if (recipe_m > 0 && B_N - B_neigs > 0) { 3526 PetscBLASInt B_neigs2 = 0; 3527 3528 B_IL = 1; 3529 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU)); 3530 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3531 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3532 #if defined(PETSC_USE_COMPLEX) 3533 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3534 #else 3535 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3536 #endif 3537 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3538 B_neigs += B_neigs2; 3539 } 3540 break; 3541 case 4: 3542 bb[0] = PETSC_MIN_REAL; 3543 bb[1] = lthresh; 3544 #if defined(PETSC_USE_COMPLEX) 3545 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3546 #else 3547 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3548 #endif 3549 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3550 { 3551 PetscBLASInt B_neigs2 = 0; 3552 3553 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh); 3554 bb[1] = PETSC_MAX_REAL; 3555 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3556 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3557 #if defined(PETSC_USE_COMPLEX) 3558 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3559 #else 3560 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3561 #endif 3562 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3563 B_neigs += B_neigs2; 3564 } 3565 break; 3566 case 5: /* same as before: first compute all eigenvalues, then filter */ 3567 #if defined(PETSC_USE_COMPLEX) 3568 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3569 #else 3570 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3571 #endif 3572 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3573 { 3574 PetscInt e, k, ne; 3575 for (e = 0, ne = 0; e < B_neigs; e++) { 3576 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3577 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k]; 3578 eigs[ne] = eigs[e]; 3579 ne++; 3580 } 3581 } 3582 PetscCall(PetscArraycpy(eigv, S, B_N * ne)); 3583 B_neigs = ne; 3584 } 3585 break; 3586 default: 3587 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe); 3588 } 3589 } 3590 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3591 B_IU = PetscMax(1, PetscMin(B_N, nmax)); 3592 B_IL = 1; 3593 #if defined(PETSC_USE_COMPLEX) 3594 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3595 #else 3596 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3597 #endif 3598 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3599 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3600 PetscInt k; 3601 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 3602 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax)); 3603 PetscCall(PetscBLASIntCast(nmax, &B_neigs)); 3604 nmin = nmax; 3605 PetscCall(PetscArrayzero(eigv, subset_size * nmax)); 3606 for (k = 0; k < nmax; k++) { 3607 eigs[k] = 1. / PETSC_SMALL; 3608 eigv[k * (subset_size + 1)] = 1.0; 3609 } 3610 } 3611 PetscCall(PetscFPTrapPop()); 3612 if (B_ierr) { 3613 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3614 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3615 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3616 } 3617 3618 if (B_neigs > nmax) { 3619 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax)); 3620 if (upart) eigs_start = scal ? 0 : B_neigs - nmax; 3621 B_neigs = nmax; 3622 } 3623 3624 nmin_s = PetscMin(nmin, B_N); 3625 if (B_neigs < nmin_s) { 3626 PetscBLASInt B_neigs2 = 0; 3627 3628 if (upart) { 3629 if (scal) { 3630 B_IU = nmin_s; 3631 B_IL = B_neigs + 1; 3632 } else { 3633 B_IL = B_N - nmin_s + 1; 3634 B_IU = B_N - B_neigs; 3635 } 3636 } else { 3637 B_IL = B_neigs + 1; 3638 B_IU = nmin_s; 3639 } 3640 if (pcbddc->dbg_flag) { 3641 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU)); 3642 } 3643 if (sub_schurs->is_symmetric) { 3644 PetscInt j, k; 3645 for (j = 0; j < subset_size; j++) { 3646 for (k = j; k < subset_size; k++) { 3647 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k]; 3648 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k]; 3649 } 3650 } 3651 } else { 3652 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size)); 3653 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size)); 3654 } 3655 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 3656 #if defined(PETSC_USE_COMPLEX) 3657 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr)); 3658 #else 3659 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr)); 3660 #endif 3661 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0)); 3662 PetscCall(PetscFPTrapPop()); 3663 B_neigs += B_neigs2; 3664 } 3665 if (B_ierr) { 3666 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr); 3667 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr); 3668 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1); 3669 } 3670 if (pcbddc->dbg_flag) { 3671 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs)); 3672 for (j = 0; j < B_neigs; j++) { 3673 if (!sub_schurs->gdsw) { 3674 if (eigs[j] == 0.0) { 3675 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n")); 3676 } else { 3677 if (upart) { 3678 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start])); 3679 } else { 3680 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1. / eigs[j + eigs_start]))); 3681 } 3682 } 3683 } else { 3684 double pg = (double)eigs[j + eigs_start]; 3685 if (pg < 2 * PETSC_SMALL) pg = 0.0; 3686 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg)); 3687 } 3688 } 3689 } 3690 } 3691 /* change the basis back to the original one */ 3692 if (sub_schurs->change) { 3693 Mat change, phi, phit; 3694 3695 if (pcbddc->dbg_flag > 2) { 3696 PetscInt ii; 3697 for (ii = 0; ii < B_neigs; ii++) { 3698 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3699 for (j = 0; j < B_N; j++) { 3700 #if defined(PETSC_USE_COMPLEX) 3701 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]); 3702 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]); 3703 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3704 #else 3705 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j]))); 3706 #endif 3707 } 3708 } 3709 } 3710 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL)); 3711 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit)); 3712 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &phi)); 3713 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN)); 3714 PetscCall(MatDestroy(&phit)); 3715 PetscCall(MatDestroy(&phi)); 3716 } 3717 maxneigs = PetscMax(B_neigs, maxneigs); 3718 pcbddc->adaptive_constraints_n[i + nv] = B_neigs; 3719 if (B_neigs) { 3720 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size)); 3721 3722 if (pcbddc->dbg_flag > 1) { 3723 PetscInt ii; 3724 for (ii = 0; ii < B_neigs; ii++) { 3725 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N)); 3726 for (j = 0; j < B_N; j++) { 3727 #if defined(PETSC_USE_COMPLEX) 3728 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3729 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]); 3730 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c)); 3731 #else 3732 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]))); 3733 #endif 3734 } 3735 } 3736 } 3737 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size)); 3738 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3739 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs; 3740 cum++; 3741 } 3742 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 3743 /* shift for next computation */ 3744 cumarray += subset_size * subset_size; 3745 } 3746 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 3747 3748 if (mss) { 3749 if (sub_schurs->gdsw) { 3750 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray)); 3751 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3752 } else { 3753 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray)); 3754 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray)); 3755 /* destroy matrices (junk) */ 3756 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 3757 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 3758 } 3759 } 3760 if (allocated_S_St) PetscCall(PetscFree2(S, St)); 3761 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail)); 3762 #if defined(PETSC_USE_COMPLEX) 3763 PetscCall(PetscFree(rwork)); 3764 #endif 3765 if (pcbddc->dbg_flag) { 3766 PetscInt maxneigs_r; 3767 PetscCall(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc))); 3768 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r)); 3769 } 3770 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0)); 3771 PetscFunctionReturn(PETSC_SUCCESS); 3772 } 3773 3774 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3775 { 3776 Mat coarse_submat; 3777 3778 PetscFunctionBegin; 3779 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3780 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3781 PetscCall(PCBDDCSetUpLocalScatters(pc)); 3782 3783 /* Setup local neumann solver ksp_R */ 3784 /* PCBDDCSetUpLocalScatters should be called first! */ 3785 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE)); 3786 3787 /* 3788 Setup local correction and local part of coarse basis. 3789 Gives back the dense local part of the coarse matrix in column major ordering 3790 */ 3791 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat)); 3792 3793 /* Compute total number of coarse nodes and setup coarse solver */ 3794 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat)); 3795 PetscCall(MatDestroy(&coarse_submat)); 3796 PetscFunctionReturn(PETSC_SUCCESS); 3797 } 3798 3799 PetscErrorCode PCBDDCResetCustomization(PC pc) 3800 { 3801 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3802 3803 PetscFunctionBegin; 3804 PetscCall(ISDestroy(&pcbddc->user_primal_vertices)); 3805 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local)); 3806 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries)); 3807 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal)); 3808 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries)); 3809 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 3810 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 3811 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal)); 3812 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL)); 3813 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL)); 3814 PetscFunctionReturn(PETSC_SUCCESS); 3815 } 3816 3817 PetscErrorCode PCBDDCResetTopography(PC pc) 3818 { 3819 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3820 PetscInt i; 3821 3822 PetscFunctionBegin; 3823 PetscCall(MatDestroy(&pcbddc->nedcG)); 3824 PetscCall(ISDestroy(&pcbddc->nedclocal)); 3825 PetscCall(MatDestroy(&pcbddc->discretegradient)); 3826 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix)); 3827 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 3828 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 3829 PetscCall(VecDestroy(&pcbddc->work_change)); 3830 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 3831 PetscCall(MatDestroy(&pcbddc->divudotp)); 3832 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l)); 3833 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph)); 3834 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i])); 3835 pcbddc->n_local_subs = 0; 3836 PetscCall(PetscFree(pcbddc->local_subs)); 3837 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs)); 3838 pcbddc->graphanalyzed = PETSC_FALSE; 3839 pcbddc->recompute_topography = PETSC_TRUE; 3840 pcbddc->corner_selected = PETSC_FALSE; 3841 PetscFunctionReturn(PETSC_SUCCESS); 3842 } 3843 3844 PetscErrorCode PCBDDCResetSolvers(PC pc) 3845 { 3846 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3847 3848 PetscFunctionBegin; 3849 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 3850 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 3851 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 3852 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 3853 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 3854 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3855 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3856 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 3857 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 3858 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3859 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3860 PetscCall(ISDestroy(&pcbddc->is_R_local)); 3861 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 3862 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 3863 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 3864 PetscCall(KSPReset(pcbddc->ksp_D)); 3865 PetscCall(KSPReset(pcbddc->ksp_R)); 3866 PetscCall(KSPReset(pcbddc->coarse_ksp)); 3867 PetscCall(MatDestroy(&pcbddc->local_mat)); 3868 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 3869 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 3870 PetscCall(PetscFree(pcbddc->global_primal_indices)); 3871 PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 3872 PetscCall(MatDestroy(&pcbddc->benign_change)); 3873 PetscCall(VecDestroy(&pcbddc->benign_vec)); 3874 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE)); 3875 PetscCall(MatDestroy(&pcbddc->benign_B0)); 3876 PetscCall(PetscSFDestroy(&pcbddc->benign_sf)); 3877 if (pcbddc->benign_zerodiag_subs) { 3878 PetscInt i; 3879 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i])); 3880 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs)); 3881 } 3882 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0)); 3883 PetscFunctionReturn(PETSC_SUCCESS); 3884 } 3885 3886 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3887 { 3888 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 3889 PC_IS *pcis = (PC_IS *)pc->data; 3890 VecType impVecType; 3891 PetscInt n_constraints, n_R, old_size; 3892 3893 PetscFunctionBegin; 3894 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3895 n_R = pcis->n - pcbddc->n_vertices; 3896 PetscCall(VecGetType(pcis->vec1_N, &impVecType)); 3897 /* local work vectors (try to avoid unneeded work)*/ 3898 /* R nodes */ 3899 old_size = -1; 3900 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size)); 3901 if (n_R != old_size) { 3902 PetscCall(VecDestroy(&pcbddc->vec1_R)); 3903 PetscCall(VecDestroy(&pcbddc->vec2_R)); 3904 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R)); 3905 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R)); 3906 PetscCall(VecSetType(pcbddc->vec1_R, impVecType)); 3907 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R)); 3908 } 3909 /* local primal dofs */ 3910 old_size = -1; 3911 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size)); 3912 if (pcbddc->local_primal_size != old_size) { 3913 PetscCall(VecDestroy(&pcbddc->vec1_P)); 3914 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P)); 3915 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size)); 3916 PetscCall(VecSetType(pcbddc->vec1_P, impVecType)); 3917 } 3918 /* local explicit constraints */ 3919 old_size = -1; 3920 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size)); 3921 if (n_constraints && n_constraints != old_size) { 3922 PetscCall(VecDestroy(&pcbddc->vec1_C)); 3923 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C)); 3924 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints)); 3925 PetscCall(VecSetType(pcbddc->vec1_C, impVecType)); 3926 } 3927 PetscFunctionReturn(PETSC_SUCCESS); 3928 } 3929 3930 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode) 3931 { 3932 PetscBool flg; 3933 const PetscScalar *a; 3934 3935 PetscFunctionBegin; 3936 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg)); 3937 if (flg) { 3938 PetscCall(MatDenseGetArrayRead(S, &a)); 3939 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE)); 3940 PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode)); 3941 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE)); 3942 PetscCall(MatDenseRestoreArrayRead(S, &a)); 3943 } else { 3944 const PetscInt *ii, *jj; 3945 PetscInt n; 3946 PetscInt buf[8192], *bufc = NULL; 3947 PetscBool freeb = PETSC_FALSE; 3948 Mat Sm = S; 3949 3950 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg)); 3951 if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm)); 3952 else PetscCall(PetscObjectReference((PetscObject)S)); 3953 PetscCall(MatSeqAIJGetArrayRead(Sm, &a)); 3954 PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 3955 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure"); 3956 if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) { 3957 bufc = buf; 3958 } else { 3959 PetscCall(PetscMalloc1(nc, &bufc)); 3960 freeb = PETSC_TRUE; 3961 } 3962 3963 for (PetscInt i = 0; i < n; i++) { 3964 const PetscInt nci = ii[i + 1] - ii[i]; 3965 3966 for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]]; 3967 PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode)); 3968 } 3969 PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg)); 3970 PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a)); 3971 PetscCall(MatDestroy(&Sm)); 3972 if (freeb) PetscCall(PetscFree(bufc)); 3973 } 3974 PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY)); 3975 PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY)); 3976 PetscFunctionReturn(PETSC_SUCCESS); 3977 } 3978 3979 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat) 3980 { 3981 Mat_SeqAIJ *aij; 3982 PetscInt *ii, *jj; 3983 PetscScalar *aa; 3984 PetscInt nnz = 0, m, nc; 3985 const PetscScalar *a; 3986 const PetscScalar zero = 0.0; 3987 3988 PetscFunctionBegin; 3989 PetscCall(MatGetLocalSize(D, &m, &nc)); 3990 PetscCall(MatDenseGetArrayRead(D, &a)); 3991 PetscCall(PetscMalloc1(m + 1, &ii)); 3992 PetscCall(PetscMalloc1(m * nc, &jj)); 3993 PetscCall(PetscMalloc1(m * nc, &aa)); 3994 ii[0] = 0; 3995 for (PetscInt k = 0; k < m; k++) { 3996 for (PetscInt s = 0; s < nc; s++) { 3997 const PetscInt c = s + k * nc; 3998 const PetscScalar v = a[k + s * m]; 3999 4000 if (PetscUnlikely(j[c] < 0 || v == zero)) continue; 4001 jj[nnz] = j[c]; 4002 aa[nnz] = a[k + s * m]; 4003 nnz++; 4004 } 4005 ii[k + 1] = nnz; 4006 } 4007 4008 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat)); 4009 PetscCall(MatDenseRestoreArrayRead(D, &a)); 4010 4011 aij = (Mat_SeqAIJ *)(*mat)->data; 4012 aij->free_a = PETSC_TRUE; 4013 aij->free_ij = PETSC_TRUE; 4014 PetscFunctionReturn(PETSC_SUCCESS); 4015 } 4016 4017 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */ 4018 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B) 4019 { 4020 PetscInt n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL; 4021 const PetscBool allowzeropivot = PETSC_FALSE; 4022 PetscBool zeropivotdetected = PETSC_FALSE; 4023 const PetscReal shift = 0.0; 4024 PetscInt ipvt[5], *ii, *jj, *indi, *indj; 4025 PetscScalar work[25], *v_work = NULL, *aa, *diag; 4026 PetscLogDouble flops = 0.0; 4027 4028 PetscFunctionBegin; 4029 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices"); 4030 for (PetscInt i = 0; i < nblocks; i++) { 4031 ncnt += bsizes[i]; 4032 ncnt2 += PetscSqr(bsizes[i]); 4033 } 4034 PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n); 4035 for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]); 4036 if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots)); 4037 4038 PetscCall(PetscMalloc1(n + 1, &ii)); 4039 PetscCall(PetscMalloc1(ncnt2, &jj)); 4040 PetscCall(PetscCalloc1(ncnt2, &aa)); 4041 4042 ncnt = 0; 4043 ii[0] = 0; 4044 indi = ii; 4045 indj = jj; 4046 diag = aa; 4047 for (PetscInt i = 0; i < nblocks; i++) { 4048 const PetscInt bs = bsizes[i]; 4049 4050 for (PetscInt k = 0; k < bs; k++) { 4051 indi[k + 1] = indi[k] + bs; 4052 for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j; 4053 } 4054 PetscCall(MatGetValues(A, bs, indj, bs, indj, diag)); 4055 switch (bs) { 4056 case 1: 4057 *diag = 1.0 / (*diag); 4058 break; 4059 case 2: 4060 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 4061 break; 4062 case 3: 4063 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 4064 break; 4065 case 4: 4066 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 4067 break; 4068 case 5: 4069 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 4070 break; 4071 case 6: 4072 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 4073 break; 4074 case 7: 4075 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 4076 break; 4077 default: 4078 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 4079 } 4080 ncnt += bs; 4081 flops += 2.0 * PetscPowInt(bs, 3) / 3.0; 4082 diag += bs * bs; 4083 indj += bs * bs; 4084 indi += bs; 4085 } 4086 PetscCall(PetscLogFlops(flops)); 4087 PetscCall(PetscFree2(v_work, v_pivots)); 4088 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B)); 4089 { 4090 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data; 4091 aij->free_a = PETSC_TRUE; 4092 aij->free_ij = PETSC_TRUE; 4093 } 4094 PetscFunctionReturn(PETSC_SUCCESS); 4095 } 4096 4097 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B) 4098 { 4099 const PetscScalar *rarr; 4100 PetscScalar *larr; 4101 PetscSF vsf; 4102 PetscInt n, rld, lld; 4103 4104 PetscFunctionBegin; 4105 PetscCall(MatGetSize(A, NULL, &n)); 4106 PetscCall(MatDenseGetLDA(A, &rld)); 4107 PetscCall(MatDenseGetLDA(B, &lld)); 4108 PetscCall(MatDenseGetArrayRead(A, &rarr)); 4109 PetscCall(MatDenseGetArrayWrite(B, &larr)); 4110 PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf)); 4111 PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4112 PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE)); 4113 PetscCall(MatDenseRestoreArrayRead(A, &rarr)); 4114 PetscCall(MatDenseRestoreArrayWrite(B, &larr)); 4115 PetscCall(PetscSFDestroy(&vsf)); 4116 PetscFunctionReturn(PETSC_SUCCESS); 4117 } 4118 4119 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat) 4120 { 4121 PC_IS *pcis = (PC_IS *)pc->data; 4122 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 4123 PCBDDCGraph graph = pcbddc->mat_graph; 4124 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4125 /* submatrices of local problem */ 4126 Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL; 4127 /* submatrices of local coarse problem */ 4128 Mat S_CV = NULL, S_VC = NULL, S_CC = NULL; 4129 /* working matrices */ 4130 Mat C_CR; 4131 4132 /* additional working stuff */ 4133 PC pc_R; 4134 IS is_R, is_V, is_C; 4135 const PetscInt *idx_V, *idx_C; 4136 Mat F, Brhs = NULL; 4137 Vec dummy_vec; 4138 PetscBool isLU, isCHOL, need_benign_correction, sparserhs; 4139 PetscInt *idx_V_B; 4140 PetscInt lda_rhs, n_vertices, n_constraints, *p0_lidx_I; 4141 PetscInt n_eff_vertices, n_eff_constraints; 4142 PetscInt i, n_R, n_D, n_B; 4143 PetscScalar one = 1.0, m_one = -1.0; 4144 4145 /* Multi-element support */ 4146 PetscBool multi_element = graph->multi_element; 4147 PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL; 4148 PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL; 4149 IS is_C_perm = NULL; 4150 PetscInt n_C_bss = 0, *C_bss = NULL; 4151 Mat coarse_phi_multi; 4152 4153 PetscFunctionBegin; 4154 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented"); 4155 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 4156 4157 /* Set Non-overlapping dimensions */ 4158 n_vertices = pcbddc->n_vertices; 4159 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 4160 n_B = pcis->n_B; 4161 n_D = pcis->n - n_B; 4162 n_R = pcis->n - n_vertices; 4163 4164 /* vertices in boundary numbering */ 4165 PetscCall(PetscMalloc1(n_vertices, &idx_V_B)); 4166 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B)); 4167 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i); 4168 4169 /* these two cases still need to be optimized */ 4170 if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE; 4171 4172 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 4173 if (multi_element) { 4174 PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 4175 4176 PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat)); 4177 PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size)); 4178 PetscCall(MatSetType(*coarse_submat, MATSEQAIJ)); 4179 PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE)); 4180 PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 4181 4182 /* group vertices and constraints by subdomain id */ 4183 const PetscInt *vidxs = pcbddc->primal_indices_local_idxs; 4184 const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices; 4185 PetscInt *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz; 4186 PetscInt n_el = PetscMax(graph->n_local_subs, 1); 4187 4188 PetscCall(PetscCalloc1(2 * n_el, &count_eff)); 4189 PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V)); 4190 PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C)); 4191 for (PetscInt i = 0; i < n_vertices; i++) { 4192 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4193 4194 V_to_eff_V[i] = count_eff[s]; 4195 count_eff[s] += 1; 4196 } 4197 for (PetscInt i = 0; i < n_constraints; i++) { 4198 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1; 4199 4200 C_to_eff_C[i] = count_eff[s]; 4201 count_eff[s] += 1; 4202 } 4203 4204 /* preallocation */ 4205 PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz)); 4206 for (PetscInt i = 0; i < n_vertices; i++) { 4207 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub; 4208 4209 nnz[i] = count_eff[s] + count_eff[s + 1]; 4210 } 4211 for (PetscInt i = 0; i < n_constraints; i++) { 4212 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub; 4213 4214 nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1]; 4215 } 4216 PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz)); 4217 PetscCall(PetscFree(nnz)); 4218 4219 n_eff_vertices = 0; 4220 n_eff_constraints = 0; 4221 for (PetscInt i = 0; i < n_el; i++) { 4222 n_eff_vertices = PetscMax(n_eff_vertices, count_eff[2 * i]); 4223 n_eff_constraints = PetscMax(n_eff_constraints, count_eff[2 * i + 1]); 4224 count_eff[2 * i] = 0; 4225 count_eff[2 * i + 1] = 0; 4226 } 4227 4228 const PetscInt *idx; 4229 PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C)); 4230 4231 for (PetscInt i = 0; i < n_vertices; i++) { 4232 const PetscInt e = graph->nodes[vidxs[i]].local_sub; 4233 const PetscInt s = 2 * e; 4234 4235 V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i; 4236 count_eff[s] += 1; 4237 } 4238 for (PetscInt i = 0; i < n_constraints; i++) { 4239 const PetscInt e = graph->nodes[cidxs[i]].local_sub; 4240 const PetscInt s = 2 * e + 1; 4241 4242 C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i; 4243 count_eff[s] += 1; 4244 } 4245 4246 PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J)); 4247 PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J)); 4248 PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J)); 4249 PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J)); 4250 for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1; 4251 for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1; 4252 for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1; 4253 for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1; 4254 4255 PetscCall(ISGetIndices(pcbddc->is_R_local, &idx)); 4256 for (PetscInt i = 0; i < n_R; i++) { 4257 const PetscInt e = graph->nodes[idx[i]].local_sub; 4258 const PetscInt s = 2 * e; 4259 PetscInt j; 4260 4261 for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4262 for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4263 } 4264 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx)); 4265 PetscCall(ISGetIndices(pcis->is_B_local, &idx)); 4266 for (PetscInt i = 0; i < n_B; i++) { 4267 const PetscInt e = graph->nodes[idx[i]].local_sub; 4268 const PetscInt s = 2 * e; 4269 PetscInt j; 4270 4271 for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j]; 4272 for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j]; 4273 } 4274 PetscCall(ISRestoreIndices(pcis->is_B_local, &idx)); 4275 4276 /* permutation and blocksizes for block invert of S_CC */ 4277 PetscInt *idxp; 4278 4279 PetscCall(PetscMalloc1(n_constraints, &idxp)); 4280 PetscCall(PetscMalloc1(n_el, &C_bss)); 4281 n_C_bss = 0; 4282 for (PetscInt e = 0, cnt = 0; e < n_el; e++) { 4283 const PetscInt nc = count_eff[2 * e + 1]; 4284 4285 if (nc) C_bss[n_C_bss++] = nc; 4286 for (PetscInt c = 0; c < nc; c++) { idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c]; } 4287 cnt += nc; 4288 } 4289 4290 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm)); 4291 4292 PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C)); 4293 PetscCall(PetscFree(count_eff)); 4294 } else { 4295 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat)); 4296 n_eff_constraints = n_constraints; 4297 n_eff_vertices = n_vertices; 4298 } 4299 4300 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 4301 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R)); 4302 PetscCall(PCSetUp(pc_R)); 4303 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU)); 4304 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL)); 4305 lda_rhs = n_R; 4306 need_benign_correction = PETSC_FALSE; 4307 if (isLU || isCHOL) { 4308 PetscCall(PCFactorGetMatrix(pc_R, &F)); 4309 } else if (sub_schurs && sub_schurs->reuse_solver) { 4310 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4311 MatFactorType type; 4312 4313 F = reuse_solver->F; 4314 PetscCall(MatGetFactorType(F, &type)); 4315 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 4316 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE; 4317 PetscCall(MatGetSize(F, &lda_rhs, NULL)); 4318 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 4319 } else F = NULL; 4320 4321 /* determine if we can use a sparse right-hand side */ 4322 sparserhs = PETSC_FALSE; 4323 if (F && !multi_element) { 4324 MatSolverType solver; 4325 4326 PetscCall(MatFactorGetSolverType(F, &solver)); 4327 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs)); 4328 } 4329 4330 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 4331 dummy_vec = NULL; 4332 if (need_benign_correction && lda_rhs != n_R && F) { 4333 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec)); 4334 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE)); 4335 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name)); 4336 } 4337 4338 PetscCall(MatDestroy(&pcbddc->local_auxmat1)); 4339 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4340 4341 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R)); 4342 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V)); 4343 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C)); 4344 PetscCall(ISGetIndices(is_V, &idx_V)); 4345 PetscCall(ISGetIndices(is_C, &idx_C)); 4346 4347 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 4348 if (n_constraints) { 4349 Mat C_B; 4350 4351 /* Extract constraints on R nodes: C_{CR} */ 4352 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR)); 4353 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 4354 4355 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 4356 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 4357 if (!sparserhs) { 4358 PetscScalar *marr; 4359 4360 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs)); 4361 PetscCall(MatDenseGetArrayWrite(Brhs, &marr)); 4362 for (i = 0; i < n_constraints; i++) { 4363 const PetscScalar *row_cmat_values; 4364 const PetscInt *row_cmat_indices; 4365 PetscInt size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i; 4366 4367 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4368 for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j]; 4369 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values)); 4370 } 4371 PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr)); 4372 } else { 4373 Mat tC_CR; 4374 4375 PetscCall(MatScale(C_CR, -1.0)); 4376 if (lda_rhs != n_R) { 4377 PetscScalar *aa; 4378 PetscInt r, *ii, *jj; 4379 PetscBool done; 4380 4381 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4382 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4383 PetscCall(MatSeqAIJGetArray(C_CR, &aa)); 4384 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR)); 4385 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4386 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4387 } else { 4388 PetscCall(PetscObjectReference((PetscObject)C_CR)); 4389 tC_CR = C_CR; 4390 } 4391 PetscCall(MatCreateTranspose(tC_CR, &Brhs)); 4392 PetscCall(MatDestroy(&tC_CR)); 4393 } 4394 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R)); 4395 if (F) { 4396 if (need_benign_correction) { 4397 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4398 4399 /* rhs is already zero on interior dofs, no need to change the rhs */ 4400 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n)); 4401 } 4402 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R)); 4403 if (need_benign_correction) { 4404 PetscScalar *marr; 4405 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4406 4407 /* XXX multi_element? */ 4408 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4409 if (lda_rhs != n_R) { 4410 for (i = 0; i < n_eff_constraints; i++) { 4411 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4412 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4413 PetscCall(VecResetArray(dummy_vec)); 4414 } 4415 } else { 4416 for (i = 0; i < n_eff_constraints; i++) { 4417 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4418 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4419 PetscCall(VecResetArray(pcbddc->vec1_R)); 4420 } 4421 } 4422 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4423 } 4424 } else { 4425 const PetscScalar *barr; 4426 PetscScalar *marr; 4427 4428 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4429 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr)); 4430 for (i = 0; i < n_eff_constraints; i++) { 4431 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4432 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4433 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4434 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4435 PetscCall(VecResetArray(pcbddc->vec1_R)); 4436 PetscCall(VecResetArray(pcbddc->vec2_R)); 4437 } 4438 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4439 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr)); 4440 } 4441 if (sparserhs) PetscCall(MatScale(C_CR, -1.0)); 4442 PetscCall(MatDestroy(&Brhs)); 4443 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */ 4444 if (!pcbddc->switch_static) { 4445 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2)); 4446 for (i = 0; i < n_eff_constraints; i++) { 4447 Vec r, b; 4448 PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r)); 4449 PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b)); 4450 PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4451 PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD)); 4452 PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b)); 4453 PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r)); 4454 } 4455 if (multi_element) { 4456 Mat T; 4457 4458 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4459 PetscCall(MatDestroy(&local_auxmat2_R)); 4460 local_auxmat2_R = T; 4461 PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T)); 4462 PetscCall(MatDestroy(&pcbddc->local_auxmat2)); 4463 pcbddc->local_auxmat2 = T; 4464 } 4465 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_CC)); 4466 } else { 4467 if (multi_element) { 4468 Mat T; 4469 4470 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T)); 4471 PetscCall(MatDestroy(&local_auxmat2_R)); 4472 local_auxmat2_R = T; 4473 } 4474 if (lda_rhs != n_R) { 4475 PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2)); 4476 } else { 4477 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R)); 4478 pcbddc->local_auxmat2 = local_auxmat2_R; 4479 } 4480 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_CC)); 4481 } 4482 PetscCall(MatScale(S_CC, m_one)); 4483 if (multi_element) { 4484 Mat T, T2; 4485 IS isp, ispi; 4486 4487 isp = is_C_perm; 4488 4489 PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi)); 4490 PetscCall(MatPermute(S_CC, isp, isp, &T)); 4491 PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2)); 4492 PetscCall(MatDestroy(&T)); 4493 PetscCall(MatDestroy(&S_CC)); 4494 PetscCall(MatPermute(T2, ispi, ispi, &S_CC)); 4495 PetscCall(MatDestroy(&T2)); 4496 PetscCall(ISDestroy(&ispi)); 4497 } else { 4498 if (isCHOL) { 4499 PetscCall(MatCholeskyFactor(S_CC, NULL, NULL)); 4500 } else { 4501 PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL)); 4502 } 4503 PetscCall(MatSeqDenseInvertFactors_Private(S_CC)); 4504 } 4505 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4506 PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->local_auxmat1)); 4507 PetscCall(MatDestroy(&C_B)); 4508 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES)); 4509 } 4510 4511 /* Get submatrices from subdomain matrix */ 4512 if (n_vertices) { 4513 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4514 PetscBool oldpin; 4515 #endif 4516 IS is_aux; 4517 4518 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4519 IS tis; 4520 4521 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis)); 4522 PetscCall(ISSort(tis)); 4523 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux)); 4524 PetscCall(ISDestroy(&tis)); 4525 } else { 4526 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux)); 4527 } 4528 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4529 oldpin = pcbddc->local_mat->boundtocpu; 4530 #endif 4531 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE)); 4532 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV)); 4533 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR)); 4534 /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */ 4535 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4536 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV)); 4537 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 4538 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin)); 4539 #endif 4540 PetscCall(ISDestroy(&is_aux)); 4541 } 4542 PetscCall(ISDestroy(&is_C_perm)); 4543 PetscCall(PetscFree(C_bss)); 4544 4545 p0_lidx_I = NULL; 4546 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4547 const PetscInt *idxs; 4548 4549 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 4550 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I)); 4551 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i])); 4552 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 4553 } 4554 4555 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4556 4557 /* Matrices of coarse basis functions (local) */ 4558 PetscCall(MatDestroy(&pcbddc->coarse_phi_B)); 4559 PetscCall(MatDestroy(&pcbddc->coarse_psi_B)); 4560 PetscCall(MatDestroy(&pcbddc->coarse_phi_D)); 4561 PetscCall(MatDestroy(&pcbddc->coarse_psi_D)); 4562 if (!multi_element) { 4563 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B)); 4564 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D)); 4565 coarse_phi_multi = NULL; 4566 } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */ 4567 IS is_rows[2] = {pcbddc->is_R_local, NULL}; 4568 IS is_cols[2] = {is_V, is_C}; 4569 4570 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1])); 4571 PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi)); 4572 PetscCall(ISDestroy(&is_rows[1])); 4573 } 4574 4575 /* vertices */ 4576 if (n_vertices) { 4577 PetscBool restoreavr = PETSC_FALSE; 4578 Mat A_RRmA_RV = NULL; 4579 4580 PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4581 PetscCall(MatDestroy(&A_VV)); 4582 4583 if (n_R) { 4584 Mat A_RV_bcorr = NULL, S_VV; 4585 4586 PetscCall(MatScale(A_RV, m_one)); 4587 if (need_benign_correction) { 4588 ISLocalToGlobalMapping RtoN; 4589 IS is_p0; 4590 PetscInt *idxs_p0, n; 4591 4592 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0)); 4593 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN)); 4594 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0)); 4595 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n); 4596 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN)); 4597 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0)); 4598 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr)); 4599 PetscCall(ISDestroy(&is_p0)); 4600 } 4601 4602 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV)); 4603 if (!sparserhs || need_benign_correction) { 4604 if (lda_rhs == n_R && !multi_element) { 4605 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV)); 4606 } else { 4607 Mat T; 4608 PetscScalar *av, *array; 4609 const PetscInt *xadj, *adjncy; 4610 PetscInt n; 4611 PetscBool flg_row; 4612 4613 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T)); 4614 PetscCall(MatDenseGetArrayWrite(T, &array)); 4615 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV)); 4616 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4617 PetscCall(MatSeqAIJGetArray(A_RV, &av)); 4618 for (i = 0; i < n; i++) { 4619 PetscInt j; 4620 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j]; 4621 } 4622 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4623 PetscCall(MatDenseRestoreArrayWrite(T, &array)); 4624 PetscCall(MatDestroy(&A_RV)); 4625 A_RV = T; 4626 } 4627 if (need_benign_correction) { 4628 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4629 PetscScalar *marr; 4630 4631 /* XXX multi_element */ 4632 PetscCall(MatDenseGetArray(A_RV, &marr)); 4633 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4634 4635 | 0 0 0 | (V) 4636 L = | 0 0 -1 | (P-p0) 4637 | 0 0 -1 | (p0) 4638 4639 */ 4640 for (i = 0; i < reuse_solver->benign_n; i++) { 4641 const PetscScalar *vals; 4642 const PetscInt *idxs, *idxs_zero; 4643 PetscInt n, j, nz; 4644 4645 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4646 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4647 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4648 for (j = 0; j < n; j++) { 4649 PetscScalar val = vals[j]; 4650 PetscInt k, col = idxs[j]; 4651 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val; 4652 } 4653 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4654 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4655 } 4656 PetscCall(MatDenseRestoreArray(A_RV, &marr)); 4657 } 4658 PetscCall(PetscObjectReference((PetscObject)A_RV)); 4659 Brhs = A_RV; 4660 } else { 4661 Mat tA_RVT, A_RVT; 4662 4663 if (!pcbddc->symmetric_primal) { 4664 /* A_RV already scaled by -1 */ 4665 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT)); 4666 } else { 4667 restoreavr = PETSC_TRUE; 4668 PetscCall(MatScale(A_VR, -1.0)); 4669 PetscCall(PetscObjectReference((PetscObject)A_VR)); 4670 A_RVT = A_VR; 4671 } 4672 if (lda_rhs != n_R) { 4673 PetscScalar *aa; 4674 PetscInt r, *ii, *jj; 4675 PetscBool done; 4676 4677 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4678 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed"); 4679 PetscCall(MatSeqAIJGetArray(A_RVT, &aa)); 4680 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT)); 4681 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done)); 4682 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed"); 4683 } else { 4684 PetscCall(PetscObjectReference((PetscObject)A_RVT)); 4685 tA_RVT = A_RVT; 4686 } 4687 PetscCall(MatCreateTranspose(tA_RVT, &Brhs)); 4688 PetscCall(MatDestroy(&tA_RVT)); 4689 PetscCall(MatDestroy(&A_RVT)); 4690 } 4691 if (F) { 4692 /* need to correct the rhs */ 4693 if (need_benign_correction) { 4694 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4695 PetscScalar *marr; 4696 4697 PetscCall(MatDenseGetArray(Brhs, &marr)); 4698 if (lda_rhs != n_R) { 4699 for (i = 0; i < n_eff_vertices; i++) { 4700 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4701 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE)); 4702 PetscCall(VecResetArray(dummy_vec)); 4703 } 4704 } else { 4705 for (i = 0; i < n_eff_vertices; i++) { 4706 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4707 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE)); 4708 PetscCall(VecResetArray(pcbddc->vec1_R)); 4709 } 4710 } 4711 PetscCall(MatDenseRestoreArray(Brhs, &marr)); 4712 } 4713 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV)); 4714 if (restoreavr) PetscCall(MatScale(A_VR, -1.0)); 4715 /* need to correct the solution */ 4716 if (need_benign_correction) { 4717 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4718 PetscScalar *marr; 4719 4720 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4721 if (lda_rhs != n_R) { 4722 for (i = 0; i < n_eff_vertices; i++) { 4723 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs)); 4724 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE)); 4725 PetscCall(VecResetArray(dummy_vec)); 4726 } 4727 } else { 4728 for (i = 0; i < n_eff_vertices; i++) { 4729 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs)); 4730 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE)); 4731 PetscCall(VecResetArray(pcbddc->vec1_R)); 4732 } 4733 } 4734 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4735 } 4736 } else { 4737 const PetscScalar *barr; 4738 PetscScalar *marr; 4739 4740 PetscCall(MatDenseGetArrayRead(Brhs, &barr)); 4741 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr)); 4742 for (i = 0; i < n_eff_vertices; i++) { 4743 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs)); 4744 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs)); 4745 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 4746 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 4747 PetscCall(VecResetArray(pcbddc->vec1_R)); 4748 PetscCall(VecResetArray(pcbddc->vec2_R)); 4749 } 4750 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr)); 4751 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr)); 4752 } 4753 PetscCall(MatDestroy(&A_RV)); 4754 PetscCall(MatDestroy(&Brhs)); 4755 /* S_VV and S_CV */ 4756 if (n_constraints) { 4757 Mat B; 4758 4759 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B)); 4760 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B)); 4761 4762 /* S_CV = pcbddc->local_auxmat1 * B */ 4763 if (multi_element) { 4764 Mat T; 4765 4766 PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T)); 4767 PetscCall(MatDestroy(&B)); 4768 B = T; 4769 } 4770 PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV)); 4771 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB)); 4772 PetscCall(MatProductSetFromOptions(S_CV)); 4773 PetscCall(MatProductSymbolic(S_CV)); 4774 PetscCall(MatProductNumeric(S_CV)); 4775 PetscCall(MatProductClear(S_CV)); 4776 PetscCall(MatDestroy(&B)); 4777 4778 /* B = local_auxmat2_R * S_CV */ 4779 PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B)); 4780 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4781 PetscCall(MatProductSetFromOptions(B)); 4782 PetscCall(MatProductSymbolic(B)); 4783 PetscCall(MatProductNumeric(B)); 4784 4785 PetscCall(MatScale(S_CV, m_one)); 4786 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES)); 4787 4788 if (multi_element) { 4789 Mat T; 4790 4791 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4792 PetscCall(MatDestroy(&A_RRmA_RV)); 4793 A_RRmA_RV = T; 4794 } 4795 PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */ 4796 PetscCall(MatDestroy(&B)); 4797 } else if (multi_element) { 4798 Mat T; 4799 4800 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T)); 4801 PetscCall(MatDestroy(&A_RRmA_RV)); 4802 A_RRmA_RV = T; 4803 } 4804 4805 if (lda_rhs != n_R) { 4806 Mat T; 4807 4808 PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T)); 4809 PetscCall(MatDestroy(&A_RRmA_RV)); 4810 A_RRmA_RV = T; 4811 } 4812 4813 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4814 if (need_benign_correction) { /* XXX SPARSE */ 4815 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4816 PetscScalar *sums; 4817 const PetscScalar *marr; 4818 4819 PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr)); 4820 PetscCall(PetscMalloc1(n_vertices, &sums)); 4821 for (i = 0; i < reuse_solver->benign_n; i++) { 4822 const PetscScalar *vals; 4823 const PetscInt *idxs, *idxs_zero; 4824 PetscInt n, j, nz; 4825 4826 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz)); 4827 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4828 for (j = 0; j < n_vertices; j++) { 4829 sums[j] = 0.; 4830 for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R]; 4831 } 4832 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4833 for (j = 0; j < n; j++) { 4834 PetscScalar val = vals[j]; 4835 for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES)); 4836 } 4837 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals)); 4838 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero)); 4839 } 4840 PetscCall(PetscFree(sums)); 4841 PetscCall(MatDestroy(&A_RV_bcorr)); 4842 PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr)); 4843 } 4844 4845 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VV)); 4846 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES)); 4847 PetscCall(MatDestroy(&S_VV)); 4848 } 4849 4850 /* coarse basis functions */ 4851 if (coarse_phi_multi) { 4852 Mat Vid; 4853 4854 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid)); 4855 PetscCall(MatShift_Basic(Vid, 1.0)); 4856 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV)); 4857 PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid)); 4858 PetscCall(MatDestroy(&Vid)); 4859 } else { 4860 if (A_RRmA_RV) { 4861 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B)); 4862 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4863 PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D)); 4864 if (pcbddc->benign_n) { 4865 for (i = 0; i < n_vertices; i++) { PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); } 4866 } 4867 } 4868 } 4869 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES)); 4870 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 4871 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY)); 4872 } 4873 PetscCall(MatDestroy(&A_RRmA_RV)); 4874 } 4875 PetscCall(MatDestroy(&A_RV)); 4876 PetscCall(VecDestroy(&dummy_vec)); 4877 4878 if (n_constraints) { 4879 Mat B, B2; 4880 4881 PetscCall(MatScale(S_CC, m_one)); 4882 PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B)); 4883 PetscCall(MatProductSetType(B, MATPRODUCT_AB)); 4884 PetscCall(MatProductSetFromOptions(B)); 4885 PetscCall(MatProductSymbolic(B)); 4886 PetscCall(MatProductNumeric(B)); 4887 4888 if (n_vertices) { 4889 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4890 PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC)); 4891 } else { 4892 if (lda_rhs != n_R) { 4893 Mat tB; 4894 4895 PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB)); 4896 PetscCall(MatDestroy(&B)); 4897 B = tB; 4898 } 4899 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VC)); 4900 } 4901 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES)); 4902 } 4903 4904 /* coarse basis functions */ 4905 if (coarse_phi_multi) { 4906 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B)); 4907 } else { 4908 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 4909 PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2)); 4910 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2)); 4911 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4912 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2)); 4913 PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2)); 4914 if (pcbddc->benign_n) { 4915 for (i = 0; i < n_constraints; i++) { PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); } 4916 } 4917 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2)); 4918 } 4919 } 4920 PetscCall(MatDestroy(&B)); 4921 } 4922 4923 /* assemble sparse coarse basis functions */ 4924 if (coarse_phi_multi) { 4925 Mat T; 4926 4927 PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T)); 4928 PetscCall(MatDestroy(&coarse_phi_multi)); 4929 PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B)); 4930 if (pcbddc->switch_static || pcbddc->dbg_flag) { PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D)); } 4931 PetscCall(MatDestroy(&T)); 4932 } 4933 PetscCall(MatDestroy(&local_auxmat2_R)); 4934 PetscCall(PetscFree(p0_lidx_I)); 4935 4936 /* coarse matrix entries relative to B_0 */ 4937 if (pcbddc->benign_n) { 4938 Mat B0_B, B0_BPHI; 4939 IS is_dummy; 4940 const PetscScalar *data; 4941 PetscInt j; 4942 4943 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 4944 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 4945 PetscCall(ISDestroy(&is_dummy)); 4946 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 4947 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 4948 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data)); 4949 for (j = 0; j < pcbddc->benign_n; j++) { 4950 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4951 for (i = 0; i < pcbddc->local_primal_size; i++) { 4952 PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 4953 PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES)); 4954 } 4955 } 4956 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data)); 4957 PetscCall(MatDestroy(&B0_B)); 4958 PetscCall(MatDestroy(&B0_BPHI)); 4959 } 4960 4961 /* compute other basis functions for non-symmetric problems */ 4962 if (!pcbddc->symmetric_primal) { 4963 Mat B_V = NULL, B_C = NULL; 4964 PetscScalar *marray, *work; 4965 4966 /* TODO multi_element MatDenseScatter */ 4967 if (n_constraints) { 4968 Mat S_CCT, C_CRT; 4969 4970 PetscCall(MatScale(S_CC, m_one)); 4971 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT)); 4972 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT)); 4973 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_C)); 4974 PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C)); 4975 PetscCall(MatDestroy(&S_CCT)); 4976 if (n_vertices) { 4977 Mat S_VCT; 4978 4979 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT)); 4980 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_V)); 4981 PetscCall(MatDestroy(&S_VCT)); 4982 PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V)); 4983 } 4984 PetscCall(MatDestroy(&C_CRT)); 4985 } else { 4986 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V)); 4987 } 4988 if (n_vertices && n_R) { 4989 PetscScalar *av, *marray; 4990 const PetscInt *xadj, *adjncy; 4991 PetscInt n; 4992 PetscBool flg_row; 4993 4994 /* B_V = B_V - A_VR^T */ 4995 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR)); 4996 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 4997 PetscCall(MatSeqAIJGetArray(A_VR, &av)); 4998 PetscCall(MatDenseGetArray(B_V, &marray)); 4999 for (i = 0; i < n; i++) { 5000 PetscInt j; 5001 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j]; 5002 } 5003 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5004 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row)); 5005 PetscCall(MatDestroy(&A_VR)); 5006 } 5007 5008 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 5009 PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work)); 5010 if (n_vertices) { 5011 PetscCall(MatDenseGetArray(B_V, &marray)); 5012 for (i = 0; i < n_vertices; i++) { 5013 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R)); 5014 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5015 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5016 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5017 PetscCall(VecResetArray(pcbddc->vec1_R)); 5018 PetscCall(VecResetArray(pcbddc->vec2_R)); 5019 } 5020 PetscCall(MatDenseRestoreArray(B_V, &marray)); 5021 } 5022 if (B_C) { 5023 PetscCall(MatDenseGetArray(B_C, &marray)); 5024 for (i = n_vertices; i < n_constraints + n_vertices; i++) { 5025 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R)); 5026 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R)); 5027 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R)); 5028 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 5029 PetscCall(VecResetArray(pcbddc->vec1_R)); 5030 PetscCall(VecResetArray(pcbddc->vec2_R)); 5031 } 5032 PetscCall(MatDenseRestoreArray(B_C, &marray)); 5033 } 5034 /* coarse basis functions */ 5035 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B)); 5036 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D)); 5037 for (i = 0; i < pcbddc->local_primal_size; i++) { 5038 Vec v; 5039 5040 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R)); 5041 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v)); 5042 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5043 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5044 if (i < n_vertices) { 5045 PetscScalar one = 1.0; 5046 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES)); 5047 PetscCall(VecAssemblyBegin(v)); 5048 PetscCall(VecAssemblyEnd(v)); 5049 } 5050 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v)); 5051 5052 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5053 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v)); 5054 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5055 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD)); 5056 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v)); 5057 } 5058 PetscCall(VecResetArray(pcbddc->vec1_R)); 5059 } 5060 PetscCall(MatDestroy(&B_V)); 5061 PetscCall(MatDestroy(&B_C)); 5062 PetscCall(PetscFree(work)); 5063 } else { 5064 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B)); 5065 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 5066 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D)); 5067 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 5068 } 5069 PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5070 PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY)); 5071 5072 /* free memory */ 5073 PetscCall(PetscFree(V_to_eff_V)); 5074 PetscCall(PetscFree(C_to_eff_C)); 5075 PetscCall(PetscFree(R_eff_V_J)); 5076 PetscCall(PetscFree(R_eff_C_J)); 5077 PetscCall(PetscFree(B_eff_V_J)); 5078 PetscCall(PetscFree(B_eff_C_J)); 5079 PetscCall(ISDestroy(&is_R)); 5080 PetscCall(ISRestoreIndices(is_V, &idx_V)); 5081 PetscCall(ISRestoreIndices(is_C, &idx_C)); 5082 PetscCall(ISDestroy(&is_V)); 5083 PetscCall(ISDestroy(&is_C)); 5084 PetscCall(PetscFree(idx_V_B)); 5085 PetscCall(MatDestroy(&S_CV)); 5086 PetscCall(MatDestroy(&S_VC)); 5087 PetscCall(MatDestroy(&S_CC)); 5088 if (n_vertices) PetscCall(MatDestroy(&A_VR)); 5089 if (n_constraints) PetscCall(MatDestroy(&C_CR)); 5090 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0)); 5091 5092 /* Checking coarse_sub_mat and coarse basis functions */ 5093 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5094 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 5095 if (pcbddc->dbg_flag) { 5096 Mat AUXMAT, TM1, TM2, TM3, TM4; 5097 Mat coarse_phi_D, coarse_phi_B; 5098 Mat coarse_psi_D, coarse_psi_B; 5099 Mat A_II, A_BB, A_IB, A_BI; 5100 Mat C_B, CPHI; 5101 IS is_dummy; 5102 Vec mones; 5103 MatType checkmattype = MATSEQAIJ; 5104 PetscReal real_value; 5105 5106 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5107 Mat A; 5108 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A)); 5109 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II)); 5110 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB)); 5111 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI)); 5112 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB)); 5113 PetscCall(MatDestroy(&A)); 5114 } else { 5115 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II)); 5116 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB)); 5117 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI)); 5118 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB)); 5119 } 5120 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D)); 5121 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B)); 5122 if (!pcbddc->symmetric_primal) { 5123 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D)); 5124 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B)); 5125 } 5126 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5127 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal)); 5128 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5129 if (!pcbddc->symmetric_primal) { 5130 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5131 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5132 PetscCall(MatDestroy(&AUXMAT)); 5133 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5134 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5135 PetscCall(MatDestroy(&AUXMAT)); 5136 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5137 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5138 PetscCall(MatDestroy(&AUXMAT)); 5139 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5140 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5141 PetscCall(MatDestroy(&AUXMAT)); 5142 } else { 5143 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1)); 5144 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2)); 5145 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5146 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3)); 5147 PetscCall(MatDestroy(&AUXMAT)); 5148 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT)); 5149 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4)); 5150 PetscCall(MatDestroy(&AUXMAT)); 5151 } 5152 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN)); 5153 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN)); 5154 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN)); 5155 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1)); 5156 if (pcbddc->benign_n) { 5157 Mat B0_B, B0_BPHI; 5158 const PetscScalar *data2; 5159 PetscScalar *data; 5160 PetscInt j; 5161 5162 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 5163 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 5164 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI)); 5165 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI)); 5166 PetscCall(MatDenseGetArray(TM1, &data)); 5167 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2)); 5168 for (j = 0; j < pcbddc->benign_n; j++) { 5169 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 5170 for (i = 0; i < pcbddc->local_primal_size; i++) { 5171 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j]; 5172 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j]; 5173 } 5174 } 5175 PetscCall(MatDenseRestoreArray(TM1, &data)); 5176 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2)); 5177 PetscCall(MatDestroy(&B0_B)); 5178 PetscCall(ISDestroy(&is_dummy)); 5179 PetscCall(MatDestroy(&B0_BPHI)); 5180 } 5181 PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN)); 5182 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value)); 5183 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5184 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5185 5186 /* check constraints */ 5187 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy)); 5188 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B)); 5189 if (!pcbddc->benign_n) { /* TODO: add benign case */ 5190 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5191 } else { 5192 PetscScalar *data; 5193 Mat tmat; 5194 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data)); 5195 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat)); 5196 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data)); 5197 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI)); 5198 PetscCall(MatDestroy(&tmat)); 5199 } 5200 PetscCall(MatCreateVecs(CPHI, &mones, NULL)); 5201 PetscCall(VecSet(mones, -1.0)); 5202 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5203 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5204 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5205 if (!pcbddc->symmetric_primal) { 5206 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI)); 5207 PetscCall(VecSet(mones, -1.0)); 5208 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES)); 5209 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value)); 5210 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value)); 5211 } 5212 PetscCall(MatDestroy(&C_B)); 5213 PetscCall(MatDestroy(&CPHI)); 5214 PetscCall(ISDestroy(&is_dummy)); 5215 PetscCall(VecDestroy(&mones)); 5216 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5217 PetscCall(MatDestroy(&A_II)); 5218 PetscCall(MatDestroy(&A_BB)); 5219 PetscCall(MatDestroy(&A_IB)); 5220 PetscCall(MatDestroy(&A_BI)); 5221 PetscCall(MatDestroy(&TM1)); 5222 PetscCall(MatDestroy(&TM2)); 5223 PetscCall(MatDestroy(&TM3)); 5224 PetscCall(MatDestroy(&TM4)); 5225 PetscCall(MatDestroy(&coarse_phi_D)); 5226 PetscCall(MatDestroy(&coarse_phi_B)); 5227 if (!pcbddc->symmetric_primal) { 5228 PetscCall(MatDestroy(&coarse_psi_D)); 5229 PetscCall(MatDestroy(&coarse_psi_B)); 5230 } 5231 } 5232 5233 #if 0 5234 { 5235 PetscViewer viewer; 5236 char filename[256]; 5237 5238 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level)); 5239 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer)); 5240 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 5241 PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat")); 5242 PetscCall(MatView(*coarse_submat,viewer)); 5243 if (pcbddc->coarse_phi_B) { 5244 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B")); 5245 PetscCall(MatView(pcbddc->coarse_phi_B,viewer)); 5246 } 5247 if (pcbddc->coarse_phi_D) { 5248 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D")); 5249 PetscCall(MatView(pcbddc->coarse_phi_D,viewer)); 5250 } 5251 if (pcbddc->coarse_psi_B) { 5252 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B")); 5253 PetscCall(MatView(pcbddc->coarse_psi_B,viewer)); 5254 } 5255 if (pcbddc->coarse_psi_D) { 5256 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D")); 5257 PetscCall(MatView(pcbddc->coarse_psi_D,viewer)); 5258 } 5259 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A")); 5260 PetscCall(MatView(pcbddc->local_mat,viewer)); 5261 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C")); 5262 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer)); 5263 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I")); 5264 PetscCall(ISView(pcis->is_I_local,viewer)); 5265 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B")); 5266 PetscCall(ISView(pcis->is_B_local,viewer)); 5267 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R")); 5268 PetscCall(ISView(pcbddc->is_R_local,viewer)); 5269 PetscCall(PetscOptionsRestoreViewer(&viewer)); 5270 } 5271 #endif 5272 5273 /* device support */ 5274 { 5275 PetscBool iscuda, iship, iskokkos; 5276 MatType mtype = NULL; 5277 5278 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, "")); 5279 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, "")); 5280 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, "")); 5281 if (iskokkos) { 5282 if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE; 5283 else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE; 5284 } 5285 if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP); 5286 else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP; 5287 else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA; 5288 if (mtype) { 5289 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1)); 5290 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2)); 5291 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B)); 5292 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D)); 5293 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B)); 5294 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D)); 5295 } 5296 } 5297 PetscFunctionReturn(PETSC_SUCCESS); 5298 } 5299 5300 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B) 5301 { 5302 Mat *work_mat; 5303 IS isrow_s, iscol_s; 5304 PetscBool rsorted, csorted; 5305 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL; 5306 5307 PetscFunctionBegin; 5308 PetscCall(ISSorted(isrow, &rsorted)); 5309 PetscCall(ISSorted(iscol, &csorted)); 5310 PetscCall(ISGetLocalSize(isrow, &rsize)); 5311 PetscCall(ISGetLocalSize(iscol, &csize)); 5312 5313 if (!rsorted) { 5314 const PetscInt *idxs; 5315 PetscInt *idxs_sorted, i; 5316 5317 PetscCall(PetscMalloc1(rsize, &idxs_perm_r)); 5318 PetscCall(PetscMalloc1(rsize, &idxs_sorted)); 5319 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i; 5320 PetscCall(ISGetIndices(isrow, &idxs)); 5321 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r)); 5322 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]]; 5323 PetscCall(ISRestoreIndices(isrow, &idxs)); 5324 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s)); 5325 } else { 5326 PetscCall(PetscObjectReference((PetscObject)isrow)); 5327 isrow_s = isrow; 5328 } 5329 5330 if (!csorted) { 5331 if (isrow == iscol) { 5332 PetscCall(PetscObjectReference((PetscObject)isrow_s)); 5333 iscol_s = isrow_s; 5334 } else { 5335 const PetscInt *idxs; 5336 PetscInt *idxs_sorted, i; 5337 5338 PetscCall(PetscMalloc1(csize, &idxs_perm_c)); 5339 PetscCall(PetscMalloc1(csize, &idxs_sorted)); 5340 for (i = 0; i < csize; i++) idxs_perm_c[i] = i; 5341 PetscCall(ISGetIndices(iscol, &idxs)); 5342 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c)); 5343 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]]; 5344 PetscCall(ISRestoreIndices(iscol, &idxs)); 5345 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s)); 5346 } 5347 } else { 5348 PetscCall(PetscObjectReference((PetscObject)iscol)); 5349 iscol_s = iscol; 5350 } 5351 5352 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat)); 5353 5354 if (!rsorted || !csorted) { 5355 Mat new_mat; 5356 IS is_perm_r, is_perm_c; 5357 5358 if (!rsorted) { 5359 PetscInt *idxs_r, i; 5360 PetscCall(PetscMalloc1(rsize, &idxs_r)); 5361 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i; 5362 PetscCall(PetscFree(idxs_perm_r)); 5363 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r)); 5364 } else { 5365 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r)); 5366 } 5367 PetscCall(ISSetPermutation(is_perm_r)); 5368 5369 if (!csorted) { 5370 if (isrow_s == iscol_s) { 5371 PetscCall(PetscObjectReference((PetscObject)is_perm_r)); 5372 is_perm_c = is_perm_r; 5373 } else { 5374 PetscInt *idxs_c, i; 5375 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present"); 5376 PetscCall(PetscMalloc1(csize, &idxs_c)); 5377 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i; 5378 PetscCall(PetscFree(idxs_perm_c)); 5379 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c)); 5380 } 5381 } else { 5382 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c)); 5383 } 5384 PetscCall(ISSetPermutation(is_perm_c)); 5385 5386 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat)); 5387 PetscCall(MatDestroy(&work_mat[0])); 5388 work_mat[0] = new_mat; 5389 PetscCall(ISDestroy(&is_perm_r)); 5390 PetscCall(ISDestroy(&is_perm_c)); 5391 } 5392 5393 PetscCall(PetscObjectReference((PetscObject)work_mat[0])); 5394 *B = work_mat[0]; 5395 PetscCall(MatDestroyMatrices(1, &work_mat)); 5396 PetscCall(ISDestroy(&isrow_s)); 5397 PetscCall(ISDestroy(&iscol_s)); 5398 PetscFunctionReturn(PETSC_SUCCESS); 5399 } 5400 5401 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 5402 { 5403 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5404 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5405 Mat new_mat, lA; 5406 IS is_local, is_global; 5407 PetscInt local_size; 5408 PetscBool isseqaij, issym, isset; 5409 5410 PetscFunctionBegin; 5411 PetscCall(MatDestroy(&pcbddc->local_mat)); 5412 PetscCall(MatGetSize(matis->A, &local_size, NULL)); 5413 if (pcbddc->mat_graph->multi_element) { 5414 Mat *mats, *bdiags; 5415 IS *gsubs; 5416 PetscInt nsubs = pcbddc->n_local_subs; 5417 5418 PetscCall(PetscCalloc1(nsubs * nsubs, &mats)); 5419 PetscCall(PetscMalloc1(nsubs, &gsubs)); 5420 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i])); 5421 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags)); 5422 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i])); 5423 PetscCall(PetscFree(gsubs)); 5424 5425 for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i]; 5426 PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat)); 5427 PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat)); 5428 PetscCall(MatDestroySubMatrices(nsubs, &bdiags)); 5429 PetscCall(PetscFree(mats)); 5430 } else { 5431 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local)); 5432 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global)); 5433 PetscCall(ISDestroy(&is_local)); 5434 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat)); 5435 PetscCall(ISDestroy(&is_global)); 5436 } 5437 if (pcbddc->dbg_flag) { 5438 Vec x, x_change; 5439 PetscReal error; 5440 5441 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change)); 5442 PetscCall(VecSetRandom(x, NULL)); 5443 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change)); 5444 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5445 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD)); 5446 PetscCall(MatMult(new_mat, matis->x, matis->y)); 5447 if (!pcbddc->change_interior) { 5448 const PetscScalar *x, *y, *v; 5449 PetscReal lerror = 0.; 5450 PetscInt i; 5451 5452 PetscCall(VecGetArrayRead(matis->x, &x)); 5453 PetscCall(VecGetArrayRead(matis->y, &y)); 5454 PetscCall(VecGetArrayRead(matis->counter, &v)); 5455 for (i = 0; i < local_size; i++) 5456 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]); 5457 PetscCall(VecRestoreArrayRead(matis->x, &x)); 5458 PetscCall(VecRestoreArrayRead(matis->y, &y)); 5459 PetscCall(VecRestoreArrayRead(matis->counter, &v)); 5460 PetscCall(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc))); 5461 if (error > PETSC_SMALL) { 5462 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5463 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error); 5464 } else { 5465 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error); 5466 } 5467 } 5468 } 5469 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5470 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE)); 5471 PetscCall(VecAXPY(x, -1.0, x_change)); 5472 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 5473 if (error > PETSC_SMALL) { 5474 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5475 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 5476 } else { 5477 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error); 5478 } 5479 } 5480 PetscCall(VecDestroy(&x)); 5481 PetscCall(VecDestroy(&x_change)); 5482 } 5483 5484 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5485 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA)); 5486 5487 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5488 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij)); 5489 if (isseqaij) { 5490 PetscCall(MatDestroy(&pcbddc->local_mat)); 5491 PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 5492 if (lA) { 5493 Mat work; 5494 PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 5495 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5496 PetscCall(MatDestroy(&work)); 5497 } 5498 } else { 5499 Mat work_mat; 5500 5501 PetscCall(MatDestroy(&pcbddc->local_mat)); 5502 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5503 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat)); 5504 PetscCall(MatDestroy(&work_mat)); 5505 if (lA) { 5506 Mat work; 5507 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat)); 5508 PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work)); 5509 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work)); 5510 PetscCall(MatDestroy(&work)); 5511 } 5512 } 5513 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym)); 5514 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym)); 5515 PetscCall(MatDestroy(&new_mat)); 5516 PetscFunctionReturn(PETSC_SUCCESS); 5517 } 5518 5519 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5520 { 5521 PC_IS *pcis = (PC_IS *)pc->data; 5522 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5523 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5524 PetscInt *idx_R_local = NULL; 5525 PetscInt n_vertices, i, j, n_R, n_D, n_B; 5526 PetscInt vbs, bs; 5527 PetscBT bitmask = NULL; 5528 5529 PetscFunctionBegin; 5530 /* 5531 No need to setup local scatters if 5532 - primal space is unchanged 5533 AND 5534 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5535 AND 5536 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5537 */ 5538 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS); 5539 /* destroy old objects */ 5540 PetscCall(ISDestroy(&pcbddc->is_R_local)); 5541 PetscCall(VecScatterDestroy(&pcbddc->R_to_B)); 5542 PetscCall(VecScatterDestroy(&pcbddc->R_to_D)); 5543 /* Set Non-overlapping dimensions */ 5544 n_B = pcis->n_B; 5545 n_D = pcis->n - n_B; 5546 n_vertices = pcbddc->n_vertices; 5547 5548 /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5549 5550 /* create auxiliary bitmask and allocate workspace */ 5551 if (!sub_schurs || !sub_schurs->reuse_solver) { 5552 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local)); 5553 PetscCall(PetscBTCreate(pcis->n, &bitmask)); 5554 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i])); 5555 5556 for (i = 0, n_R = 0; i < pcis->n; i++) { 5557 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i; 5558 } 5559 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5560 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5561 5562 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5563 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R)); 5564 } 5565 5566 /* Block code */ 5567 vbs = 1; 5568 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs)); 5569 if (bs > 1 && !(n_vertices % bs)) { 5570 PetscBool is_blocked = PETSC_TRUE; 5571 PetscInt *vary; 5572 if (!sub_schurs || !sub_schurs->reuse_solver) { 5573 PetscCall(PetscMalloc1(pcis->n / bs, &vary)); 5574 PetscCall(PetscArrayzero(vary, pcis->n / bs)); 5575 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5576 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5577 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++; 5578 for (i = 0; i < pcis->n / bs; i++) { 5579 if (vary[i] != 0 && vary[i] != bs) { 5580 is_blocked = PETSC_FALSE; 5581 break; 5582 } 5583 } 5584 PetscCall(PetscFree(vary)); 5585 } else { 5586 /* Verify directly the R set */ 5587 for (i = 0; i < n_R / bs; i++) { 5588 PetscInt j, node = idx_R_local[bs * i]; 5589 for (j = 1; j < bs; j++) { 5590 if (node != idx_R_local[bs * i + j] - j) { 5591 is_blocked = PETSC_FALSE; 5592 break; 5593 } 5594 } 5595 } 5596 } 5597 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5598 vbs = bs; 5599 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs; 5600 } 5601 } 5602 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local)); 5603 if (sub_schurs && sub_schurs->reuse_solver) { 5604 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5605 5606 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local)); 5607 PetscCall(ISDestroy(&reuse_solver->is_R)); 5608 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local)); 5609 reuse_solver->is_R = pcbddc->is_R_local; 5610 } else { 5611 PetscCall(PetscFree(idx_R_local)); 5612 } 5613 5614 /* print some info if requested */ 5615 if (pcbddc->dbg_flag) { 5616 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 5617 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5618 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 5619 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank)); 5620 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B)); 5621 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices, 5622 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size)); 5623 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 5624 } 5625 5626 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5627 if (!sub_schurs || !sub_schurs->reuse_solver) { 5628 IS is_aux1, is_aux2; 5629 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local; 5630 5631 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5632 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1)); 5633 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2)); 5634 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5635 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i])); 5636 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices)); 5637 for (i = 0, j = 0; i < n_R; i++) { 5638 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5639 } 5640 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5641 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5642 for (i = 0, j = 0; i < n_B; i++) { 5643 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i; 5644 } 5645 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices)); 5646 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2)); 5647 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B)); 5648 PetscCall(ISDestroy(&is_aux1)); 5649 PetscCall(ISDestroy(&is_aux2)); 5650 5651 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5652 PetscCall(PetscMalloc1(n_D, &aux_array1)); 5653 for (i = 0, j = 0; i < n_R; i++) { 5654 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i; 5655 } 5656 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1)); 5657 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5658 PetscCall(ISDestroy(&is_aux1)); 5659 } 5660 PetscCall(PetscBTDestroy(&bitmask)); 5661 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local)); 5662 } else { 5663 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5664 IS tis; 5665 PetscInt schur_size; 5666 5667 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size)); 5668 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis)); 5669 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B)); 5670 PetscCall(ISDestroy(&tis)); 5671 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5672 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis)); 5673 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D)); 5674 PetscCall(ISDestroy(&tis)); 5675 } 5676 } 5677 PetscFunctionReturn(PETSC_SUCCESS); 5678 } 5679 5680 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B) 5681 { 5682 MatNullSpace NullSpace; 5683 Mat dmat; 5684 const Vec *nullvecs; 5685 Vec v, v2, *nullvecs2; 5686 VecScatter sct = NULL; 5687 PetscContainer c; 5688 PetscScalar *ddata; 5689 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs; 5690 PetscBool nnsp_has_cnst; 5691 5692 PetscFunctionBegin; 5693 if (!is && !B) { /* MATIS */ 5694 Mat_IS *matis = (Mat_IS *)A->data; 5695 5696 if (!B) PetscCall(MatISGetLocalMat(A, &B)); 5697 sct = matis->cctx; 5698 PetscCall(PetscObjectReference((PetscObject)sct)); 5699 } else { 5700 PetscCall(MatGetNullSpace(B, &NullSpace)); 5701 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace)); 5702 if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5703 } 5704 PetscCall(MatGetNullSpace(A, &NullSpace)); 5705 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace)); 5706 if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS); 5707 5708 PetscCall(MatCreateVecs(A, &v, NULL)); 5709 PetscCall(MatCreateVecs(B, &v2, NULL)); 5710 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct)); 5711 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs)); 5712 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst; 5713 PetscCall(PetscMalloc1(bsiz, &nullvecs2)); 5714 PetscCall(VecGetBlockSize(v2, &bs)); 5715 PetscCall(VecGetSize(v2, &N)); 5716 PetscCall(VecGetLocalSize(v2, &n)); 5717 PetscCall(PetscMalloc1(n * bsiz, &ddata)); 5718 for (k = 0; k < nnsp_size; k++) { 5719 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k])); 5720 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5721 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD)); 5722 } 5723 if (nnsp_has_cnst) { 5724 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size])); 5725 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0)); 5726 } 5727 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2)); 5728 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace)); 5729 5730 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat)); 5731 PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B), &c)); 5732 PetscCall(PetscContainerSetPointer(c, ddata)); 5733 PetscCall(PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault)); 5734 PetscCall(PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c)); 5735 PetscCall(PetscContainerDestroy(&c)); 5736 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat)); 5737 PetscCall(MatDestroy(&dmat)); 5738 5739 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k])); 5740 PetscCall(PetscFree(nullvecs2)); 5741 PetscCall(MatSetNearNullSpace(B, NullSpace)); 5742 PetscCall(MatNullSpaceDestroy(&NullSpace)); 5743 PetscCall(VecDestroy(&v)); 5744 PetscCall(VecDestroy(&v2)); 5745 PetscCall(VecScatterDestroy(&sct)); 5746 PetscFunctionReturn(PETSC_SUCCESS); 5747 } 5748 5749 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5750 { 5751 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 5752 PC_IS *pcis = (PC_IS *)pc->data; 5753 PC pc_temp; 5754 Mat A_RR; 5755 MatNullSpace nnsp; 5756 MatReuse reuse; 5757 PetscScalar m_one = -1.0; 5758 PetscReal value; 5759 PetscInt n_D, n_R; 5760 PetscBool issbaij, opts, isset, issym; 5761 void (*f)(void) = NULL; 5762 char dir_prefix[256], neu_prefix[256], str_level[16]; 5763 size_t len; 5764 5765 PetscFunctionBegin; 5766 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 5767 /* approximate solver, propagate NearNullSpace if needed */ 5768 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) { 5769 MatNullSpace gnnsp1, gnnsp2; 5770 PetscBool lhas, ghas; 5771 5772 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp)); 5773 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1)); 5774 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2)); 5775 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE; 5776 PetscCall(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 5777 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL)); 5778 } 5779 5780 /* compute prefixes */ 5781 PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix))); 5782 PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix))); 5783 if (!pcbddc->current_level) { 5784 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix))); 5785 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix))); 5786 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5787 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5788 } else { 5789 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)pcbddc->current_level)); 5790 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 5791 len -= 15; /* remove "pc_bddc_coarse_" */ 5792 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 5793 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 5794 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5795 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1)); 5796 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1)); 5797 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix))); 5798 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix))); 5799 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix))); 5800 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix))); 5801 } 5802 5803 /* DIRICHLET PROBLEM */ 5804 if (dirichlet) { 5805 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5806 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5807 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented"); 5808 if (pcbddc->dbg_flag) { 5809 Mat A_IIn; 5810 5811 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn)); 5812 PetscCall(MatDestroy(&pcis->A_II)); 5813 pcis->A_II = A_IIn; 5814 } 5815 } 5816 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5817 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym)); 5818 5819 /* Matrix for Dirichlet problem is pcis->A_II */ 5820 n_D = pcis->n - pcis->n_B; 5821 opts = PETSC_FALSE; 5822 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5823 opts = PETSC_TRUE; 5824 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D)); 5825 PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel)); 5826 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1)); 5827 /* default */ 5828 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY)); 5829 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix)); 5830 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij)); 5831 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5832 if (issbaij) { 5833 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5834 } else { 5835 PetscCall(PCSetType(pc_temp, PCLU)); 5836 } 5837 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure)); 5838 } 5839 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix)); 5840 PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view")); 5841 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II)); 5842 /* Allow user's customization */ 5843 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D)); 5844 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5845 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5846 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II)); 5847 } 5848 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp)); 5849 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5850 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5851 if (f && pcbddc->mat_graph->cloc && !nnsp) { 5852 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 5853 const PetscInt *idxs; 5854 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 5855 5856 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl)); 5857 PetscCall(ISGetIndices(pcis->is_I_local, &idxs)); 5858 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 5859 for (i = 0; i < nl; i++) { 5860 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 5861 } 5862 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs)); 5863 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 5864 PetscCall(PetscFree(scoords)); 5865 } 5866 if (sub_schurs && sub_schurs->reuse_solver) { 5867 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5868 5869 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver)); 5870 } 5871 5872 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5873 if (!n_D) { 5874 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp)); 5875 PetscCall(PCSetType(pc_temp, PCNONE)); 5876 } 5877 PetscCall(KSPSetUp(pcbddc->ksp_D)); 5878 /* set ksp_D into pcis data */ 5879 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D)); 5880 PetscCall(KSPDestroy(&pcis->ksp_D)); 5881 pcis->ksp_D = pcbddc->ksp_D; 5882 } 5883 5884 /* NEUMANN PROBLEM */ 5885 A_RR = NULL; 5886 if (neumann) { 5887 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5888 PetscInt ibs, mbs; 5889 PetscBool issbaij, reuse_neumann_solver, isset, issym; 5890 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 5891 5892 reuse_neumann_solver = PETSC_FALSE; 5893 if (sub_schurs && sub_schurs->reuse_solver) { 5894 IS iP; 5895 5896 reuse_neumann_solver = PETSC_TRUE; 5897 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP)); 5898 if (iP) reuse_neumann_solver = PETSC_FALSE; 5899 } 5900 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5901 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R)); 5902 if (pcbddc->ksp_R) { /* already created ksp */ 5903 PetscInt nn_R; 5904 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR)); 5905 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5906 PetscCall(MatGetSize(A_RR, &nn_R, NULL)); 5907 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5908 PetscCall(KSPReset(pcbddc->ksp_R)); 5909 PetscCall(MatDestroy(&A_RR)); 5910 reuse = MAT_INITIAL_MATRIX; 5911 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5912 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5913 PetscCall(MatDestroy(&A_RR)); 5914 reuse = MAT_INITIAL_MATRIX; 5915 } else { /* safe to reuse the matrix */ 5916 reuse = MAT_REUSE_MATRIX; 5917 } 5918 } 5919 /* last check */ 5920 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5921 PetscCall(MatDestroy(&A_RR)); 5922 reuse = MAT_INITIAL_MATRIX; 5923 } 5924 } else { /* first time, so we need to create the matrix */ 5925 reuse = MAT_INITIAL_MATRIX; 5926 } 5927 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection 5928 TODO: Get Rid of these conversions */ 5929 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs)); 5930 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs)); 5931 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij)); 5932 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5933 if (matis->A == pcbddc->local_mat) { 5934 PetscCall(MatDestroy(&pcbddc->local_mat)); 5935 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5936 } else { 5937 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5938 } 5939 } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */ 5940 if (matis->A == pcbddc->local_mat) { 5941 PetscCall(MatDestroy(&pcbddc->local_mat)); 5942 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat)); 5943 } else { 5944 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat)); 5945 } 5946 } 5947 /* extract A_RR */ 5948 if (reuse_neumann_solver) { 5949 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5950 5951 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5952 PetscCall(MatDestroy(&A_RR)); 5953 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5954 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR)); 5955 } else { 5956 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR)); 5957 } 5958 } else { 5959 PetscCall(MatDestroy(&A_RR)); 5960 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL)); 5961 PetscCall(PetscObjectReference((PetscObject)A_RR)); 5962 } 5963 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5964 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR)); 5965 } 5966 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym)); 5967 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym)); 5968 opts = PETSC_FALSE; 5969 if (!pcbddc->ksp_R) { /* create object if not present */ 5970 opts = PETSC_TRUE; 5971 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R)); 5972 PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel)); 5973 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1)); 5974 /* default */ 5975 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY)); 5976 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix)); 5977 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5978 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij)); 5979 if (issbaij) { 5980 PetscCall(PCSetType(pc_temp, PCCHOLESKY)); 5981 } else { 5982 PetscCall(PCSetType(pc_temp, PCLU)); 5983 } 5984 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure)); 5985 } 5986 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix)); 5987 PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view")); 5988 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR)); 5989 if (opts) { /* Allow user's customization once */ 5990 PetscCall(KSPSetFromOptions(pcbddc->ksp_R)); 5991 } 5992 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 5993 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */ 5994 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR)); 5995 } 5996 PetscCall(MatGetNearNullSpace(A_RR, &nnsp)); 5997 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 5998 PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f)); 5999 if (f && pcbddc->mat_graph->cloc && !nnsp) { 6000 PetscReal *coords = pcbddc->mat_graph->coords, *scoords; 6001 const PetscInt *idxs; 6002 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d; 6003 6004 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl)); 6005 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs)); 6006 PetscCall(PetscMalloc1(nl * cdim, &scoords)); 6007 for (i = 0; i < nl; i++) { 6008 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d]; 6009 } 6010 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs)); 6011 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords)); 6012 PetscCall(PetscFree(scoords)); 6013 } 6014 6015 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 6016 if (!n_R) { 6017 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp)); 6018 PetscCall(PCSetType(pc_temp, PCNONE)); 6019 } 6020 /* Reuse solver if it is present */ 6021 if (reuse_neumann_solver) { 6022 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6023 6024 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver)); 6025 } 6026 PetscCall(KSPSetUp(pcbddc->ksp_R)); 6027 } 6028 6029 if (pcbddc->dbg_flag) { 6030 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6031 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6032 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 6033 } 6034 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0)); 6035 6036 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 6037 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE)); 6038 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1])); 6039 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3])); 6040 /* check Dirichlet and Neumann solvers */ 6041 if (pcbddc->dbg_flag) { 6042 if (dirichlet) { /* Dirichlet */ 6043 PetscCall(VecSetRandom(pcis->vec1_D, NULL)); 6044 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D)); 6045 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D)); 6046 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D)); 6047 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D)); 6048 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value)); 6049 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value)); 6050 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6051 } 6052 if (neumann) { /* Neumann */ 6053 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL)); 6054 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R)); 6055 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R)); 6056 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R)); 6057 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R)); 6058 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value)); 6059 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value)); 6060 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6061 } 6062 } 6063 /* free Neumann problem's matrix */ 6064 PetscCall(MatDestroy(&A_RR)); 6065 PetscFunctionReturn(PETSC_SUCCESS); 6066 } 6067 6068 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 6069 { 6070 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6071 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6072 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE; 6073 6074 PetscFunctionBegin; 6075 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.)); 6076 if (!pcbddc->switch_static) { 6077 if (applytranspose && pcbddc->local_auxmat1) { 6078 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C)); 6079 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6080 } 6081 if (!reuse_solver) { 6082 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6083 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6084 } else { 6085 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6086 6087 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6088 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD)); 6089 } 6090 } else { 6091 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6092 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6093 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6094 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6095 if (applytranspose && pcbddc->local_auxmat1) { 6096 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C)); 6097 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B)); 6098 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6099 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE)); 6100 } 6101 } 6102 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6103 if (!reuse_solver || pcbddc->switch_static) { 6104 if (applytranspose) { 6105 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6106 } else { 6107 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R)); 6108 } 6109 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R)); 6110 } else { 6111 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6112 6113 if (applytranspose) { 6114 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6115 } else { 6116 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B)); 6117 } 6118 } 6119 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0)); 6120 PetscCall(VecSet(inout_B, 0.)); 6121 if (!pcbddc->switch_static) { 6122 if (!reuse_solver) { 6123 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6124 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6125 } else { 6126 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 6127 6128 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6129 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE)); 6130 } 6131 if (!applytranspose && pcbddc->local_auxmat1) { 6132 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6133 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B)); 6134 } 6135 } else { 6136 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6137 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6138 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6139 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6140 if (!applytranspose && pcbddc->local_auxmat1) { 6141 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C)); 6142 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R)); 6143 } 6144 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6145 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD)); 6146 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6147 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD)); 6148 } 6149 PetscFunctionReturn(PETSC_SUCCESS); 6150 } 6151 6152 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 6153 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 6154 { 6155 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6156 PC_IS *pcis = (PC_IS *)pc->data; 6157 const PetscScalar zero = 0.0; 6158 6159 PetscFunctionBegin; 6160 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 6161 if (!pcbddc->benign_apply_coarse_only) { 6162 if (applytranspose) { 6163 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P)); 6164 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6165 } else { 6166 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P)); 6167 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P)); 6168 } 6169 } else { 6170 PetscCall(VecSet(pcbddc->vec1_P, zero)); 6171 } 6172 6173 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 6174 if (pcbddc->benign_n) { 6175 PetscScalar *array; 6176 PetscInt j; 6177 6178 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6179 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j]; 6180 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6181 } 6182 6183 /* start communications from local primal nodes to rhs of coarse solver */ 6184 PetscCall(VecSet(pcbddc->coarse_vec, zero)); 6185 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD)); 6186 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD)); 6187 6188 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 6189 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6190 if (pcbddc->coarse_ksp) { 6191 Mat coarse_mat; 6192 Vec rhs, sol; 6193 MatNullSpace nullsp; 6194 PetscBool isbddc = PETSC_FALSE; 6195 6196 if (pcbddc->benign_have_null) { 6197 PC coarse_pc; 6198 6199 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6200 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 6201 /* we need to propagate to coarser levels the need for a possible benign correction */ 6202 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 6203 PC_BDDC *coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6204 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 6205 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 6206 } 6207 } 6208 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs)); 6209 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol)); 6210 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 6211 if (applytranspose) { 6212 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented"); 6213 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol)); 6214 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6215 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp)); 6216 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6217 } else { 6218 PetscCall(MatGetNullSpace(coarse_mat, &nullsp)); 6219 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 6220 PC coarse_pc; 6221 6222 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs)); 6223 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6224 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp)); 6225 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol)); 6226 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp)); 6227 } else { 6228 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol)); 6229 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol)); 6230 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol)); 6231 } 6232 } 6233 /* we don't need the benign correction at coarser levels anymore */ 6234 if (pcbddc->benign_have_null && isbddc) { 6235 PC coarse_pc; 6236 PC_BDDC *coarsepcbddc; 6237 6238 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 6239 coarsepcbddc = (PC_BDDC *)coarse_pc->data; 6240 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 6241 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 6242 } 6243 } 6244 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0)); 6245 6246 /* Local solution on R nodes */ 6247 if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose)); 6248 /* communications from coarse sol to local primal nodes */ 6249 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE)); 6250 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE)); 6251 6252 /* Sum contributions from the two levels */ 6253 if (!pcbddc->benign_apply_coarse_only) { 6254 if (applytranspose) { 6255 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6256 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6257 } else { 6258 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B)); 6259 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D)); 6260 } 6261 /* store p0 */ 6262 if (pcbddc->benign_n) { 6263 PetscScalar *array; 6264 PetscInt j; 6265 6266 PetscCall(VecGetArray(pcbddc->vec1_P, &array)); 6267 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j]; 6268 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array)); 6269 } 6270 } else { /* expand the coarse solution */ 6271 if (applytranspose) { 6272 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B)); 6273 } else { 6274 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B)); 6275 } 6276 } 6277 PetscFunctionReturn(PETSC_SUCCESS); 6278 } 6279 6280 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode) 6281 { 6282 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6283 Vec from, to; 6284 const PetscScalar *array; 6285 6286 PetscFunctionBegin; 6287 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6288 from = pcbddc->coarse_vec; 6289 to = pcbddc->vec1_P; 6290 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6291 Vec tvec; 6292 6293 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6294 PetscCall(VecResetArray(tvec)); 6295 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec)); 6296 PetscCall(VecGetArrayRead(tvec, &array)); 6297 PetscCall(VecPlaceArray(from, array)); 6298 PetscCall(VecRestoreArrayRead(tvec, &array)); 6299 } 6300 } else { /* from local to global -> put data in coarse right-hand side */ 6301 from = pcbddc->vec1_P; 6302 to = pcbddc->coarse_vec; 6303 } 6304 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6305 PetscFunctionReturn(PETSC_SUCCESS); 6306 } 6307 6308 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 6309 { 6310 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6311 Vec from, to; 6312 const PetscScalar *array; 6313 6314 PetscFunctionBegin; 6315 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 6316 from = pcbddc->coarse_vec; 6317 to = pcbddc->vec1_P; 6318 } else { /* from local to global -> put data in coarse right-hand side */ 6319 from = pcbddc->vec1_P; 6320 to = pcbddc->coarse_vec; 6321 } 6322 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode)); 6323 if (smode == SCATTER_FORWARD) { 6324 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 6325 Vec tvec; 6326 6327 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec)); 6328 PetscCall(VecGetArrayRead(to, &array)); 6329 PetscCall(VecPlaceArray(tvec, array)); 6330 PetscCall(VecRestoreArrayRead(to, &array)); 6331 } 6332 } else { 6333 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 6334 PetscCall(VecResetArray(from)); 6335 } 6336 } 6337 PetscFunctionReturn(PETSC_SUCCESS); 6338 } 6339 6340 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 6341 { 6342 PC_IS *pcis = (PC_IS *)pc->data; 6343 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 6344 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 6345 /* one and zero */ 6346 PetscScalar one = 1.0, zero = 0.0; 6347 /* space to store constraints and their local indices */ 6348 PetscScalar *constraints_data; 6349 PetscInt *constraints_idxs, *constraints_idxs_B; 6350 PetscInt *constraints_idxs_ptr, *constraints_data_ptr; 6351 PetscInt *constraints_n; 6352 /* iterators */ 6353 PetscInt i, j, k, total_counts, total_counts_cc, cum; 6354 /* BLAS integers */ 6355 PetscBLASInt lwork, lierr; 6356 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1; 6357 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC; 6358 /* reuse */ 6359 PetscInt olocal_primal_size, olocal_primal_size_cc; 6360 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult; 6361 /* change of basis */ 6362 PetscBool qr_needed; 6363 PetscBT change_basis, qr_needed_idx; 6364 /* auxiliary stuff */ 6365 PetscInt *nnz, *is_indices; 6366 PetscInt ncc; 6367 /* some quantities */ 6368 PetscInt n_vertices, total_primal_vertices, valid_constraints; 6369 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints; 6370 PetscReal tol; /* tolerance for retaining eigenmodes */ 6371 6372 PetscFunctionBegin; 6373 tol = PetscSqrtReal(PETSC_SMALL); 6374 /* Destroy Mat objects computed previously */ 6375 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix)); 6376 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 6377 PetscCall(MatDestroy(&pcbddc->switch_static_change)); 6378 /* save info on constraints from previous setup (if any) */ 6379 olocal_primal_size = pcbddc->local_primal_size; 6380 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 6381 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult)); 6382 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc)); 6383 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc)); 6384 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult)); 6385 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs)); 6386 6387 if (!pcbddc->adaptive_selection) { 6388 IS ISForVertices, *ISForFaces, *ISForEdges; 6389 MatNullSpace nearnullsp; 6390 const Vec *nearnullvecs; 6391 Vec *localnearnullsp; 6392 PetscScalar *array; 6393 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne; 6394 PetscBool nnsp_has_cnst; 6395 /* LAPACK working arrays for SVD or POD */ 6396 PetscBool skip_lapack, boolforchange; 6397 PetscScalar *work; 6398 PetscReal *singular_vals; 6399 #if defined(PETSC_USE_COMPLEX) 6400 PetscReal *rwork; 6401 #endif 6402 PetscScalar *temp_basis = NULL, *correlation_mat = NULL; 6403 PetscBLASInt dummy_int = 1; 6404 PetscScalar dummy_scalar = 1.; 6405 PetscBool use_pod = PETSC_FALSE; 6406 6407 /* MKL SVD with same input gives different results on different processes! */ 6408 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS) 6409 use_pod = PETSC_TRUE; 6410 #endif 6411 /* Get index sets for faces, edges and vertices from graph */ 6412 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices)); 6413 o_nf = n_ISForFaces; 6414 o_ne = n_ISForEdges; 6415 n_vertices = 0; 6416 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices)); 6417 /* print some info */ 6418 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 6419 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc)); 6420 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 6421 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 6422 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6423 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices)); 6424 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges)); 6425 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces)); 6426 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 6427 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 6428 } 6429 6430 if (!pcbddc->use_vertices) n_vertices = 0; 6431 if (!pcbddc->use_edges) n_ISForEdges = 0; 6432 if (!pcbddc->use_faces) n_ISForFaces = 0; 6433 6434 /* check if near null space is attached to global mat */ 6435 if (pcbddc->use_nnsp) { 6436 PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp)); 6437 } else nearnullsp = NULL; 6438 6439 if (nearnullsp) { 6440 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs)); 6441 /* remove any stored info */ 6442 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace)); 6443 PetscCall(PetscFree(pcbddc->onearnullvecs_state)); 6444 /* store information for BDDC solver reuse */ 6445 PetscCall(PetscObjectReference((PetscObject)nearnullsp)); 6446 pcbddc->onearnullspace = nearnullsp; 6447 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state)); 6448 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i])); 6449 } else { /* if near null space is not provided BDDC uses constants by default */ 6450 nnsp_size = 0; 6451 nnsp_has_cnst = PETSC_TRUE; 6452 } 6453 /* get max number of constraints on a single cc */ 6454 max_constraints = nnsp_size; 6455 if (nnsp_has_cnst) max_constraints++; 6456 6457 /* 6458 Evaluate maximum storage size needed by the procedure 6459 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 6460 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 6461 There can be multiple constraints per connected component 6462 */ 6463 ncc = n_vertices + n_ISForFaces + n_ISForEdges; 6464 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n)); 6465 6466 total_counts = n_ISForFaces + n_ISForEdges; 6467 total_counts *= max_constraints; 6468 total_counts += n_vertices; 6469 PetscCall(PetscBTCreate(total_counts, &change_basis)); 6470 6471 total_counts = 0; 6472 max_size_of_constraint = 0; 6473 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) { 6474 IS used_is; 6475 if (i < n_ISForEdges) { 6476 used_is = ISForEdges[i]; 6477 } else { 6478 used_is = ISForFaces[i - n_ISForEdges]; 6479 } 6480 PetscCall(ISGetSize(used_is, &j)); 6481 total_counts += j; 6482 max_size_of_constraint = PetscMax(j, max_size_of_constraint); 6483 } 6484 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B)); 6485 6486 /* get local part of global near null space vectors */ 6487 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp)); 6488 for (k = 0; k < nnsp_size; k++) { 6489 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k])); 6490 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6491 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD)); 6492 } 6493 6494 /* whether or not to skip lapack calls */ 6495 skip_lapack = PETSC_TRUE; 6496 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 6497 6498 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 6499 if (!skip_lapack) { 6500 PetscScalar temp_work; 6501 6502 if (use_pod) { 6503 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 6504 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat)); 6505 PetscCall(PetscMalloc1(max_constraints, &singular_vals)); 6506 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis)); 6507 #if defined(PETSC_USE_COMPLEX) 6508 PetscCall(PetscMalloc1(3 * max_constraints, &rwork)); 6509 #endif 6510 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6511 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6512 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA)); 6513 lwork = -1; 6514 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6515 #if !defined(PETSC_USE_COMPLEX) 6516 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr)); 6517 #else 6518 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr)); 6519 #endif 6520 PetscCall(PetscFPTrapPop()); 6521 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %d", (int)lierr); 6522 } else { 6523 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6524 /* SVD */ 6525 PetscInt max_n, min_n; 6526 max_n = max_size_of_constraint; 6527 min_n = max_constraints; 6528 if (max_size_of_constraint < max_constraints) { 6529 min_n = max_size_of_constraint; 6530 max_n = max_constraints; 6531 } 6532 PetscCall(PetscMalloc1(min_n, &singular_vals)); 6533 #if defined(PETSC_USE_COMPLEX) 6534 PetscCall(PetscMalloc1(5 * min_n, &rwork)); 6535 #endif 6536 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6537 lwork = -1; 6538 PetscCall(PetscBLASIntCast(max_n, &Blas_M)); 6539 PetscCall(PetscBLASIntCast(min_n, &Blas_N)); 6540 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA)); 6541 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6542 #if !defined(PETSC_USE_COMPLEX) 6543 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr)); 6544 #else 6545 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr)); 6546 #endif 6547 PetscCall(PetscFPTrapPop()); 6548 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %d", (int)lierr); 6549 #else 6550 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6551 #endif /* on missing GESVD */ 6552 } 6553 /* Allocate optimal workspace */ 6554 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork)); 6555 PetscCall(PetscMalloc1(lwork, &work)); 6556 } 6557 /* Now we can loop on constraining sets */ 6558 total_counts = 0; 6559 constraints_idxs_ptr[0] = 0; 6560 constraints_data_ptr[0] = 0; 6561 /* vertices */ 6562 if (n_vertices) { 6563 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices)); 6564 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices)); 6565 for (i = 0; i < n_vertices; i++) { 6566 constraints_n[total_counts] = 1; 6567 constraints_data[total_counts] = 1.0; 6568 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1; 6569 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1; 6570 total_counts++; 6571 } 6572 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices)); 6573 } 6574 6575 /* edges and faces */ 6576 total_counts_cc = total_counts; 6577 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) { 6578 IS used_is; 6579 PetscBool idxs_copied = PETSC_FALSE; 6580 6581 if (ncc < n_ISForEdges) { 6582 used_is = ISForEdges[ncc]; 6583 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6584 } else { 6585 used_is = ISForFaces[ncc - n_ISForEdges]; 6586 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6587 } 6588 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6589 6590 PetscCall(ISGetSize(used_is, &size_of_constraint)); 6591 if (!size_of_constraint) continue; 6592 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices)); 6593 if (nnsp_has_cnst) { 6594 PetscScalar quad_value; 6595 6596 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6597 idxs_copied = PETSC_TRUE; 6598 6599 if (!pcbddc->use_nnsp_true) { 6600 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint)); 6601 } else { 6602 quad_value = 1.0; 6603 } 6604 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value; 6605 temp_constraints++; 6606 total_counts++; 6607 } 6608 for (k = 0; k < nnsp_size; k++) { 6609 PetscReal real_value; 6610 PetscScalar *ptr_to_data; 6611 6612 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6613 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint]; 6614 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]]; 6615 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array)); 6616 /* check if array is null on the connected component */ 6617 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6618 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one)); 6619 if (real_value > tol * size_of_constraint) { /* keep indices and values */ 6620 temp_constraints++; 6621 total_counts++; 6622 if (!idxs_copied) { 6623 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint)); 6624 idxs_copied = PETSC_TRUE; 6625 } 6626 } 6627 } 6628 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices)); 6629 valid_constraints = temp_constraints; 6630 if (!pcbddc->use_nnsp_true && temp_constraints) { 6631 if (temp_constraints == 1) { /* just normalize the constraint */ 6632 PetscScalar norm, *ptr_to_data; 6633 6634 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6635 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6636 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one)); 6637 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm)); 6638 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one)); 6639 } else { /* perform SVD */ 6640 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6641 6642 if (use_pod) { 6643 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6644 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6645 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6646 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6647 from that computed using LAPACKgesvd 6648 -> This is due to a different computation of eigenvectors in LAPACKheev 6649 -> The quality of the POD-computed basis will be the same */ 6650 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints)); 6651 /* Store upper triangular part of correlation matrix */ 6652 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 6653 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6654 for (j = 0; j < temp_constraints; j++) { 6655 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one)); 6656 } 6657 /* compute eigenvalues and eigenvectors of correlation matrix */ 6658 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6659 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA)); 6660 #if !defined(PETSC_USE_COMPLEX) 6661 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr)); 6662 #else 6663 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr)); 6664 #endif 6665 PetscCall(PetscFPTrapPop()); 6666 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %d", (int)lierr); 6667 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6668 j = 0; 6669 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++; 6670 total_counts = total_counts - j; 6671 valid_constraints = temp_constraints - j; 6672 /* scale and copy POD basis into used quadrature memory */ 6673 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6674 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6675 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K)); 6676 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6677 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB)); 6678 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 6679 if (j < temp_constraints) { 6680 PetscInt ii; 6681 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]); 6682 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6683 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC)); 6684 PetscCall(PetscFPTrapPop()); 6685 for (k = 0; k < temp_constraints - j; k++) { 6686 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii]; 6687 } 6688 } 6689 } else { 6690 #if !defined(PETSC_MISSING_LAPACK_GESVD) 6691 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 6692 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N)); 6693 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 6694 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 6695 #if !defined(PETSC_USE_COMPLEX) 6696 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr)); 6697 #else 6698 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr)); 6699 #endif 6700 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr); 6701 PetscCall(PetscFPTrapPop()); 6702 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6703 k = temp_constraints; 6704 if (k > size_of_constraint) k = size_of_constraint; 6705 j = 0; 6706 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++; 6707 valid_constraints = k - j; 6708 total_counts = total_counts - temp_constraints + valid_constraints; 6709 #else 6710 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen"); 6711 #endif /* on missing GESVD */ 6712 } 6713 } 6714 } 6715 /* update pointers information */ 6716 if (valid_constraints) { 6717 constraints_n[total_counts_cc] = valid_constraints; 6718 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint; 6719 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints; 6720 /* set change_of_basis flag */ 6721 if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc)); 6722 total_counts_cc++; 6723 } 6724 } 6725 /* free workspace */ 6726 if (!skip_lapack) { 6727 PetscCall(PetscFree(work)); 6728 #if defined(PETSC_USE_COMPLEX) 6729 PetscCall(PetscFree(rwork)); 6730 #endif 6731 PetscCall(PetscFree(singular_vals)); 6732 PetscCall(PetscFree(correlation_mat)); 6733 PetscCall(PetscFree(temp_basis)); 6734 } 6735 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k])); 6736 PetscCall(PetscFree(localnearnullsp)); 6737 /* free index sets of faces, edges and vertices */ 6738 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices)); 6739 } else { 6740 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6741 6742 total_counts = 0; 6743 n_vertices = 0; 6744 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 6745 max_constraints = 0; 6746 total_counts_cc = 0; 6747 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6748 total_counts += pcbddc->adaptive_constraints_n[i]; 6749 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6750 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]); 6751 } 6752 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6753 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6754 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6755 constraints_data = pcbddc->adaptive_constraints_data; 6756 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6757 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n)); 6758 total_counts_cc = 0; 6759 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) { 6760 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6761 } 6762 6763 max_size_of_constraint = 0; 6764 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]); 6765 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B)); 6766 /* Change of basis */ 6767 PetscCall(PetscBTCreate(total_counts_cc, &change_basis)); 6768 if (pcbddc->use_change_of_basis) { 6769 for (i = 0; i < sub_schurs->n_subs; i++) { 6770 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices)); 6771 } 6772 } 6773 } 6774 pcbddc->local_primal_size = total_counts; 6775 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs)); 6776 6777 /* map constraints_idxs in boundary numbering */ 6778 if (pcbddc->use_change_of_basis) { 6779 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B)); 6780 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i); 6781 } 6782 6783 /* Create constraint matrix */ 6784 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix)); 6785 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ)); 6786 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n)); 6787 6788 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6789 /* determine if a QR strategy is needed for change of basis */ 6790 qr_needed = pcbddc->use_qr_single; 6791 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx)); 6792 total_primal_vertices = 0; 6793 pcbddc->local_primal_size_cc = 0; 6794 for (i = 0; i < total_counts_cc; i++) { 6795 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6796 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6797 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6798 pcbddc->local_primal_size_cc += 1; 6799 } else if (PetscBTLookup(change_basis, i)) { 6800 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6801 pcbddc->local_primal_size_cc += constraints_n[i]; 6802 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6803 PetscCall(PetscBTSet(qr_needed_idx, i)); 6804 qr_needed = PETSC_TRUE; 6805 } 6806 } else { 6807 pcbddc->local_primal_size_cc += 1; 6808 } 6809 } 6810 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6811 pcbddc->n_vertices = total_primal_vertices; 6812 /* permute indices in order to have a sorted set of vertices */ 6813 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs)); 6814 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult)); 6815 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices)); 6816 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1; 6817 6818 /* nonzero structure of constraint matrix */ 6819 /* and get reference dof for local constraints */ 6820 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz)); 6821 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1; 6822 6823 j = total_primal_vertices; 6824 total_counts = total_primal_vertices; 6825 cum = total_primal_vertices; 6826 for (i = n_vertices; i < total_counts_cc; i++) { 6827 if (!PetscBTLookup(change_basis, i)) { 6828 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6829 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6830 cum++; 6831 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6832 for (k = 0; k < constraints_n[i]; k++) { 6833 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k]; 6834 nnz[j + k] = size_of_constraint; 6835 } 6836 j += constraints_n[i]; 6837 } 6838 } 6839 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz)); 6840 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6841 PetscCall(PetscFree(nnz)); 6842 6843 /* set values in constraint matrix */ 6844 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES)); 6845 total_counts = total_primal_vertices; 6846 for (i = n_vertices; i < total_counts_cc; i++) { 6847 if (!PetscBTLookup(change_basis, i)) { 6848 PetscInt *cols; 6849 6850 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6851 cols = constraints_idxs + constraints_idxs_ptr[i]; 6852 for (k = 0; k < constraints_n[i]; k++) { 6853 PetscInt row = total_counts + k; 6854 PetscScalar *vals; 6855 6856 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint; 6857 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES)); 6858 } 6859 total_counts += constraints_n[i]; 6860 } 6861 } 6862 /* assembling */ 6863 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6864 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY)); 6865 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view")); 6866 6867 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6868 if (pcbddc->use_change_of_basis) { 6869 /* dual and primal dofs on a single cc */ 6870 PetscInt dual_dofs, primal_dofs; 6871 /* working stuff for GEQRF */ 6872 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t; 6873 PetscBLASInt lqr_work; 6874 /* working stuff for UNGQR */ 6875 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0; 6876 PetscBLASInt lgqr_work; 6877 /* working stuff for TRTRS */ 6878 PetscScalar *trs_rhs = NULL; 6879 PetscBLASInt Blas_NRHS; 6880 /* pointers for values insertion into change of basis matrix */ 6881 PetscInt *start_rows, *start_cols; 6882 PetscScalar *start_vals; 6883 /* working stuff for values insertion */ 6884 PetscBT is_primal; 6885 PetscInt *aux_primal_numbering_B; 6886 /* matrix sizes */ 6887 PetscInt global_size, local_size; 6888 /* temporary change of basis */ 6889 Mat localChangeOfBasisMatrix; 6890 /* extra space for debugging */ 6891 PetscScalar *dbg_work = NULL; 6892 6893 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix)); 6894 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ)); 6895 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n)); 6896 /* nonzeros for local mat */ 6897 PetscCall(PetscMalloc1(pcis->n, &nnz)); 6898 if (!pcbddc->benign_change || pcbddc->fake_change) { 6899 for (i = 0; i < pcis->n; i++) nnz[i] = 1; 6900 } else { 6901 const PetscInt *ii; 6902 PetscInt n; 6903 PetscBool flg_row; 6904 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6905 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i]; 6906 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row)); 6907 } 6908 for (i = n_vertices; i < total_counts_cc; i++) { 6909 if (PetscBTLookup(change_basis, i)) { 6910 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]; 6911 if (PetscBTLookup(qr_needed_idx, i)) { 6912 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint; 6913 } else { 6914 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6915 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2; 6916 } 6917 } 6918 } 6919 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz)); 6920 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 6921 PetscCall(PetscFree(nnz)); 6922 /* Set interior change in the matrix */ 6923 if (!pcbddc->benign_change || pcbddc->fake_change) { 6924 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES)); 6925 } else { 6926 const PetscInt *ii, *jj; 6927 PetscScalar *aa; 6928 PetscInt n; 6929 PetscBool flg_row; 6930 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6931 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa)); 6932 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES)); 6933 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa)); 6934 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row)); 6935 } 6936 6937 if (pcbddc->dbg_flag) { 6938 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 6939 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank)); 6940 } 6941 6942 /* Now we loop on the constraints which need a change of basis */ 6943 /* 6944 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6945 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6946 6947 Basic blocks of change of basis matrix T computed: 6948 6949 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6950 6951 | 1 0 ... 0 s_1/S | 6952 | 0 1 ... 0 s_2/S | 6953 | ... | 6954 | 0 ... 1 s_{n-1}/S | 6955 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6956 6957 with S = \sum_{i=1}^n s_i^2 6958 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6959 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6960 6961 - QR decomposition of constraints otherwise 6962 */ 6963 if (qr_needed && max_size_of_constraint) { 6964 /* space to store Q */ 6965 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis)); 6966 /* array to store scaling factors for reflectors */ 6967 PetscCall(PetscMalloc1(max_constraints, &qr_tau)); 6968 /* first we issue queries for optimal work */ 6969 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 6970 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N)); 6971 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 6972 lqr_work = -1; 6973 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr)); 6974 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %d", (int)lierr); 6975 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work)); 6976 PetscCall(PetscMalloc1(lqr_work, &qr_work)); 6977 lgqr_work = -1; 6978 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M)); 6979 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N)); 6980 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K)); 6981 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA)); 6982 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */ 6983 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr)); 6984 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %d", (int)lierr); 6985 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work)); 6986 PetscCall(PetscMalloc1(lgqr_work, &gqr_work)); 6987 /* array to store rhs and solution of triangular solver */ 6988 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs)); 6989 /* allocating workspace for check */ 6990 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work)); 6991 } 6992 /* array to store whether a node is primal or not */ 6993 PetscCall(PetscBTCreate(pcis->n_B, &is_primal)); 6994 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B)); 6995 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B)); 6996 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i); 6997 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i])); 6998 PetscCall(PetscFree(aux_primal_numbering_B)); 6999 7000 /* loop on constraints and see whether or not they need a change of basis and compute it */ 7001 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) { 7002 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts]; 7003 if (PetscBTLookup(change_basis, total_counts)) { 7004 /* get constraint info */ 7005 primal_dofs = constraints_n[total_counts]; 7006 dual_dofs = size_of_constraint - primal_dofs; 7007 7008 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint)); 7009 7010 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */ 7011 7012 /* copy quadrature constraints for change of basis check */ 7013 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7014 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 7015 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7016 7017 /* compute QR decomposition of constraints */ 7018 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7019 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7020 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7021 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7022 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr)); 7023 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %d", (int)lierr); 7024 PetscCall(PetscFPTrapPop()); 7025 7026 /* explicitly compute R^-T */ 7027 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs)); 7028 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0; 7029 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7030 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS)); 7031 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7032 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7033 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7034 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr)); 7035 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %d", (int)lierr); 7036 PetscCall(PetscFPTrapPop()); 7037 7038 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */ 7039 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7040 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7041 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7042 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7043 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7044 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr)); 7045 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %d", (int)lierr); 7046 PetscCall(PetscFPTrapPop()); 7047 7048 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 7049 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 7050 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 7051 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M)); 7052 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N)); 7053 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K)); 7054 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7055 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB)); 7056 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC)); 7057 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7058 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC)); 7059 PetscCall(PetscFPTrapPop()); 7060 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs)); 7061 7062 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 7063 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 7064 /* insert cols for primal dofs */ 7065 for (j = 0; j < primal_dofs; j++) { 7066 start_vals = &qr_basis[j * size_of_constraint]; 7067 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7068 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7069 } 7070 /* insert cols for dual dofs */ 7071 for (j = 0, k = 0; j < dual_dofs; k++) { 7072 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) { 7073 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint]; 7074 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7075 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES)); 7076 j++; 7077 } 7078 } 7079 7080 /* check change of basis */ 7081 if (pcbddc->dbg_flag) { 7082 PetscInt ii, jj; 7083 PetscBool valid_qr = PETSC_TRUE; 7084 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M)); 7085 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7086 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K)); 7087 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA)); 7088 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB)); 7089 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC)); 7090 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 7091 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC)); 7092 PetscCall(PetscFPTrapPop()); 7093 for (jj = 0; jj < size_of_constraint; jj++) { 7094 for (ii = 0; ii < primal_dofs; ii++) { 7095 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE; 7096 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 7097 } 7098 } 7099 if (!valid_qr) { 7100 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n")); 7101 for (jj = 0; jj < size_of_constraint; jj++) { 7102 for (ii = 0; ii < primal_dofs; ii++) { 7103 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) { 7104 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7105 } 7106 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) { 7107 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]))); 7108 } 7109 } 7110 } 7111 } else { 7112 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n")); 7113 } 7114 } 7115 } else { /* simple transformation block */ 7116 PetscInt row, col; 7117 PetscScalar val, norm; 7118 7119 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N)); 7120 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one)); 7121 for (j = 0; j < size_of_constraint; j++) { 7122 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j]; 7123 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j]; 7124 if (!PetscBTLookup(is_primal, row_B)) { 7125 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 7126 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES)); 7127 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES)); 7128 } else { 7129 for (k = 0; k < size_of_constraint; k++) { 7130 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k]; 7131 if (row != col) { 7132 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]]; 7133 } else { 7134 val = constraints_data[constraints_data_ptr[total_counts]] / norm; 7135 } 7136 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES)); 7137 } 7138 } 7139 } 7140 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n")); 7141 } 7142 } else { 7143 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint)); 7144 } 7145 } 7146 7147 /* free workspace */ 7148 if (qr_needed) { 7149 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work)); 7150 PetscCall(PetscFree(trs_rhs)); 7151 PetscCall(PetscFree(qr_tau)); 7152 PetscCall(PetscFree(qr_work)); 7153 PetscCall(PetscFree(gqr_work)); 7154 PetscCall(PetscFree(qr_basis)); 7155 } 7156 PetscCall(PetscBTDestroy(&is_primal)); 7157 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7158 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY)); 7159 7160 /* assembling of global change of variable */ 7161 if (!pcbddc->fake_change) { 7162 Mat tmat; 7163 PetscInt bs; 7164 7165 PetscCall(VecGetSize(pcis->vec1_global, &global_size)); 7166 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size)); 7167 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat)); 7168 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix)); 7169 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY)); 7170 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY)); 7171 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix)); 7172 PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ)); 7173 PetscCall(MatGetBlockSize(pc->pmat, &bs)); 7174 PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs)); 7175 PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size)); 7176 PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE)); 7177 PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix)); 7178 PetscCall(MatDestroy(&tmat)); 7179 PetscCall(VecSet(pcis->vec1_global, 0.0)); 7180 PetscCall(VecSet(pcis->vec1_N, 1.0)); 7181 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7182 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 7183 PetscCall(VecReciprocal(pcis->vec1_global)); 7184 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL)); 7185 7186 /* check */ 7187 if (pcbddc->dbg_flag) { 7188 PetscReal error; 7189 Vec x, x_change; 7190 7191 PetscCall(VecDuplicate(pcis->vec1_global, &x)); 7192 PetscCall(VecDuplicate(pcis->vec1_global, &x_change)); 7193 PetscCall(VecSetRandom(x, NULL)); 7194 PetscCall(VecCopy(x, pcis->vec1_global)); 7195 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7196 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD)); 7197 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N)); 7198 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7199 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE)); 7200 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change)); 7201 PetscCall(VecAXPY(x, -1.0, x_change)); 7202 PetscCall(VecNorm(x, NORM_INFINITY, &error)); 7203 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error); 7204 PetscCall(VecDestroy(&x)); 7205 PetscCall(VecDestroy(&x_change)); 7206 } 7207 /* adapt sub_schurs computed (if any) */ 7208 if (pcbddc->use_deluxe_scaling) { 7209 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 7210 7211 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 7212 if (sub_schurs && sub_schurs->S_Ej_all) { 7213 Mat S_new, tmat; 7214 IS is_all_N, is_V_Sall = NULL; 7215 7216 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N)); 7217 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat)); 7218 if (pcbddc->deluxe_zerorows) { 7219 ISLocalToGlobalMapping NtoSall; 7220 IS is_V; 7221 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V)); 7222 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall)); 7223 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall)); 7224 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall)); 7225 PetscCall(ISDestroy(&is_V)); 7226 } 7227 PetscCall(ISDestroy(&is_all_N)); 7228 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7229 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 7230 PetscCall(PetscObjectReference((PetscObject)S_new)); 7231 if (pcbddc->deluxe_zerorows) { 7232 const PetscScalar *array; 7233 const PetscInt *idxs_V, *idxs_all; 7234 PetscInt i, n_V; 7235 7236 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7237 PetscCall(ISGetLocalSize(is_V_Sall, &n_V)); 7238 PetscCall(ISGetIndices(is_V_Sall, &idxs_V)); 7239 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all)); 7240 PetscCall(VecGetArrayRead(pcis->D, &array)); 7241 for (i = 0; i < n_V; i++) { 7242 PetscScalar val; 7243 PetscInt idx; 7244 7245 idx = idxs_V[i]; 7246 val = array[idxs_all[idxs_V[i]]]; 7247 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES)); 7248 } 7249 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY)); 7250 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY)); 7251 PetscCall(VecRestoreArrayRead(pcis->D, &array)); 7252 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all)); 7253 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V)); 7254 } 7255 sub_schurs->S_Ej_all = S_new; 7256 PetscCall(MatDestroy(&S_new)); 7257 if (sub_schurs->sum_S_Ej_all) { 7258 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new)); 7259 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 7260 PetscCall(PetscObjectReference((PetscObject)S_new)); 7261 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL)); 7262 sub_schurs->sum_S_Ej_all = S_new; 7263 PetscCall(MatDestroy(&S_new)); 7264 } 7265 PetscCall(ISDestroy(&is_V_Sall)); 7266 PetscCall(MatDestroy(&tmat)); 7267 } 7268 /* destroy any change of basis context in sub_schurs */ 7269 if (sub_schurs && sub_schurs->change) { 7270 PetscInt i; 7271 7272 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i])); 7273 PetscCall(PetscFree(sub_schurs->change)); 7274 } 7275 } 7276 if (pcbddc->switch_static) { /* need to save the local change */ 7277 pcbddc->switch_static_change = localChangeOfBasisMatrix; 7278 } else { 7279 PetscCall(MatDestroy(&localChangeOfBasisMatrix)); 7280 } 7281 /* determine if any process has changed the pressures locally */ 7282 pcbddc->change_interior = pcbddc->benign_have_null; 7283 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 7284 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix)); 7285 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 7286 pcbddc->use_qr_single = qr_needed; 7287 } 7288 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 7289 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 7290 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix)); 7291 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 7292 } else { 7293 Mat benign_global = NULL; 7294 if (pcbddc->benign_have_null) { 7295 Mat M; 7296 7297 pcbddc->change_interior = PETSC_TRUE; 7298 PetscCall(VecCopy(matis->counter, pcis->vec1_N)); 7299 PetscCall(VecReciprocal(pcis->vec1_N)); 7300 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global)); 7301 if (pcbddc->benign_change) { 7302 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M)); 7303 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL)); 7304 } else { 7305 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M)); 7306 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES)); 7307 } 7308 PetscCall(MatISSetLocalMat(benign_global, M)); 7309 PetscCall(MatDestroy(&M)); 7310 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY)); 7311 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY)); 7312 } 7313 if (pcbddc->user_ChangeOfBasisMatrix) { 7314 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->ChangeOfBasisMatrix)); 7315 PetscCall(MatDestroy(&benign_global)); 7316 } else if (pcbddc->benign_have_null) { 7317 pcbddc->ChangeOfBasisMatrix = benign_global; 7318 } 7319 } 7320 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 7321 IS is_global; 7322 const PetscInt *gidxs; 7323 7324 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs)); 7325 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global)); 7326 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs)); 7327 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change)); 7328 PetscCall(ISDestroy(&is_global)); 7329 } 7330 } 7331 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change)); 7332 7333 if (!pcbddc->fake_change) { 7334 /* add pressure dofs to set of primal nodes for numbering purposes */ 7335 for (i = 0; i < pcbddc->benign_n; i++) { 7336 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 7337 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 7338 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 7339 pcbddc->local_primal_size_cc++; 7340 pcbddc->local_primal_size++; 7341 } 7342 7343 /* check if a new primal space has been introduced (also take into account benign trick) */ 7344 pcbddc->new_primal_space_local = PETSC_TRUE; 7345 if (olocal_primal_size == pcbddc->local_primal_size) { 7346 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7347 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7348 if (!pcbddc->new_primal_space_local) { 7349 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local)); 7350 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 7351 } 7352 } 7353 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 7354 PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 7355 } 7356 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult)); 7357 7358 /* flush dbg viewer */ 7359 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7360 7361 /* free workspace */ 7362 PetscCall(PetscBTDestroy(&qr_needed_idx)); 7363 PetscCall(PetscBTDestroy(&change_basis)); 7364 if (!pcbddc->adaptive_selection) { 7365 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n)); 7366 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B)); 7367 } else { 7368 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data)); 7369 PetscCall(PetscFree(constraints_n)); 7370 PetscCall(PetscFree(constraints_idxs_B)); 7371 } 7372 PetscFunctionReturn(PETSC_SUCCESS); 7373 } 7374 7375 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 7376 { 7377 ISLocalToGlobalMapping map; 7378 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 7379 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 7380 PetscInt i, N; 7381 PetscBool rcsr = PETSC_FALSE; 7382 7383 PetscFunctionBegin; 7384 if (pcbddc->recompute_topography) { 7385 pcbddc->graphanalyzed = PETSC_FALSE; 7386 /* Reset previously computed graph */ 7387 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph)); 7388 /* Init local Graph struct */ 7389 PetscCall(MatGetSize(pc->pmat, &N, NULL)); 7390 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL)); 7391 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount)); 7392 7393 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local)); 7394 /* Check validity of the csr graph passed in by the user */ 7395 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, 7396 pcbddc->mat_graph->nvtxs); 7397 7398 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 7399 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 7400 PetscInt *xadj, *adjncy; 7401 PetscInt nvtxs; 7402 PetscBool flg_row; 7403 Mat A; 7404 7405 PetscCall(PetscObjectReference((PetscObject)matis->A)); 7406 A = matis->A; 7407 for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) { 7408 Mat AtA; 7409 7410 PetscCall(MatProductCreate(A, A, NULL, &AtA)); 7411 PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_")); 7412 PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB)); 7413 PetscCall(MatProductSetFromOptions(AtA)); 7414 PetscCall(MatProductSymbolic(AtA)); 7415 PetscCall(MatProductClear(AtA)); 7416 /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */ 7417 AtA->assembled = PETSC_TRUE; 7418 PetscCall(MatDestroy(&A)); 7419 A = AtA; 7420 } 7421 PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7422 if (flg_row) { 7423 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES)); 7424 pcbddc->computed_rowadj = PETSC_TRUE; 7425 PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row)); 7426 rcsr = PETSC_TRUE; 7427 } 7428 PetscCall(MatDestroy(&A)); 7429 } 7430 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 7431 7432 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 7433 PetscReal *lcoords; 7434 PetscInt n; 7435 MPI_Datatype dimrealtype; 7436 7437 /* TODO: support for blocked */ 7438 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n); 7439 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7440 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords)); 7441 PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype)); 7442 PetscCallMPI(MPI_Type_commit(&dimrealtype)); 7443 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7444 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE)); 7445 PetscCallMPI(MPI_Type_free(&dimrealtype)); 7446 PetscCall(PetscFree(pcbddc->mat_graph->coords)); 7447 7448 pcbddc->mat_graph->coords = lcoords; 7449 pcbddc->mat_graph->cloc = PETSC_TRUE; 7450 pcbddc->mat_graph->cnloc = n; 7451 } 7452 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, 7453 pcbddc->mat_graph->nvtxs); 7454 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected); 7455 7456 /* attach info on disconnected subdomains if present */ 7457 if (pcbddc->n_local_subs) { 7458 PetscInt *local_subs, n, totn; 7459 7460 PetscCall(MatGetLocalSize(matis->A, &n, NULL)); 7461 PetscCall(PetscMalloc1(n, &local_subs)); 7462 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs; 7463 for (i = 0; i < pcbddc->n_local_subs; i++) { 7464 const PetscInt *idxs; 7465 PetscInt nl, j; 7466 7467 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl)); 7468 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs)); 7469 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i; 7470 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs)); 7471 } 7472 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]); 7473 pcbddc->mat_graph->n_local_subs = totn + 1; 7474 pcbddc->mat_graph->local_subs = local_subs; 7475 } 7476 7477 /* Setup of Graph */ 7478 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local)); 7479 } 7480 7481 if (!pcbddc->graphanalyzed) { 7482 /* Graph's connected components analysis */ 7483 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph)); 7484 pcbddc->graphanalyzed = PETSC_TRUE; 7485 pcbddc->corner_selected = pcbddc->corner_selection; 7486 } 7487 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7488 PetscFunctionReturn(PETSC_SUCCESS); 7489 } 7490 7491 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[]) 7492 { 7493 PetscInt i, j, n; 7494 PetscScalar *alphas; 7495 PetscReal norm, *onorms; 7496 7497 PetscFunctionBegin; 7498 n = *nio; 7499 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 7500 PetscCall(PetscMalloc2(n, &alphas, n, &onorms)); 7501 PetscCall(VecNormalize(vecs[0], &norm)); 7502 if (norm < PETSC_SMALL) { 7503 onorms[0] = 0.0; 7504 PetscCall(VecSet(vecs[0], 0.0)); 7505 } else { 7506 onorms[0] = norm; 7507 } 7508 7509 for (i = 1; i < n; i++) { 7510 PetscCall(VecMDot(vecs[i], i, vecs, alphas)); 7511 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]); 7512 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs)); 7513 PetscCall(VecNormalize(vecs[i], &norm)); 7514 if (norm < PETSC_SMALL) { 7515 onorms[i] = 0.0; 7516 PetscCall(VecSet(vecs[i], 0.0)); 7517 } else { 7518 onorms[i] = norm; 7519 } 7520 } 7521 /* push nonzero vectors at the beginning */ 7522 for (i = 0; i < n; i++) { 7523 if (onorms[i] == 0.0) { 7524 for (j = i + 1; j < n; j++) { 7525 if (onorms[j] != 0.0) { 7526 PetscCall(VecCopy(vecs[j], vecs[i])); 7527 onorms[j] = 0.0; 7528 } 7529 } 7530 } 7531 } 7532 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0; 7533 PetscCall(PetscFree2(alphas, onorms)); 7534 PetscFunctionReturn(PETSC_SUCCESS); 7535 } 7536 7537 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void) 7538 { 7539 ISLocalToGlobalMapping mapping; 7540 Mat A; 7541 PetscInt n_neighs, *neighs, *n_shared, **shared; 7542 PetscMPIInt size, rank, color; 7543 PetscInt *xadj, *adjncy; 7544 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx; 7545 PetscInt im_active, active_procs, N, n, i, j, threshold = 2; 7546 PetscInt void_procs, *procs_candidates = NULL; 7547 PetscInt xadj_count, *count; 7548 PetscBool ismatis, use_vwgt = PETSC_FALSE; 7549 PetscSubcomm psubcomm; 7550 MPI_Comm subcomm; 7551 7552 PetscFunctionBegin; 7553 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7554 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7555 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7556 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2); 7557 PetscValidLogicalCollectiveInt(mat, redprocs, 3); 7558 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains); 7559 7560 if (have_void) *have_void = PETSC_FALSE; 7561 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 7562 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 7563 PetscCall(MatISGetLocalMat(mat, &A)); 7564 PetscCall(MatGetLocalSize(A, &n, NULL)); 7565 im_active = !!n; 7566 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat))); 7567 void_procs = size - active_procs; 7568 /* get ranks of non-active processes in mat communicator */ 7569 if (void_procs) { 7570 PetscInt ncand; 7571 7572 if (have_void) *have_void = PETSC_TRUE; 7573 PetscCall(PetscMalloc1(size, &procs_candidates)); 7574 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 7575 for (i = 0, ncand = 0; i < size; i++) { 7576 if (!procs_candidates[i]) procs_candidates[ncand++] = i; 7577 } 7578 /* force n_subdomains to be not greater that the number of non-active processes */ 7579 *n_subdomains = PetscMin(void_procs, *n_subdomains); 7580 } 7581 7582 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7583 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */ 7584 PetscCall(MatGetSize(mat, &N, NULL)); 7585 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7586 PetscInt issize, isidx, dest; 7587 if (*n_subdomains == 1) dest = 0; 7588 else dest = rank; 7589 if (im_active) { 7590 issize = 1; 7591 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7592 isidx = procs_candidates[dest]; 7593 } else { 7594 isidx = dest; 7595 } 7596 } else { 7597 issize = 0; 7598 isidx = -1; 7599 } 7600 if (*n_subdomains != 1) *n_subdomains = active_procs; 7601 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends)); 7602 PetscCall(PetscFree(procs_candidates)); 7603 PetscFunctionReturn(PETSC_SUCCESS); 7604 } 7605 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL)); 7606 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL)); 7607 threshold = PetscMax(threshold, 2); 7608 7609 /* Get info on mapping */ 7610 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7611 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7612 7613 /* build local CSR graph of subdomains' connectivity */ 7614 PetscCall(PetscMalloc1(2, &xadj)); 7615 xadj[0] = 0; 7616 xadj[1] = PetscMax(n_neighs - 1, 0); 7617 PetscCall(PetscMalloc1(xadj[1], &adjncy)); 7618 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt)); 7619 PetscCall(PetscCalloc1(n, &count)); 7620 for (i = 1; i < n_neighs; i++) 7621 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1; 7622 7623 xadj_count = 0; 7624 for (i = 1; i < n_neighs; i++) { 7625 for (j = 0; j < n_shared[i]; j++) { 7626 if (count[shared[i][j]] < threshold) { 7627 adjncy[xadj_count] = neighs[i]; 7628 adjncy_wgt[xadj_count] = n_shared[i]; 7629 xadj_count++; 7630 break; 7631 } 7632 } 7633 } 7634 xadj[1] = xadj_count; 7635 PetscCall(PetscFree(count)); 7636 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared)); 7637 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7638 7639 PetscCall(PetscMalloc1(1, &ranks_send_to_idx)); 7640 7641 /* Restrict work on active processes only */ 7642 PetscCall(PetscMPIIntCast(im_active, &color)); 7643 if (void_procs) { 7644 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm)); 7645 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */ 7646 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank)); 7647 subcomm = PetscSubcommChild(psubcomm); 7648 } else { 7649 psubcomm = NULL; 7650 subcomm = PetscObjectComm((PetscObject)mat); 7651 } 7652 7653 v_wgt = NULL; 7654 if (!color) { 7655 PetscCall(PetscFree(xadj)); 7656 PetscCall(PetscFree(adjncy)); 7657 PetscCall(PetscFree(adjncy_wgt)); 7658 } else { 7659 Mat subdomain_adj; 7660 IS new_ranks, new_ranks_contig; 7661 MatPartitioning partitioner; 7662 PetscInt rstart = 0, rend = 0; 7663 PetscInt *is_indices, *oldranks; 7664 PetscMPIInt size; 7665 PetscBool aggregate; 7666 7667 PetscCallMPI(MPI_Comm_size(subcomm, &size)); 7668 if (void_procs) { 7669 PetscInt prank = rank; 7670 PetscCall(PetscMalloc1(size, &oldranks)); 7671 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm)); 7672 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i])); 7673 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt)); 7674 } else { 7675 oldranks = NULL; 7676 } 7677 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7678 if (aggregate) { /* TODO: all this part could be made more efficient */ 7679 PetscInt lrows, row, ncols, *cols; 7680 PetscMPIInt nrank; 7681 PetscScalar *vals; 7682 7683 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank)); 7684 lrows = 0; 7685 if (nrank < redprocs) { 7686 lrows = size / redprocs; 7687 if (nrank < size % redprocs) lrows++; 7688 } 7689 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj)); 7690 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend)); 7691 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 7692 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 7693 row = nrank; 7694 ncols = xadj[1] - xadj[0]; 7695 cols = adjncy; 7696 PetscCall(PetscMalloc1(ncols, &vals)); 7697 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i]; 7698 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES)); 7699 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7700 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY)); 7701 PetscCall(PetscFree(xadj)); 7702 PetscCall(PetscFree(adjncy)); 7703 PetscCall(PetscFree(adjncy_wgt)); 7704 PetscCall(PetscFree(vals)); 7705 if (use_vwgt) { 7706 Vec v; 7707 const PetscScalar *array; 7708 PetscInt nl; 7709 7710 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL)); 7711 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES)); 7712 PetscCall(VecAssemblyBegin(v)); 7713 PetscCall(VecAssemblyEnd(v)); 7714 PetscCall(VecGetLocalSize(v, &nl)); 7715 PetscCall(VecGetArrayRead(v, &array)); 7716 PetscCall(PetscMalloc1(nl, &v_wgt)); 7717 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7718 PetscCall(VecRestoreArrayRead(v, &array)); 7719 PetscCall(VecDestroy(&v)); 7720 } 7721 } else { 7722 PetscCall(MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj)); 7723 if (use_vwgt) { 7724 PetscCall(PetscMalloc1(1, &v_wgt)); 7725 v_wgt[0] = n; 7726 } 7727 } 7728 /* PetscCall(MatView(subdomain_adj,0)); */ 7729 7730 /* Partition */ 7731 PetscCall(MatPartitioningCreate(subcomm, &partitioner)); 7732 #if defined(PETSC_HAVE_PTSCOTCH) 7733 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH)); 7734 #elif defined(PETSC_HAVE_PARMETIS) 7735 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS)); 7736 #else 7737 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE)); 7738 #endif 7739 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj)); 7740 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt)); 7741 *n_subdomains = PetscMin((PetscInt)size, *n_subdomains); 7742 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains)); 7743 PetscCall(MatPartitioningSetFromOptions(partitioner)); 7744 PetscCall(MatPartitioningApply(partitioner, &new_ranks)); 7745 /* PetscCall(MatPartitioningView(partitioner,0)); */ 7746 7747 /* renumber new_ranks to avoid "holes" in new set of processors */ 7748 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig)); 7749 PetscCall(ISDestroy(&new_ranks)); 7750 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7751 if (!aggregate) { 7752 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7753 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7754 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7755 } else if (oldranks) { 7756 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7757 } else { 7758 ranks_send_to_idx[0] = is_indices[0]; 7759 } 7760 } else { 7761 PetscInt idx = 0; 7762 PetscMPIInt tag; 7763 MPI_Request *reqs; 7764 7765 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag)); 7766 PetscCall(PetscMalloc1(rend - rstart, &reqs)); 7767 for (i = rstart; i < rend; i++) PetscCallMPI(MPI_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart])); 7768 PetscCallMPI(MPI_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE)); 7769 PetscCallMPI(MPI_Waitall(rend - rstart, reqs, MPI_STATUSES_IGNORE)); 7770 PetscCall(PetscFree(reqs)); 7771 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7772 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 7773 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7774 } else if (oldranks) { 7775 ranks_send_to_idx[0] = oldranks[idx]; 7776 } else { 7777 ranks_send_to_idx[0] = idx; 7778 } 7779 } 7780 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices)); 7781 /* clean up */ 7782 PetscCall(PetscFree(oldranks)); 7783 PetscCall(ISDestroy(&new_ranks_contig)); 7784 PetscCall(MatDestroy(&subdomain_adj)); 7785 PetscCall(MatPartitioningDestroy(&partitioner)); 7786 } 7787 PetscCall(PetscSubcommDestroy(&psubcomm)); 7788 PetscCall(PetscFree(procs_candidates)); 7789 7790 /* assemble parallel IS for sends */ 7791 i = 1; 7792 if (!color) i = 0; 7793 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends)); 7794 PetscFunctionReturn(PETSC_SUCCESS); 7795 } 7796 7797 typedef enum { 7798 MATDENSE_PRIVATE = 0, 7799 MATAIJ_PRIVATE, 7800 MATBAIJ_PRIVATE, 7801 MATSBAIJ_PRIVATE 7802 } MatTypePrivate; 7803 7804 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7805 { 7806 Mat local_mat; 7807 IS is_sends_internal; 7808 PetscInt rows, cols, new_local_rows; 7809 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs; 7810 PetscBool ismatis, isdense, newisdense, destroy_mat; 7811 ISLocalToGlobalMapping l2gmap; 7812 PetscInt *l2gmap_indices; 7813 const PetscInt *is_indices; 7814 MatType new_local_type; 7815 /* buffers */ 7816 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs; 7817 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is; 7818 PetscInt *recv_buffer_idxs_local; 7819 PetscScalar *ptr_vals, *recv_buffer_vals; 7820 const PetscScalar *send_buffer_vals; 7821 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs; 7822 /* MPI */ 7823 MPI_Comm comm, comm_n; 7824 PetscSubcomm subcomm; 7825 PetscMPIInt n_sends, n_recvs, size; 7826 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is; 7827 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals; 7828 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest; 7829 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs; 7830 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs; 7831 7832 PetscFunctionBegin; 7833 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 7834 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis)); 7835 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME); 7836 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3); 7837 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4); 7838 PetscValidLogicalCollectiveBool(mat, restrict_full, 5); 7839 PetscValidLogicalCollectiveBool(mat, reuse, 6); 7840 PetscValidLogicalCollectiveInt(mat, nis, 8); 7841 PetscValidLogicalCollectiveInt(mat, nvecs, 10); 7842 if (nvecs) { 7843 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported"); 7844 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11); 7845 } 7846 /* further checks */ 7847 PetscCall(MatISGetLocalMat(mat, &local_mat)); 7848 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7849 /* XXX hack for multi_element */ 7850 if (!isdense) PetscCall(MatConvert(local_mat, MATDENSE, MAT_INPLACE_MATRIX, &local_mat)); 7851 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense)); 7852 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7853 7854 PetscCall(MatGetSize(local_mat, &rows, &cols)); 7855 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square"); 7856 if (reuse && *mat_n) { 7857 PetscInt mrows, mcols, mnrows, mncols; 7858 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7); 7859 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis)); 7860 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS"); 7861 PetscCall(MatGetSize(mat, &mrows, &mcols)); 7862 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols)); 7863 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows); 7864 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols); 7865 } 7866 PetscCall(MatGetBlockSize(local_mat, &bs)); 7867 PetscValidLogicalCollectiveInt(mat, bs, 1); 7868 7869 /* prepare IS for sending if not provided */ 7870 if (!is_sends) { 7871 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains"); 7872 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL)); 7873 } else { 7874 PetscCall(PetscObjectReference((PetscObject)is_sends)); 7875 is_sends_internal = is_sends; 7876 } 7877 7878 /* get comm */ 7879 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 7880 7881 /* compute number of sends */ 7882 PetscCall(ISGetLocalSize(is_sends_internal, &i)); 7883 PetscCall(PetscMPIIntCast(i, &n_sends)); 7884 7885 /* compute number of receives */ 7886 PetscCallMPI(MPI_Comm_size(comm, &size)); 7887 PetscCall(PetscMalloc1(size, &iflags)); 7888 PetscCall(PetscArrayzero(iflags, size)); 7889 PetscCall(ISGetIndices(is_sends_internal, &is_indices)); 7890 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1; 7891 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs)); 7892 PetscCall(PetscFree(iflags)); 7893 7894 /* restrict comm if requested */ 7895 subcomm = NULL; 7896 destroy_mat = PETSC_FALSE; 7897 if (restrict_comm) { 7898 PetscMPIInt color, subcommsize; 7899 7900 color = 0; 7901 if (restrict_full) { 7902 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */ 7903 } else { 7904 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */ 7905 } 7906 PetscCall(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm)); 7907 subcommsize = size - subcommsize; 7908 /* check if reuse has been requested */ 7909 if (reuse) { 7910 if (*mat_n) { 7911 PetscMPIInt subcommsize2; 7912 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2)); 7913 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2); 7914 comm_n = PetscObjectComm((PetscObject)*mat_n); 7915 } else { 7916 comm_n = PETSC_COMM_SELF; 7917 } 7918 } else { /* MAT_INITIAL_MATRIX */ 7919 PetscMPIInt rank; 7920 7921 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 7922 PetscCall(PetscSubcommCreate(comm, &subcomm)); 7923 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 7924 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 7925 comm_n = PetscSubcommChild(subcomm); 7926 } 7927 /* flag to destroy *mat_n if not significative */ 7928 if (color) destroy_mat = PETSC_TRUE; 7929 } else { 7930 comm_n = comm; 7931 } 7932 7933 /* prepare send/receive buffers */ 7934 PetscCall(PetscMalloc1(size, &ilengths_idxs)); 7935 PetscCall(PetscArrayzero(ilengths_idxs, size)); 7936 PetscCall(PetscMalloc1(size, &ilengths_vals)); 7937 PetscCall(PetscArrayzero(ilengths_vals, size)); 7938 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is)); 7939 7940 /* Get data from local matrices */ 7941 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented"); 7942 /* TODO: See below some guidelines on how to prepare the local buffers */ 7943 /* 7944 send_buffer_vals should contain the raw values of the local matrix 7945 send_buffer_idxs should contain: 7946 - MatType_PRIVATE type 7947 - PetscInt size_of_l2gmap 7948 - PetscInt global_row_indices[size_of_l2gmap] 7949 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7950 */ 7951 { 7952 ISLocalToGlobalMapping mapping; 7953 7954 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL)); 7955 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals)); 7956 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i)); 7957 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs)); 7958 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7959 send_buffer_idxs[1] = i; 7960 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs)); 7961 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i)); 7962 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs)); 7963 PetscCall(PetscMPIIntCast(i, &len)); 7964 for (i = 0; i < n_sends; i++) { 7965 ilengths_vals[is_indices[i]] = len * len; 7966 ilengths_idxs[is_indices[i]] = len + 2; 7967 } 7968 } 7969 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals)); 7970 /* additional is (if any) */ 7971 if (nis) { 7972 PetscMPIInt psum; 7973 PetscInt j; 7974 for (j = 0, psum = 0; j < nis; j++) { 7975 PetscInt plen; 7976 PetscCall(ISGetLocalSize(isarray[j], &plen)); 7977 PetscCall(PetscMPIIntCast(plen, &len)); 7978 psum += len + 1; /* indices + length */ 7979 } 7980 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is)); 7981 for (j = 0, psum = 0; j < nis; j++) { 7982 PetscInt plen; 7983 const PetscInt *is_array_idxs; 7984 PetscCall(ISGetLocalSize(isarray[j], &plen)); 7985 send_buffer_idxs_is[psum] = plen; 7986 PetscCall(ISGetIndices(isarray[j], &is_array_idxs)); 7987 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen)); 7988 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs)); 7989 psum += plen + 1; /* indices + length */ 7990 } 7991 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum; 7992 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is)); 7993 } 7994 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 7995 7996 buf_size_idxs = 0; 7997 buf_size_vals = 0; 7998 buf_size_idxs_is = 0; 7999 buf_size_vecs = 0; 8000 for (i = 0; i < n_recvs; i++) { 8001 buf_size_idxs += (PetscInt)olengths_idxs[i]; 8002 buf_size_vals += (PetscInt)olengths_vals[i]; 8003 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 8004 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 8005 } 8006 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs)); 8007 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals)); 8008 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is)); 8009 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs)); 8010 8011 /* get new tags for clean communications */ 8012 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs)); 8013 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals)); 8014 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is)); 8015 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs)); 8016 8017 /* allocate for requests */ 8018 PetscCall(PetscMalloc1(n_sends, &send_req_idxs)); 8019 PetscCall(PetscMalloc1(n_sends, &send_req_vals)); 8020 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is)); 8021 PetscCall(PetscMalloc1(n_sends, &send_req_vecs)); 8022 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs)); 8023 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals)); 8024 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is)); 8025 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs)); 8026 8027 /* communications */ 8028 ptr_idxs = recv_buffer_idxs; 8029 ptr_vals = recv_buffer_vals; 8030 ptr_idxs_is = recv_buffer_idxs_is; 8031 ptr_vecs = recv_buffer_vecs; 8032 for (i = 0; i < n_recvs; i++) { 8033 source_dest = onodes[i]; 8034 PetscCallMPI(MPI_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i])); 8035 PetscCallMPI(MPI_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i])); 8036 ptr_idxs += olengths_idxs[i]; 8037 ptr_vals += olengths_vals[i]; 8038 if (nis) { 8039 source_dest = onodes_is[i]; 8040 PetscCallMPI(MPI_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i])); 8041 ptr_idxs_is += olengths_idxs_is[i]; 8042 } 8043 if (nvecs) { 8044 source_dest = onodes[i]; 8045 PetscCallMPI(MPI_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i])); 8046 ptr_vecs += olengths_idxs[i] - 2; 8047 } 8048 } 8049 for (i = 0; i < n_sends; i++) { 8050 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest)); 8051 PetscCallMPI(MPI_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i])); 8052 PetscCallMPI(MPI_Isend((PetscScalar *)send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i])); 8053 if (nis) PetscCallMPI(MPI_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i])); 8054 if (nvecs) { 8055 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8056 PetscCallMPI(MPI_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i])); 8057 } 8058 } 8059 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices)); 8060 PetscCall(ISDestroy(&is_sends_internal)); 8061 8062 /* assemble new l2g map */ 8063 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE)); 8064 ptr_idxs = recv_buffer_idxs; 8065 new_local_rows = 0; 8066 for (i = 0; i < n_recvs; i++) { 8067 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8068 ptr_idxs += olengths_idxs[i]; 8069 } 8070 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices)); 8071 ptr_idxs = recv_buffer_idxs; 8072 new_local_rows = 0; 8073 for (i = 0; i < n_recvs; i++) { 8074 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1))); 8075 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */ 8076 ptr_idxs += olengths_idxs[i]; 8077 } 8078 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices)); 8079 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap)); 8080 PetscCall(PetscFree(l2gmap_indices)); 8081 8082 /* infer new local matrix type from received local matrices type */ 8083 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 8084 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 8085 if (n_recvs) { 8086 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 8087 ptr_idxs = recv_buffer_idxs; 8088 for (i = 0; i < n_recvs; i++) { 8089 if ((PetscInt)new_local_type_private != *ptr_idxs) { 8090 new_local_type_private = MATAIJ_PRIVATE; 8091 break; 8092 } 8093 ptr_idxs += olengths_idxs[i]; 8094 } 8095 switch (new_local_type_private) { 8096 case MATDENSE_PRIVATE: 8097 new_local_type = MATSEQAIJ; 8098 bs = 1; 8099 break; 8100 case MATAIJ_PRIVATE: 8101 new_local_type = MATSEQAIJ; 8102 bs = 1; 8103 break; 8104 case MATBAIJ_PRIVATE: 8105 new_local_type = MATSEQBAIJ; 8106 break; 8107 case MATSBAIJ_PRIVATE: 8108 new_local_type = MATSEQSBAIJ; 8109 break; 8110 default: 8111 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME); 8112 } 8113 } else { /* by default, new_local_type is seqaij */ 8114 new_local_type = MATSEQAIJ; 8115 bs = 1; 8116 } 8117 8118 /* create MATIS object if needed */ 8119 if (!reuse) { 8120 PetscCall(MatGetSize(mat, &rows, &cols)); 8121 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8122 } else { 8123 /* it also destroys the local matrices */ 8124 if (*mat_n) { 8125 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap)); 8126 } else { /* this is a fake object */ 8127 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n)); 8128 } 8129 } 8130 PetscCall(MatISGetLocalMat(*mat_n, &local_mat)); 8131 PetscCall(MatSetType(local_mat, new_local_type)); 8132 8133 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE)); 8134 8135 /* Global to local map of received indices */ 8136 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */ 8137 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local)); 8138 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap)); 8139 8140 /* restore attributes -> type of incoming data and its size */ 8141 buf_size_idxs = 0; 8142 for (i = 0; i < n_recvs; i++) { 8143 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 8144 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1]; 8145 buf_size_idxs += (PetscInt)olengths_idxs[i]; 8146 } 8147 PetscCall(PetscFree(recv_buffer_idxs)); 8148 8149 /* set preallocation */ 8150 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense)); 8151 if (!newisdense) { 8152 PetscInt *new_local_nnz = NULL; 8153 8154 ptr_idxs = recv_buffer_idxs_local; 8155 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz)); 8156 for (i = 0; i < n_recvs; i++) { 8157 PetscInt j; 8158 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 8159 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1); 8160 } else { 8161 /* TODO */ 8162 } 8163 ptr_idxs += olengths_idxs[i]; 8164 } 8165 if (new_local_nnz) { 8166 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows); 8167 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz)); 8168 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs; 8169 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8170 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0); 8171 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz)); 8172 } else { 8173 PetscCall(MatSetUp(local_mat)); 8174 } 8175 PetscCall(PetscFree(new_local_nnz)); 8176 } else { 8177 PetscCall(MatSetUp(local_mat)); 8178 } 8179 8180 /* set values */ 8181 ptr_vals = recv_buffer_vals; 8182 ptr_idxs = recv_buffer_idxs_local; 8183 for (i = 0; i < n_recvs; i++) { 8184 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 8185 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 8186 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES)); 8187 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY)); 8188 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY)); 8189 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 8190 } else { 8191 /* TODO */ 8192 } 8193 ptr_idxs += olengths_idxs[i]; 8194 ptr_vals += olengths_vals[i]; 8195 } 8196 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY)); 8197 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY)); 8198 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat)); 8199 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY)); 8200 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY)); 8201 PetscCall(PetscFree(recv_buffer_vals)); 8202 8203 #if 0 8204 if (!restrict_comm) { /* check */ 8205 Vec lvec,rvec; 8206 PetscReal infty_error; 8207 8208 PetscCall(MatCreateVecs(mat,&rvec,&lvec)); 8209 PetscCall(VecSetRandom(rvec,NULL)); 8210 PetscCall(MatMult(mat,rvec,lvec)); 8211 PetscCall(VecScale(lvec,-1.0)); 8212 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec)); 8213 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error)); 8214 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error)); 8215 PetscCall(VecDestroy(&rvec)); 8216 PetscCall(VecDestroy(&lvec)); 8217 } 8218 #endif 8219 8220 /* assemble new additional is (if any) */ 8221 if (nis) { 8222 PetscInt **temp_idxs, *count_is, j, psum; 8223 8224 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE)); 8225 PetscCall(PetscCalloc1(nis, &count_is)); 8226 ptr_idxs = recv_buffer_idxs_is; 8227 psum = 0; 8228 for (i = 0; i < n_recvs; i++) { 8229 for (j = 0; j < nis; j++) { 8230 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8231 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 8232 psum += plen; 8233 ptr_idxs += plen + 1; /* shift pointer to received data */ 8234 } 8235 } 8236 PetscCall(PetscMalloc1(nis, &temp_idxs)); 8237 PetscCall(PetscMalloc1(psum, &temp_idxs[0])); 8238 for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]); 8239 PetscCall(PetscArrayzero(count_is, nis)); 8240 ptr_idxs = recv_buffer_idxs_is; 8241 for (i = 0; i < n_recvs; i++) { 8242 for (j = 0; j < nis; j++) { 8243 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 8244 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen)); 8245 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 8246 ptr_idxs += plen + 1; /* shift pointer to received data */ 8247 } 8248 } 8249 for (i = 0; i < nis; i++) { 8250 PetscCall(ISDestroy(&isarray[i])); 8251 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i])); 8252 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i])); 8253 } 8254 PetscCall(PetscFree(count_is)); 8255 PetscCall(PetscFree(temp_idxs[0])); 8256 PetscCall(PetscFree(temp_idxs)); 8257 } 8258 /* free workspace */ 8259 PetscCall(PetscFree(recv_buffer_idxs_is)); 8260 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE)); 8261 PetscCall(PetscFree(send_buffer_idxs)); 8262 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE)); 8263 if (isdense) { 8264 PetscCall(MatISGetLocalMat(mat, &local_mat)); 8265 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals)); 8266 PetscCall(MatISRestoreLocalMat(mat, &local_mat)); 8267 } else { 8268 /* PetscCall(PetscFree(send_buffer_vals)); */ 8269 } 8270 if (nis) { 8271 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE)); 8272 PetscCall(PetscFree(send_buffer_idxs_is)); 8273 } 8274 8275 if (nvecs) { 8276 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE)); 8277 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE)); 8278 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8279 PetscCall(VecDestroy(&nnsp_vec[0])); 8280 PetscCall(VecCreate(comm_n, &nnsp_vec[0])); 8281 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE)); 8282 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD)); 8283 /* set values */ 8284 ptr_vals = recv_buffer_vecs; 8285 ptr_idxs = recv_buffer_idxs_local; 8286 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs)); 8287 for (i = 0; i < n_recvs; i++) { 8288 PetscInt j; 8289 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j); 8290 ptr_idxs += olengths_idxs[i]; 8291 ptr_vals += olengths_idxs[i] - 2; 8292 } 8293 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs)); 8294 PetscCall(VecAssemblyBegin(nnsp_vec[0])); 8295 PetscCall(VecAssemblyEnd(nnsp_vec[0])); 8296 } 8297 8298 PetscCall(PetscFree(recv_buffer_vecs)); 8299 PetscCall(PetscFree(recv_buffer_idxs_local)); 8300 PetscCall(PetscFree(recv_req_idxs)); 8301 PetscCall(PetscFree(recv_req_vals)); 8302 PetscCall(PetscFree(recv_req_vecs)); 8303 PetscCall(PetscFree(recv_req_idxs_is)); 8304 PetscCall(PetscFree(send_req_idxs)); 8305 PetscCall(PetscFree(send_req_vals)); 8306 PetscCall(PetscFree(send_req_vecs)); 8307 PetscCall(PetscFree(send_req_idxs_is)); 8308 PetscCall(PetscFree(ilengths_vals)); 8309 PetscCall(PetscFree(ilengths_idxs)); 8310 PetscCall(PetscFree(olengths_vals)); 8311 PetscCall(PetscFree(olengths_idxs)); 8312 PetscCall(PetscFree(onodes)); 8313 if (nis) { 8314 PetscCall(PetscFree(ilengths_idxs_is)); 8315 PetscCall(PetscFree(olengths_idxs_is)); 8316 PetscCall(PetscFree(onodes_is)); 8317 } 8318 PetscCall(PetscSubcommDestroy(&subcomm)); 8319 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */ 8320 PetscCall(MatDestroy(mat_n)); 8321 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i])); 8322 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 8323 PetscCall(VecDestroy(&nnsp_vec[0])); 8324 } 8325 *mat_n = NULL; 8326 } 8327 PetscFunctionReturn(PETSC_SUCCESS); 8328 } 8329 8330 /* temporary hack into ksp private data structure */ 8331 #include <petsc/private/kspimpl.h> 8332 8333 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat) 8334 { 8335 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 8336 PC_IS *pcis = (PC_IS *)pc->data; 8337 PCBDDCGraph graph = pcbddc->mat_graph; 8338 Mat coarse_mat, coarse_mat_is; 8339 Mat coarsedivudotp = NULL; 8340 Mat coarseG, t_coarse_mat_is; 8341 MatNullSpace CoarseNullSpace = NULL; 8342 ISLocalToGlobalMapping coarse_islg; 8343 IS coarse_is, *isarray, corners; 8344 PetscInt i, im_active = -1, active_procs = -1; 8345 PetscInt nis, nisdofs, nisneu, nisvert; 8346 PetscInt coarse_eqs_per_proc, coarsening_ratio; 8347 PC pc_temp; 8348 PCType coarse_pc_type; 8349 KSPType coarse_ksp_type; 8350 PetscBool multilevel_requested, multilevel_allowed; 8351 PetscBool coarse_reuse, multi_element = graph->multi_element; 8352 PetscInt ncoarse, nedcfield; 8353 PetscBool compute_vecs = PETSC_FALSE; 8354 PetscScalar *array; 8355 MatReuse coarse_mat_reuse; 8356 PetscBool restr, full_restr, have_void; 8357 PetscMPIInt size; 8358 8359 PetscFunctionBegin; 8360 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8361 /* Assign global numbering to coarse dofs */ 8362 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 8363 PetscInt ocoarse_size; 8364 compute_vecs = PETSC_TRUE; 8365 8366 pcbddc->new_primal_space = PETSC_TRUE; 8367 ocoarse_size = pcbddc->coarse_size; 8368 PetscCall(PetscFree(pcbddc->global_primal_indices)); 8369 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices)); 8370 /* see if we can avoid some work */ 8371 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 8372 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 8373 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 8374 PetscCall(KSPReset(pcbddc->coarse_ksp)); 8375 coarse_reuse = PETSC_FALSE; 8376 } else { /* we can safely reuse already computed coarse matrix */ 8377 coarse_reuse = PETSC_TRUE; 8378 } 8379 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 8380 coarse_reuse = PETSC_FALSE; 8381 } 8382 /* reset any subassembling information */ 8383 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling)); 8384 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 8385 coarse_reuse = PETSC_TRUE; 8386 } 8387 if (coarse_reuse && pcbddc->coarse_ksp) { 8388 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL)); 8389 PetscCall(PetscObjectReference((PetscObject)coarse_mat)); 8390 coarse_mat_reuse = MAT_REUSE_MATRIX; 8391 } else { 8392 coarse_mat = NULL; 8393 coarse_mat_reuse = MAT_INITIAL_MATRIX; 8394 } 8395 8396 /* creates temporary l2gmap and IS for coarse indexes */ 8397 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is)); 8398 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg)); 8399 8400 /* creates temporary MATIS object for coarse matrix */ 8401 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is)); 8402 PetscCall(MatSetType(t_coarse_mat_is, MATIS)); 8403 PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size)); 8404 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_TRUE)); 8405 PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg)); 8406 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat)); 8407 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8408 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY)); 8409 PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view")); 8410 8411 /* count "active" (i.e. with positive local size) and "void" processes */ 8412 im_active = !!pcis->n; 8413 PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8414 8415 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 8416 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */ 8417 /* full_restr : just use the receivers from the subassembling pattern */ 8418 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size)); 8419 coarse_mat_is = NULL; 8420 multilevel_allowed = PETSC_FALSE; 8421 multilevel_requested = PETSC_FALSE; 8422 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc); 8423 if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1); 8424 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 8425 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 8426 coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio; 8427 if (multilevel_requested) { 8428 ncoarse = active_procs / coarsening_ratio; 8429 restr = PETSC_FALSE; 8430 full_restr = PETSC_FALSE; 8431 } else { 8432 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc); 8433 restr = PETSC_TRUE; 8434 full_restr = PETSC_TRUE; 8435 } 8436 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 8437 ncoarse = PetscMax(1, ncoarse); 8438 if (!pcbddc->coarse_subassembling) { 8439 if (coarsening_ratio > 1) { 8440 if (multilevel_requested) { 8441 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8442 } else { 8443 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void)); 8444 } 8445 } else { 8446 PetscMPIInt rank; 8447 8448 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank)); 8449 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 8450 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling)); 8451 } 8452 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 8453 PetscInt psum; 8454 if (pcbddc->coarse_ksp) psum = 1; 8455 else psum = 0; 8456 PetscCall(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc))); 8457 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 8458 } 8459 /* determine if we can go multilevel */ 8460 if (multilevel_requested) { 8461 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 8462 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 8463 } 8464 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 8465 8466 /* dump subassembling pattern */ 8467 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer)); 8468 /* compute dofs splitting and neumann boundaries for coarse dofs */ 8469 nedcfield = -1; 8470 corners = NULL; 8471 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */ 8472 PetscInt *tidxs, *tidxs2, nout, tsize, i; 8473 const PetscInt *idxs; 8474 ISLocalToGlobalMapping tmap; 8475 8476 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8477 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap)); 8478 /* allocate space for temporary storage */ 8479 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs)); 8480 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2)); 8481 /* allocate for IS array */ 8482 nisdofs = pcbddc->n_ISForDofsLocal; 8483 if (pcbddc->nedclocal) { 8484 if (pcbddc->nedfield > -1) { 8485 nedcfield = pcbddc->nedfield; 8486 } else { 8487 nedcfield = 0; 8488 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs); 8489 nisdofs = 1; 8490 } 8491 } 8492 nisneu = !!pcbddc->NeumannBoundariesLocal; 8493 nisvert = 0; /* nisvert is not used */ 8494 nis = nisdofs + nisneu + nisvert; 8495 PetscCall(PetscMalloc1(nis, &isarray)); 8496 /* dofs splitting */ 8497 for (i = 0; i < nisdofs; i++) { 8498 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */ 8499 if (nedcfield != i) { 8500 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize)); 8501 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8502 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8503 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs)); 8504 } else { 8505 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize)); 8506 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs)); 8507 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8508 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8509 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs)); 8510 } 8511 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8512 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i])); 8513 /* PetscCall(ISView(isarray[i],0)); */ 8514 } 8515 /* neumann boundaries */ 8516 if (pcbddc->NeumannBoundariesLocal) { 8517 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */ 8518 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize)); 8519 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8520 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8521 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs)); 8522 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8523 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs])); 8524 /* PetscCall(ISView(isarray[nisdofs],0)); */ 8525 } 8526 /* coordinates */ 8527 if (pcbddc->corner_selected) { 8528 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8529 PetscCall(ISGetLocalSize(corners, &tsize)); 8530 PetscCall(ISGetIndices(corners, &idxs)); 8531 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs)); 8532 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout); 8533 PetscCall(ISRestoreIndices(corners, &idxs)); 8534 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners)); 8535 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2)); 8536 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners)); 8537 } 8538 PetscCall(PetscFree(tidxs)); 8539 PetscCall(PetscFree(tidxs2)); 8540 PetscCall(ISLocalToGlobalMappingDestroy(&tmap)); 8541 } else { 8542 nis = 0; 8543 nisdofs = 0; 8544 nisneu = 0; 8545 nisvert = 0; 8546 isarray = NULL; 8547 } 8548 /* destroy no longer needed map */ 8549 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg)); 8550 8551 /* subassemble */ 8552 if (multilevel_allowed) { 8553 Vec vp[1]; 8554 PetscInt nvecs = 0; 8555 PetscBool reuse; 8556 8557 vp[0] = NULL; 8558 /* XXX HDIV also */ 8559 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8560 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0])); 8561 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE)); 8562 PetscCall(VecSetType(vp[0], VECSTANDARD)); 8563 nvecs = 1; 8564 8565 if (pcbddc->divudotp) { 8566 Mat B, loc_divudotp; 8567 Vec v, p; 8568 IS dummy; 8569 PetscInt np; 8570 8571 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp)); 8572 PetscCall(MatGetSize(loc_divudotp, &np, NULL)); 8573 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy)); 8574 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B)); 8575 PetscCall(MatCreateVecs(B, &v, &p)); 8576 PetscCall(VecSet(p, 1.)); 8577 PetscCall(MatMultTranspose(B, p, v)); 8578 PetscCall(VecDestroy(&p)); 8579 PetscCall(MatDestroy(&B)); 8580 PetscCall(VecGetArray(vp[0], &array)); 8581 PetscCall(VecPlaceArray(pcbddc->vec1_P, array)); 8582 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P)); 8583 PetscCall(VecResetArray(pcbddc->vec1_P)); 8584 PetscCall(VecRestoreArray(vp[0], &array)); 8585 PetscCall(ISDestroy(&dummy)); 8586 PetscCall(VecDestroy(&v)); 8587 } 8588 } 8589 if (coarse_mat) reuse = PETSC_TRUE; 8590 else reuse = PETSC_FALSE; 8591 if (multi_element) { 8592 /* XXX divudotp */ 8593 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_FALSE)); 8594 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8595 coarse_mat_is = t_coarse_mat_is; 8596 } else { 8597 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 8598 if (reuse) { 8599 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp)); 8600 } else { 8601 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp)); 8602 } 8603 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8604 PetscScalar *arraym; 8605 const PetscScalar *arrayv; 8606 PetscInt nl; 8607 PetscCall(VecGetLocalSize(vp[0], &nl)); 8608 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp)); 8609 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym)); 8610 PetscCall(VecGetArrayRead(vp[0], &arrayv)); 8611 PetscCall(PetscArraycpy(arraym, arrayv, nl)); 8612 PetscCall(VecRestoreArrayRead(vp[0], &arrayv)); 8613 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym)); 8614 PetscCall(VecDestroy(&vp[0])); 8615 } else { 8616 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp)); 8617 } 8618 } 8619 } else { 8620 if (ncoarse != size) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL)); 8621 else { 8622 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is)); 8623 coarse_mat_is = t_coarse_mat_is; 8624 } 8625 } 8626 if (coarse_mat_is || coarse_mat) { 8627 if (!multilevel_allowed) { 8628 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat)); 8629 } else { 8630 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8631 if (coarse_mat_is) { 8632 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen"); 8633 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is)); 8634 coarse_mat = coarse_mat_is; 8635 } 8636 } 8637 } 8638 PetscCall(MatDestroy(&t_coarse_mat_is)); 8639 PetscCall(MatDestroy(&coarse_mat_is)); 8640 8641 /* create local to global scatters for coarse problem */ 8642 if (compute_vecs) { 8643 PetscInt lrows; 8644 PetscCall(VecDestroy(&pcbddc->coarse_vec)); 8645 if (coarse_mat) { 8646 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL)); 8647 } else { 8648 lrows = 0; 8649 } 8650 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec)); 8651 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE)); 8652 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD)); 8653 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob)); 8654 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob)); 8655 } 8656 PetscCall(ISDestroy(&coarse_is)); 8657 8658 /* set defaults for coarse KSP and PC */ 8659 if (multilevel_allowed) { 8660 coarse_ksp_type = KSPRICHARDSON; 8661 coarse_pc_type = PCBDDC; 8662 } else { 8663 coarse_ksp_type = KSPPREONLY; 8664 coarse_pc_type = PCREDUNDANT; 8665 } 8666 8667 /* print some info if requested */ 8668 if (pcbddc->dbg_flag) { 8669 if (!multilevel_allowed) { 8670 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 8671 if (multilevel_requested) { 8672 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio)); 8673 } else if (pcbddc->max_levels) { 8674 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels)); 8675 } 8676 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 8677 } 8678 } 8679 8680 /* communicate coarse discrete gradient */ 8681 coarseG = NULL; 8682 if (pcbddc->nedcG && multilevel_allowed) { 8683 MPI_Comm ccomm; 8684 if (coarse_mat) { 8685 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8686 } else { 8687 ccomm = MPI_COMM_NULL; 8688 } 8689 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG)); 8690 } 8691 8692 /* create the coarse KSP object only once with defaults */ 8693 if (coarse_mat) { 8694 PetscBool isredundant, isbddc, force, valid; 8695 PetscViewer dbg_viewer = NULL; 8696 PetscBool isset, issym, isher, isspd; 8697 8698 if (pcbddc->dbg_flag) { 8699 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8700 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level)); 8701 } 8702 if (!pcbddc->coarse_ksp) { 8703 char prefix[256], str_level[16]; 8704 size_t len; 8705 8706 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp)); 8707 PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel)); 8708 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure)); 8709 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1)); 8710 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, 1)); 8711 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8712 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type)); 8713 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE)); 8714 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8715 /* TODO is this logic correct? should check for coarse_mat type */ 8716 PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8717 /* prefix */ 8718 PetscCall(PetscStrncpy(prefix, "", sizeof(prefix))); 8719 PetscCall(PetscStrncpy(str_level, "", sizeof(str_level))); 8720 if (!pcbddc->current_level) { 8721 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix))); 8722 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix))); 8723 } else { 8724 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len)); 8725 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */ 8726 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */ 8727 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8728 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1)); 8729 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)pcbddc->current_level)); 8730 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix))); 8731 } 8732 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix)); 8733 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8734 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8735 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8736 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8737 /* allow user customization */ 8738 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp)); 8739 /* get some info after set from options */ 8740 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8741 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8742 force = PETSC_FALSE; 8743 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8744 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8745 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8746 if (multilevel_allowed && !force && !valid) { 8747 isbddc = PETSC_TRUE; 8748 PetscCall(PCSetType(pc_temp, PCBDDC)); 8749 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1)); 8750 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio)); 8751 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels)); 8752 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */ 8753 PetscObjectOptionsBegin((PetscObject)pc_temp); 8754 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject)); 8755 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject)); 8756 PetscOptionsEnd(); 8757 pc_temp->setfromoptionscalled++; 8758 } 8759 } 8760 } 8761 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8762 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp)); 8763 if (nisdofs) { 8764 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray)); 8765 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i])); 8766 } 8767 if (nisneu) { 8768 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs])); 8769 PetscCall(ISDestroy(&isarray[nisdofs])); 8770 } 8771 if (nisvert) { 8772 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1])); 8773 PetscCall(ISDestroy(&isarray[nis - 1])); 8774 } 8775 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE)); 8776 8777 /* get some info after set from options */ 8778 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8779 8780 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8781 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type)); 8782 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */ 8783 force = PETSC_FALSE; 8784 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL)); 8785 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, "")); 8786 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC)); 8787 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant)); 8788 if (isredundant) { 8789 KSP inner_ksp; 8790 PC inner_pc; 8791 8792 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp)); 8793 PetscCall(KSPGetPC(inner_ksp, &inner_pc)); 8794 } 8795 8796 /* parameters which miss an API */ 8797 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc)); 8798 if (isbddc) { 8799 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data; 8800 8801 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8802 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8803 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8804 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8805 if (pcbddc_coarse->benign_saddle_point) { 8806 Mat coarsedivudotp_is; 8807 ISLocalToGlobalMapping l2gmap, rl2g, cl2g; 8808 IS row, col; 8809 const PetscInt *gidxs; 8810 PetscInt n, st, M, N; 8811 8812 PetscCall(MatGetSize(coarsedivudotp, &n, NULL)); 8813 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat))); 8814 st = st - n; 8815 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row)); 8816 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL)); 8817 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n)); 8818 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs)); 8819 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col)); 8820 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs)); 8821 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g)); 8822 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g)); 8823 PetscCall(ISGetSize(row, &M)); 8824 PetscCall(MatGetSize(coarse_mat, &N, NULL)); 8825 PetscCall(ISDestroy(&row)); 8826 PetscCall(ISDestroy(&col)); 8827 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is)); 8828 PetscCall(MatSetType(coarsedivudotp_is, MATIS)); 8829 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N)); 8830 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g)); 8831 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g)); 8832 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g)); 8833 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp)); 8834 PetscCall(MatDestroy(&coarsedivudotp)); 8835 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL)); 8836 PetscCall(MatDestroy(&coarsedivudotp_is)); 8837 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8838 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8839 } 8840 } 8841 8842 /* propagate symmetry info of coarse matrix */ 8843 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE)); 8844 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym)); 8845 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym)); 8846 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher)); 8847 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher)); 8848 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd)); 8849 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd)); 8850 8851 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE)); 8852 /* set operators */ 8853 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view")); 8854 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix)); 8855 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat)); 8856 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level)); 8857 } 8858 PetscCall(MatDestroy(&coarseG)); 8859 PetscCall(PetscFree(isarray)); 8860 #if 0 8861 { 8862 PetscViewer viewer; 8863 char filename[256]; 8864 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level)); 8865 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer)); 8866 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB)); 8867 PetscCall(MatView(coarse_mat,viewer)); 8868 PetscCall(PetscViewerPopFormat(viewer)); 8869 PetscCall(PetscViewerDestroy(&viewer)); 8870 } 8871 #endif 8872 8873 if (corners) { 8874 Vec gv; 8875 IS is; 8876 const PetscInt *idxs; 8877 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim; 8878 PetscScalar *coords; 8879 8880 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates"); 8881 PetscCall(VecGetSize(pcbddc->coarse_vec, &N)); 8882 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n)); 8883 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv)); 8884 PetscCall(VecSetBlockSize(gv, cdim)); 8885 PetscCall(VecSetSizes(gv, n * cdim, N * cdim)); 8886 PetscCall(VecSetType(gv, VECSTANDARD)); 8887 PetscCall(VecSetFromOptions(gv)); 8888 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */ 8889 8890 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8891 PetscCall(ISGetLocalSize(is, &n)); 8892 PetscCall(ISGetIndices(is, &idxs)); 8893 PetscCall(PetscMalloc1(n * cdim, &coords)); 8894 for (i = 0; i < n; i++) { 8895 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d]; 8896 } 8897 PetscCall(ISRestoreIndices(is, &idxs)); 8898 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is)); 8899 8900 PetscCall(ISGetLocalSize(corners, &n)); 8901 PetscCall(ISGetIndices(corners, &idxs)); 8902 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES)); 8903 PetscCall(ISRestoreIndices(corners, &idxs)); 8904 PetscCall(PetscFree(coords)); 8905 PetscCall(VecAssemblyBegin(gv)); 8906 PetscCall(VecAssemblyEnd(gv)); 8907 PetscCall(VecGetArray(gv, &coords)); 8908 if (pcbddc->coarse_ksp) { 8909 PC coarse_pc; 8910 PetscBool isbddc; 8911 8912 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc)); 8913 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc)); 8914 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */ 8915 PetscReal *realcoords; 8916 8917 PetscCall(VecGetLocalSize(gv, &n)); 8918 #if defined(PETSC_USE_COMPLEX) 8919 PetscCall(PetscMalloc1(n, &realcoords)); 8920 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]); 8921 #else 8922 realcoords = coords; 8923 #endif 8924 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords)); 8925 #if defined(PETSC_USE_COMPLEX) 8926 PetscCall(PetscFree(realcoords)); 8927 #endif 8928 } 8929 } 8930 PetscCall(VecRestoreArray(gv, &coords)); 8931 PetscCall(VecDestroy(&gv)); 8932 } 8933 PetscCall(ISDestroy(&corners)); 8934 8935 if (pcbddc->coarse_ksp) { 8936 Vec crhs, csol; 8937 8938 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol)); 8939 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs)); 8940 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL)); 8941 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs)); 8942 } 8943 PetscCall(MatDestroy(&coarsedivudotp)); 8944 8945 /* compute null space for coarse solver if the benign trick has been requested */ 8946 if (pcbddc->benign_null) { 8947 PetscCall(VecSet(pcbddc->vec1_P, 0.)); 8948 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES)); 8949 PetscCall(VecAssemblyBegin(pcbddc->vec1_P)); 8950 PetscCall(VecAssemblyEnd(pcbddc->vec1_P)); 8951 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 8952 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD)); 8953 if (coarse_mat) { 8954 Vec nullv; 8955 PetscScalar *array, *array2; 8956 PetscInt nl; 8957 8958 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL)); 8959 PetscCall(VecGetLocalSize(nullv, &nl)); 8960 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 8961 PetscCall(VecGetArray(nullv, &array2)); 8962 PetscCall(PetscArraycpy(array2, array, nl)); 8963 PetscCall(VecRestoreArray(nullv, &array2)); 8964 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array)); 8965 PetscCall(VecNormalize(nullv, NULL)); 8966 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace)); 8967 PetscCall(VecDestroy(&nullv)); 8968 } 8969 } 8970 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0)); 8971 8972 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 8973 if (pcbddc->coarse_ksp) { 8974 PetscBool ispreonly; 8975 8976 if (CoarseNullSpace) { 8977 PetscBool isnull; 8978 8979 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull)); 8980 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace)); 8981 /* TODO: add local nullspaces (if any) */ 8982 } 8983 /* setup coarse ksp */ 8984 PetscCall(KSPSetUp(pcbddc->coarse_ksp)); 8985 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8986 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly)); 8987 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) { 8988 KSP check_ksp; 8989 KSPType check_ksp_type; 8990 PC check_pc; 8991 Vec check_vec, coarse_vec; 8992 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0; 8993 PetscInt its; 8994 PetscBool compute_eigs; 8995 PetscReal *eigs_r, *eigs_c; 8996 PetscInt neigs; 8997 const char *prefix; 8998 8999 /* Create ksp object suitable for estimation of extreme eigenvalues */ 9000 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp)); 9001 PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel)); 9002 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0)); 9003 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE)); 9004 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat)); 9005 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_DEFAULT, pcbddc->coarse_size)); 9006 /* prevent from setup unneeded object */ 9007 PetscCall(KSPGetPC(check_ksp, &check_pc)); 9008 PetscCall(PCSetType(check_pc, PCNONE)); 9009 if (ispreonly) { 9010 check_ksp_type = KSPPREONLY; 9011 compute_eigs = PETSC_FALSE; 9012 } else { 9013 check_ksp_type = KSPGMRES; 9014 compute_eigs = PETSC_TRUE; 9015 } 9016 PetscCall(KSPSetType(check_ksp, check_ksp_type)); 9017 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs)); 9018 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs)); 9019 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1)); 9020 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix)); 9021 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix)); 9022 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_")); 9023 PetscCall(KSPSetFromOptions(check_ksp)); 9024 PetscCall(KSPSetUp(check_ksp)); 9025 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc)); 9026 PetscCall(KSPSetPC(check_ksp, check_pc)); 9027 /* create random vec */ 9028 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec)); 9029 PetscCall(VecSetRandom(check_vec, NULL)); 9030 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9031 /* solve coarse problem */ 9032 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec)); 9033 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec)); 9034 /* set eigenvalue estimation if preonly has not been requested */ 9035 if (compute_eigs) { 9036 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r)); 9037 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c)); 9038 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs)); 9039 if (neigs) { 9040 lambda_max = eigs_r[neigs - 1]; 9041 lambda_min = eigs_r[0]; 9042 if (pcbddc->use_coarse_estimates) { 9043 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 9044 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min)); 9045 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min))); 9046 } 9047 } 9048 } 9049 } 9050 9051 /* check coarse problem residual error */ 9052 if (pcbddc->dbg_flag) { 9053 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 9054 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9055 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec)); 9056 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error)); 9057 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec)); 9058 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error)); 9059 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates)); 9060 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer)); 9061 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer)); 9062 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error)); 9063 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error)); 9064 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n")); 9065 if (compute_eigs) { 9066 PetscReal lambda_max_s, lambda_min_s; 9067 KSPConvergedReason reason; 9068 PetscCall(KSPGetType(check_ksp, &check_ksp_type)); 9069 PetscCall(KSPGetIterationNumber(check_ksp, &its)); 9070 PetscCall(KSPGetConvergedReason(check_ksp, &reason)); 9071 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s)); 9072 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s)); 9073 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i])); 9074 } 9075 PetscCall(PetscViewerFlush(dbg_viewer)); 9076 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1))); 9077 } 9078 PetscCall(VecDestroy(&check_vec)); 9079 PetscCall(VecDestroy(&coarse_vec)); 9080 PetscCall(KSPDestroy(&check_ksp)); 9081 if (compute_eigs) { 9082 PetscCall(PetscFree(eigs_r)); 9083 PetscCall(PetscFree(eigs_c)); 9084 } 9085 } 9086 } 9087 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace)); 9088 /* print additional info */ 9089 if (pcbddc->dbg_flag) { 9090 /* waits until all processes reaches this point */ 9091 PetscCall(PetscBarrier((PetscObject)pc)); 9092 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level)); 9093 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9094 } 9095 9096 /* free memory */ 9097 PetscCall(MatDestroy(&coarse_mat)); 9098 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0)); 9099 PetscFunctionReturn(PETSC_SUCCESS); 9100 } 9101 9102 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n) 9103 { 9104 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9105 PC_IS *pcis = (PC_IS *)pc->data; 9106 IS subset, subset_mult, subset_n; 9107 PetscInt local_size, coarse_size = 0; 9108 PetscInt *local_primal_indices = NULL; 9109 const PetscInt *t_local_primal_indices; 9110 9111 PetscFunctionBegin; 9112 /* Compute global number of coarse dofs */ 9113 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first"); 9114 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n)); 9115 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset)); 9116 PetscCall(ISDestroy(&subset_n)); 9117 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult)); 9118 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n)); 9119 PetscCall(ISDestroy(&subset)); 9120 PetscCall(ISDestroy(&subset_mult)); 9121 PetscCall(ISGetLocalSize(subset_n, &local_size)); 9122 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size); 9123 PetscCall(PetscMalloc1(local_size, &local_primal_indices)); 9124 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices)); 9125 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size)); 9126 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices)); 9127 PetscCall(ISDestroy(&subset_n)); 9128 9129 if (pcbddc->dbg_flag) { 9130 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9131 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n")); 9132 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size)); 9133 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9134 } 9135 9136 /* get back data */ 9137 *coarse_size_n = coarse_size; 9138 *local_primal_indices_n = local_primal_indices; 9139 PetscFunctionReturn(PETSC_SUCCESS); 9140 } 9141 9142 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis) 9143 { 9144 IS localis_t; 9145 PetscInt i, lsize, *idxs, n; 9146 PetscScalar *vals; 9147 9148 PetscFunctionBegin; 9149 /* get indices in local ordering exploiting local to global map */ 9150 PetscCall(ISGetLocalSize(globalis, &lsize)); 9151 PetscCall(PetscMalloc1(lsize, &vals)); 9152 for (i = 0; i < lsize; i++) vals[i] = 1.0; 9153 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs)); 9154 PetscCall(VecSet(gwork, 0.0)); 9155 PetscCall(VecSet(lwork, 0.0)); 9156 if (idxs) { /* multilevel guard */ 9157 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE)); 9158 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES)); 9159 } 9160 PetscCall(VecAssemblyBegin(gwork)); 9161 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs)); 9162 PetscCall(PetscFree(vals)); 9163 PetscCall(VecAssemblyEnd(gwork)); 9164 /* now compute set in local ordering */ 9165 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9166 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD)); 9167 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals)); 9168 PetscCall(VecGetSize(lwork, &n)); 9169 for (i = 0, lsize = 0; i < n; i++) { 9170 if (PetscRealPart(vals[i]) > 0.5) lsize++; 9171 } 9172 PetscCall(PetscMalloc1(lsize, &idxs)); 9173 for (i = 0, lsize = 0; i < n; i++) { 9174 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i; 9175 } 9176 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals)); 9177 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t)); 9178 *localis = localis_t; 9179 PetscFunctionReturn(PETSC_SUCCESS); 9180 } 9181 9182 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr) 9183 { 9184 PC_IS *pcis = (PC_IS *)pc->data; 9185 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9186 PC_IS *pcisf; 9187 PC_BDDC *pcbddcf; 9188 PC pcf; 9189 9190 PetscFunctionBegin; 9191 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf)); 9192 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat)); 9193 PetscCall(PCSetType(pcf, PCBDDC)); 9194 9195 pcisf = (PC_IS *)pcf->data; 9196 pcbddcf = (PC_BDDC *)pcf->data; 9197 9198 pcisf->is_B_local = pcis->is_B_local; 9199 pcisf->vec1_N = pcis->vec1_N; 9200 pcisf->BtoNmap = pcis->BtoNmap; 9201 pcisf->n = pcis->n; 9202 pcisf->n_B = pcis->n_B; 9203 9204 PetscCall(PetscFree(pcbddcf->mat_graph)); 9205 PetscCall(PetscFree(pcbddcf->sub_schurs)); 9206 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph; 9207 pcbddcf->sub_schurs = schurs; 9208 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE; 9209 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0]; 9210 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1]; 9211 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin; 9212 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax; 9213 pcbddcf->use_faces = PETSC_TRUE; 9214 pcbddcf->use_change_of_basis = (PetscBool)!constraints; 9215 pcbddcf->use_change_on_faces = (PetscBool)!constraints; 9216 pcbddcf->use_qr_single = (PetscBool)!constraints; 9217 pcbddcf->fake_change = PETSC_TRUE; 9218 pcbddcf->dbg_flag = pcbddc->dbg_flag; 9219 9220 PetscCall(PCBDDCAdaptiveSelection(pcf)); 9221 PetscCall(PCBDDCConstraintsSetUp(pcf)); 9222 9223 *change = pcbddcf->ConstraintMatrix; 9224 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal)); 9225 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult)); 9226 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single; 9227 9228 if (schurs) pcbddcf->sub_schurs = NULL; 9229 pcbddcf->ConstraintMatrix = NULL; 9230 pcbddcf->mat_graph = NULL; 9231 pcisf->is_B_local = NULL; 9232 pcisf->vec1_N = NULL; 9233 pcisf->BtoNmap = NULL; 9234 PetscCall(PCDestroy(&pcf)); 9235 PetscFunctionReturn(PETSC_SUCCESS); 9236 } 9237 9238 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 9239 { 9240 PC_IS *pcis = (PC_IS *)pc->data; 9241 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9242 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 9243 Mat S_j; 9244 PetscInt *used_xadj, *used_adjncy; 9245 PetscBool free_used_adj; 9246 9247 PetscFunctionBegin; 9248 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9249 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 9250 free_used_adj = PETSC_FALSE; 9251 if (pcbddc->sub_schurs_layers == -1) { 9252 used_xadj = NULL; 9253 used_adjncy = NULL; 9254 } else { 9255 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 9256 used_xadj = pcbddc->mat_graph->xadj; 9257 used_adjncy = pcbddc->mat_graph->adjncy; 9258 } else if (pcbddc->computed_rowadj) { 9259 used_xadj = pcbddc->mat_graph->xadj; 9260 used_adjncy = pcbddc->mat_graph->adjncy; 9261 } else { 9262 PetscBool flg_row = PETSC_FALSE; 9263 const PetscInt *xadj, *adjncy; 9264 PetscInt nvtxs; 9265 9266 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9267 if (flg_row) { 9268 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy)); 9269 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1)); 9270 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs])); 9271 free_used_adj = PETSC_TRUE; 9272 } else { 9273 pcbddc->sub_schurs_layers = -1; 9274 used_xadj = NULL; 9275 used_adjncy = NULL; 9276 } 9277 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row)); 9278 } 9279 } 9280 9281 /* setup sub_schurs data */ 9282 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j)); 9283 if (!sub_schurs->schur_explicit) { 9284 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 9285 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D)); 9286 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL)); 9287 } else { 9288 Mat change = NULL; 9289 Vec scaling = NULL; 9290 IS change_primal = NULL, iP; 9291 PetscInt benign_n; 9292 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 9293 PetscBool need_change = PETSC_FALSE; 9294 PetscBool discrete_harmonic = PETSC_FALSE; 9295 9296 if (!pcbddc->use_vertices && reuse_solvers) { 9297 PetscInt n_vertices; 9298 9299 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices)); 9300 reuse_solvers = (PetscBool)!n_vertices; 9301 } 9302 if (!pcbddc->benign_change_explicit) { 9303 benign_n = pcbddc->benign_n; 9304 } else { 9305 benign_n = 0; 9306 } 9307 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 9308 We need a global reduction to avoid possible deadlocks. 9309 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 9310 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 9311 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 9312 PetscCall(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 9313 need_change = (PetscBool)(!need_change); 9314 } 9315 /* If the user defines additional constraints, we import them here */ 9316 if (need_change) { 9317 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph"); 9318 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr)); 9319 } 9320 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 9321 9322 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP)); 9323 if (iP) { 9324 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 9325 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL)); 9326 PetscOptionsEnd(); 9327 } 9328 if (discrete_harmonic) { 9329 Mat A; 9330 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A)); 9331 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL)); 9332 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP)); 9333 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx, 9334 pcbddc->benign_zerodiag_subs, change, change_primal)); 9335 PetscCall(MatDestroy(&A)); 9336 } else { 9337 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, 9338 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal)); 9339 } 9340 PetscCall(MatDestroy(&change)); 9341 PetscCall(ISDestroy(&change_primal)); 9342 } 9343 PetscCall(MatDestroy(&S_j)); 9344 9345 /* free adjacency */ 9346 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy)); 9347 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0)); 9348 PetscFunctionReturn(PETSC_SUCCESS); 9349 } 9350 9351 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 9352 { 9353 PC_IS *pcis = (PC_IS *)pc->data; 9354 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9355 PCBDDCGraph graph; 9356 9357 PetscFunctionBegin; 9358 /* attach interface graph for determining subsets */ 9359 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 9360 IS verticesIS, verticescomm; 9361 PetscInt vsize, *idxs; 9362 9363 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9364 PetscCall(ISGetSize(verticesIS, &vsize)); 9365 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs)); 9366 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm)); 9367 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs)); 9368 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS)); 9369 PetscCall(PCBDDCGraphCreate(&graph)); 9370 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount)); 9371 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm)); 9372 PetscCall(ISDestroy(&verticescomm)); 9373 PetscCall(PCBDDCGraphComputeConnectedComponents(graph)); 9374 } else { 9375 graph = pcbddc->mat_graph; 9376 } 9377 /* print some info */ 9378 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 9379 IS vertices; 9380 PetscInt nv, nedges, nfaces; 9381 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer)); 9382 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9383 PetscCall(ISGetSize(vertices, &nv)); 9384 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer)); 9385 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n")); 9386 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices)); 9387 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges)); 9388 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces)); 9389 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer)); 9390 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer)); 9391 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices)); 9392 } 9393 9394 /* sub_schurs init */ 9395 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs)); 9396 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE)); 9397 9398 /* free graph struct */ 9399 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph)); 9400 PetscFunctionReturn(PETSC_SUCCESS); 9401 } 9402 9403 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer) 9404 { 9405 Mat_IS *matis = (Mat_IS *)pc->pmat->data; 9406 PetscInt n = pc->pmat->rmap->n, ln, ni, st; 9407 const PetscInt *idxs; 9408 IS gis; 9409 9410 PetscFunctionBegin; 9411 if (!is) PetscFunctionReturn(PETSC_SUCCESS); 9412 PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL)); 9413 PetscCall(MatGetLocalSize(matis->A, NULL, &ln)); 9414 PetscCall(PetscArrayzero(matis->sf_leafdata, ln)); 9415 PetscCall(PetscArrayzero(matis->sf_rootdata, n)); 9416 PetscCall(ISGetLocalSize(is, &ni)); 9417 PetscCall(ISGetIndices(is, &idxs)); 9418 for (PetscInt i = 0; i < ni; i++) { 9419 if (idxs[i] < 0 || idxs[i] >= ln) continue; 9420 matis->sf_leafdata[idxs[i]] = 1; 9421 } 9422 PetscCall(ISRestoreIndices(is, &idxs)); 9423 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9424 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM)); 9425 ln = 0; 9426 for (PetscInt i = 0; i < n; i++) { 9427 if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st; 9428 } 9429 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis)); 9430 PetscCall(ISView(gis, viewer)); 9431 PetscCall(ISDestroy(&gis)); 9432 PetscFunctionReturn(PETSC_SUCCESS); 9433 } 9434 9435 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile) 9436 { 9437 PetscInt header[11]; 9438 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 9439 PetscViewer viewer; 9440 MPI_Comm comm = PetscObjectComm((PetscObject)pc); 9441 9442 PetscFunctionBegin; 9443 PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer)); 9444 if (load) { 9445 IS is; 9446 Mat A; 9447 9448 PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT)); 9449 PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9450 PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9451 PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9452 PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9453 PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9454 PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9455 PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9456 PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9457 PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9458 PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file"); 9459 if (header[0]) { 9460 PetscCall(ISCreate(comm, &is)); 9461 PetscCall(ISLoad(is, viewer)); 9462 PetscCall(PCBDDCSetDirichletBoundaries(pc, is)); 9463 PetscCall(ISDestroy(&is)); 9464 } 9465 if (header[1]) { 9466 PetscCall(ISCreate(comm, &is)); 9467 PetscCall(ISLoad(is, viewer)); 9468 PetscCall(PCBDDCSetNeumannBoundaries(pc, is)); 9469 PetscCall(ISDestroy(&is)); 9470 } 9471 if (header[2]) { 9472 IS *isarray; 9473 9474 PetscCall(PetscMalloc1(header[2], &isarray)); 9475 for (PetscInt i = 0; i < header[2]; i++) { 9476 PetscCall(ISCreate(comm, &isarray[i])); 9477 PetscCall(ISLoad(isarray[i], viewer)); 9478 } 9479 PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray)); 9480 for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i])); 9481 PetscCall(PetscFree(isarray)); 9482 } 9483 if (header[3]) { 9484 PetscCall(ISCreate(comm, &is)); 9485 PetscCall(ISLoad(is, viewer)); 9486 PetscCall(PCBDDCSetPrimalVerticesIS(pc, is)); 9487 PetscCall(ISDestroy(&is)); 9488 } 9489 if (header[4]) { 9490 PetscCall(MatCreate(comm, &A)); 9491 PetscCall(MatSetType(A, MATAIJ)); 9492 PetscCall(MatLoad(A, viewer)); 9493 PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8])); 9494 PetscCall(MatDestroy(&A)); 9495 } 9496 if (header[9]) { 9497 PetscCall(MatCreate(comm, &A)); 9498 PetscCall(MatSetType(A, MATIS)); 9499 PetscCall(MatLoad(A, viewer)); 9500 PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL)); 9501 PetscCall(MatDestroy(&A)); 9502 } 9503 } else { 9504 header[0] = (PetscInt) !!pcbddc->DirichletBoundariesLocal; 9505 header[1] = (PetscInt) !!pcbddc->NeumannBoundariesLocal; 9506 header[2] = pcbddc->n_ISForDofsLocal; 9507 header[3] = (PetscInt) !!pcbddc->user_primal_vertices_local; 9508 header[4] = (PetscInt) !!pcbddc->discretegradient; 9509 header[5] = pcbddc->nedorder; 9510 header[6] = pcbddc->nedfield; 9511 header[7] = (PetscInt)pcbddc->nedglobal; 9512 header[8] = (PetscInt)pcbddc->conforming; 9513 header[9] = (PetscInt) !!pcbddc->divudotp; 9514 header[10] = (PetscInt)pcbddc->divudotp_trans; 9515 if (header[4]) header[3] = 0; 9516 9517 PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT)); 9518 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer)); 9519 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer)); 9520 for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer)); 9521 if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer)); 9522 if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer)); 9523 if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer)); 9524 } 9525 PetscCall(PetscViewerDestroy(&viewer)); 9526 PetscFunctionReturn(PETSC_SUCCESS); 9527 } 9528 9529 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9530 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9531 { 9532 Mat At; 9533 IS rows; 9534 PetscInt rst, ren; 9535 PetscLayout rmap; 9536 9537 PetscFunctionBegin; 9538 rst = ren = 0; 9539 if (ccomm != MPI_COMM_NULL) { 9540 PetscCall(PetscLayoutCreate(ccomm, &rmap)); 9541 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N)); 9542 PetscCall(PetscLayoutSetBlockSize(rmap, 1)); 9543 PetscCall(PetscLayoutSetUp(rmap)); 9544 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren)); 9545 } 9546 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows)); 9547 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At)); 9548 PetscCall(ISDestroy(&rows)); 9549 9550 if (ccomm != MPI_COMM_NULL) { 9551 Mat_MPIAIJ *a, *b; 9552 IS from, to; 9553 Vec gvec; 9554 PetscInt lsize; 9555 9556 PetscCall(MatCreate(ccomm, B)); 9557 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N)); 9558 PetscCall(MatSetType(*B, MATAIJ)); 9559 PetscCall(PetscLayoutDestroy(&((*B)->rmap))); 9560 PetscCall(PetscLayoutSetUp((*B)->cmap)); 9561 a = (Mat_MPIAIJ *)At->data; 9562 b = (Mat_MPIAIJ *)(*B)->data; 9563 PetscCallMPI(MPI_Comm_size(ccomm, &b->size)); 9564 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank)); 9565 PetscCall(PetscObjectReference((PetscObject)a->A)); 9566 PetscCall(PetscObjectReference((PetscObject)a->B)); 9567 b->A = a->A; 9568 b->B = a->B; 9569 9570 b->donotstash = a->donotstash; 9571 b->roworiented = a->roworiented; 9572 b->rowindices = NULL; 9573 b->rowvalues = NULL; 9574 b->getrowactive = PETSC_FALSE; 9575 9576 (*B)->rmap = rmap; 9577 (*B)->factortype = A->factortype; 9578 (*B)->assembled = PETSC_TRUE; 9579 (*B)->insertmode = NOT_SET_VALUES; 9580 (*B)->preallocated = PETSC_TRUE; 9581 9582 if (a->colmap) { 9583 #if defined(PETSC_USE_CTABLE) 9584 PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap)); 9585 #else 9586 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap)); 9587 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N)); 9588 #endif 9589 } else b->colmap = NULL; 9590 if (a->garray) { 9591 PetscInt len; 9592 len = a->B->cmap->n; 9593 PetscCall(PetscMalloc1(len + 1, &b->garray)); 9594 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len)); 9595 } else b->garray = NULL; 9596 9597 PetscCall(PetscObjectReference((PetscObject)a->lvec)); 9598 b->lvec = a->lvec; 9599 9600 /* cannot use VecScatterCopy */ 9601 PetscCall(VecGetLocalSize(b->lvec, &lsize)); 9602 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from)); 9603 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to)); 9604 PetscCall(MatCreateVecs(*B, &gvec, NULL)); 9605 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx)); 9606 PetscCall(ISDestroy(&from)); 9607 PetscCall(ISDestroy(&to)); 9608 PetscCall(VecDestroy(&gvec)); 9609 } 9610 PetscCall(MatDestroy(&At)); 9611 PetscFunctionReturn(PETSC_SUCCESS); 9612 } 9613 9614 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */ 9615 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA) 9616 { 9617 PetscBool isaij; 9618 MPI_Comm comm; 9619 9620 PetscFunctionBegin; 9621 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 9622 PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, "")); 9623 PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented"); 9624 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij)); 9625 if (isaij) { /* SeqAIJ supports repeated rows */ 9626 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA)); 9627 } else { 9628 Mat A_loc; 9629 Mat_SeqAIJ *da; 9630 PetscSF sf; 9631 PetscInt ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata; 9632 PetscScalar *daa; 9633 const PetscInt *idxs; 9634 const PetscSFNode *iremotes; 9635 PetscSFNode *remotes; 9636 9637 /* SF for incoming rows */ 9638 PetscCall(PetscSFCreate(comm, &sf)); 9639 PetscCall(ISGetLocalSize(rows, &ni)); 9640 PetscCall(ISGetIndices(rows, &idxs)); 9641 PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs)); 9642 PetscCall(ISRestoreIndices(rows, &idxs)); 9643 9644 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc)); 9645 da = (Mat_SeqAIJ *)A_loc->data; 9646 PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata)); 9647 for (PetscInt i = 0; i < m; i++) { 9648 rdata[2 * i + 0] = da->i[i + 1] - da->i[i]; 9649 rdata[2 * i + 1] = da->i[i]; 9650 } 9651 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9652 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE)); 9653 PetscCall(PetscMalloc1(ni + 1, &di)); 9654 di[0] = 0; 9655 for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0]; 9656 PetscCall(PetscMalloc1(di[ni], &dj)); 9657 PetscCall(PetscMalloc1(di[ni], &daa)); 9658 PetscCall(PetscMalloc1(di[ni], &remotes)); 9659 9660 PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes)); 9661 9662 /* SF graph for nonzeros */ 9663 c = 0; 9664 for (PetscInt i = 0; i < ni; i++) { 9665 const PetscInt rank = iremotes[i].rank; 9666 const PetscInt rsize = ldata[2 * i]; 9667 for (PetscInt j = 0; j < rsize; j++) { 9668 remotes[c].rank = rank; 9669 remotes[c].index = ldata[2 * i + 1] + j; 9670 c++; 9671 } 9672 } 9673 PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]); 9674 PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER)); 9675 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9676 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE)); 9677 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9678 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE)); 9679 9680 PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA)); 9681 PetscCall(MatDestroy(&A_loc)); 9682 PetscCall(PetscSFDestroy(&sf)); 9683 PetscCall(PetscFree(di)); 9684 PetscCall(PetscFree(dj)); 9685 PetscCall(PetscFree(daa)); 9686 PetscCall(PetscFree(remotes)); 9687 PetscCall(PetscFree2(ldata, rdata)); 9688 } 9689 PetscFunctionReturn(PETSC_SUCCESS); 9690 } 9691