1 #include <petsc/private/pcbddcimpl.h> 2 #include <petsc/private/pcbddcprivateimpl.h> 3 #include <../src/mat/impls/dense/seq/dense.h> 4 #include <petscblaslapack.h> 5 6 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *, PetscInt, PetscBT, PetscInt *, PetscInt *, PetscInt *); 7 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat, PetscBool, MatReuse, Mat *); 8 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC, Vec, Vec); 9 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC, Vec, Vec); 10 11 /* if v2 is not present, correction is done in-place */ 12 PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full) { 13 PetscScalar *array; 14 PetscScalar *array2; 15 16 PetscFunctionBegin; 17 if (!ctx->benign_n) PetscFunctionReturn(0); 18 if (sol && full) { 19 PetscInt n_I, size_schur; 20 21 /* get sizes */ 22 PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL)); 23 PetscCall(VecGetSize(v, &n_I)); 24 n_I = n_I - size_schur; 25 /* get schur sol from array */ 26 PetscCall(VecGetArray(v, &array)); 27 PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I)); 28 PetscCall(VecRestoreArray(v, &array)); 29 /* apply interior sol correction */ 30 PetscCall(MatMultTranspose(ctx->benign_csAIB, ctx->benign_dummy_schur_vec, ctx->benign_corr_work)); 31 PetscCall(VecResetArray(ctx->benign_dummy_schur_vec)); 32 PetscCall(MatMultAdd(ctx->benign_AIIm1ones, ctx->benign_corr_work, v, v)); 33 } 34 if (v2) { 35 PetscInt nl; 36 37 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 38 PetscCall(VecGetLocalSize(v2, &nl)); 39 PetscCall(VecGetArray(v2, &array2)); 40 PetscCall(PetscArraycpy(array2, array, nl)); 41 } else { 42 PetscCall(VecGetArray(v, &array)); 43 array2 = array; 44 } 45 if (!sol) { /* change rhs */ 46 PetscInt n; 47 for (n = 0; n < ctx->benign_n; n++) { 48 PetscScalar sum = 0.; 49 const PetscInt *cols; 50 PetscInt nz, i; 51 52 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz)); 53 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols)); 54 for (i = 0; i < nz - 1; i++) sum += array[cols[i]]; 55 #if defined(PETSC_USE_COMPLEX) 56 sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz)); 57 #else 58 sum = -sum / nz; 59 #endif 60 for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum; 61 ctx->benign_save_vals[n] = array2[cols[nz - 1]]; 62 array2[cols[nz - 1]] = sum; 63 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols)); 64 } 65 } else { 66 PetscInt n; 67 for (n = 0; n < ctx->benign_n; n++) { 68 PetscScalar sum = 0.; 69 const PetscInt *cols; 70 PetscInt nz, i; 71 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz)); 72 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols)); 73 for (i = 0; i < nz - 1; i++) sum += array[cols[i]]; 74 #if defined(PETSC_USE_COMPLEX) 75 sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz)); 76 #else 77 sum = -sum / nz; 78 #endif 79 for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum; 80 array2[cols[nz - 1]] = ctx->benign_save_vals[n]; 81 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols)); 82 } 83 } 84 if (v2) { 85 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 86 PetscCall(VecRestoreArray(v2, &array2)); 87 } else { 88 PetscCall(VecRestoreArray(v, &array)); 89 } 90 if (!sol && full) { 91 Vec usedv; 92 PetscInt n_I, size_schur; 93 94 /* get sizes */ 95 PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL)); 96 PetscCall(VecGetSize(v, &n_I)); 97 n_I = n_I - size_schur; 98 /* compute schur rhs correction */ 99 if (v2) { 100 usedv = v2; 101 } else { 102 usedv = v; 103 } 104 /* apply schur rhs correction */ 105 PetscCall(MatMultTranspose(ctx->benign_AIIm1ones, usedv, ctx->benign_corr_work)); 106 PetscCall(VecGetArrayRead(usedv, (const PetscScalar **)&array)); 107 PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I)); 108 PetscCall(VecRestoreArrayRead(usedv, (const PetscScalar **)&array)); 109 PetscCall(MatMultAdd(ctx->benign_csAIB, ctx->benign_corr_work, ctx->benign_dummy_schur_vec, ctx->benign_dummy_schur_vec)); 110 PetscCall(VecResetArray(ctx->benign_dummy_schur_vec)); 111 } 112 PetscFunctionReturn(0); 113 } 114 115 static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full) { 116 PCBDDCReuseSolvers ctx; 117 PetscBool copy = PETSC_FALSE; 118 119 PetscFunctionBegin; 120 PetscCall(PCShellGetContext(pc, &ctx)); 121 if (full) { 122 #if defined(PETSC_HAVE_MUMPS) 123 PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1)); 124 #endif 125 #if defined(PETSC_HAVE_MKL_PARDISO) 126 PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0)); 127 #endif 128 copy = ctx->has_vertices; 129 } else { /* interior solver */ 130 #if defined(PETSC_HAVE_MUMPS) 131 PetscCall(MatMumpsSetIcntl(ctx->F, 26, 0)); 132 #endif 133 #if defined(PETSC_HAVE_MKL_PARDISO) 134 PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 1)); 135 #endif 136 copy = PETSC_TRUE; 137 } 138 /* copy rhs into factored matrix workspace */ 139 if (copy) { 140 PetscInt n; 141 PetscScalar *array, *array_solver; 142 143 PetscCall(VecGetLocalSize(rhs, &n)); 144 PetscCall(VecGetArrayRead(rhs, (const PetscScalar **)&array)); 145 PetscCall(VecGetArray(ctx->rhs, &array_solver)); 146 PetscCall(PetscArraycpy(array_solver, array, n)); 147 PetscCall(VecRestoreArray(ctx->rhs, &array_solver)); 148 PetscCall(VecRestoreArrayRead(rhs, (const PetscScalar **)&array)); 149 150 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->rhs, NULL, PETSC_FALSE, full)); 151 if (transpose) { 152 PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, ctx->sol)); 153 } else { 154 PetscCall(MatSolve(ctx->F, ctx->rhs, ctx->sol)); 155 } 156 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->sol, NULL, PETSC_TRUE, full)); 157 158 /* get back data to caller worskpace */ 159 PetscCall(VecGetArrayRead(ctx->sol, (const PetscScalar **)&array_solver)); 160 PetscCall(VecGetArray(sol, &array)); 161 PetscCall(PetscArraycpy(array, array_solver, n)); 162 PetscCall(VecRestoreArray(sol, &array)); 163 PetscCall(VecRestoreArrayRead(ctx->sol, (const PetscScalar **)&array_solver)); 164 } else { 165 if (ctx->benign_n) { 166 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, rhs, ctx->rhs, PETSC_FALSE, full)); 167 if (transpose) { 168 PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, sol)); 169 } else { 170 PetscCall(MatSolve(ctx->F, ctx->rhs, sol)); 171 } 172 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, sol, NULL, PETSC_TRUE, full)); 173 } else { 174 if (transpose) { 175 PetscCall(MatSolveTranspose(ctx->F, rhs, sol)); 176 } else { 177 PetscCall(MatSolve(ctx->F, rhs, sol)); 178 } 179 } 180 } 181 /* restore defaults */ 182 #if defined(PETSC_HAVE_MUMPS) 183 PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1)); 184 #endif 185 #if defined(PETSC_HAVE_MKL_PARDISO) 186 PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0)); 187 #endif 188 PetscFunctionReturn(0); 189 } 190 191 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol) { 192 PetscFunctionBegin; 193 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_TRUE)); 194 PetscFunctionReturn(0); 195 } 196 197 static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol) { 198 PetscFunctionBegin; 199 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_TRUE)); 200 PetscFunctionReturn(0); 201 } 202 203 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol) { 204 PetscFunctionBegin; 205 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_FALSE)); 206 PetscFunctionReturn(0); 207 } 208 209 static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol) { 210 PetscFunctionBegin; 211 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_FALSE)); 212 PetscFunctionReturn(0); 213 } 214 215 static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer) { 216 PCBDDCReuseSolvers ctx; 217 PetscBool iascii; 218 219 PetscFunctionBegin; 220 PetscCall(PCShellGetContext(pc, &ctx)); 221 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 222 if (iascii) PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO)); 223 PetscCall(MatView(ctx->F, viewer)); 224 if (iascii) PetscCall(PetscViewerPopFormat(viewer)); 225 PetscFunctionReturn(0); 226 } 227 228 static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse) { 229 PetscInt i; 230 231 PetscFunctionBegin; 232 PetscCall(MatDestroy(&reuse->F)); 233 PetscCall(VecDestroy(&reuse->sol)); 234 PetscCall(VecDestroy(&reuse->rhs)); 235 PetscCall(PCDestroy(&reuse->interior_solver)); 236 PetscCall(PCDestroy(&reuse->correction_solver)); 237 PetscCall(ISDestroy(&reuse->is_R)); 238 PetscCall(ISDestroy(&reuse->is_B)); 239 PetscCall(VecScatterDestroy(&reuse->correction_scatter_B)); 240 PetscCall(VecDestroy(&reuse->sol_B)); 241 PetscCall(VecDestroy(&reuse->rhs_B)); 242 for (i = 0; i < reuse->benign_n; i++) { PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i])); } 243 PetscCall(PetscFree(reuse->benign_zerodiag_subs)); 244 PetscCall(PetscFree(reuse->benign_save_vals)); 245 PetscCall(MatDestroy(&reuse->benign_csAIB)); 246 PetscCall(MatDestroy(&reuse->benign_AIIm1ones)); 247 PetscCall(VecDestroy(&reuse->benign_corr_work)); 248 PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec)); 249 PetscFunctionReturn(0); 250 } 251 252 static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc) { 253 PCBDDCReuseSolvers ctx; 254 255 PetscFunctionBegin; 256 PetscCall(PCShellGetContext(pc, &ctx)); 257 PetscCall(PCBDDCReuseSolversReset(ctx)); 258 PetscCall(PetscFree(ctx)); 259 PetscCall(PCShellSetContext(pc, NULL)); 260 PetscFunctionReturn(0); 261 } 262 263 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S) { 264 Mat B, C, D, Bd, Cd, AinvBd; 265 KSP ksp; 266 PC pc; 267 PetscBool isLU, isILU, isCHOL, Bdense, Cdense; 268 PetscReal fill = 2.0; 269 PetscInt n_I; 270 PetscMPIInt size; 271 272 PetscFunctionBegin; 273 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M), &size)); 274 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for parallel matrices"); 275 if (reuse == MAT_REUSE_MATRIX) { 276 PetscBool Sdense; 277 278 PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense)); 279 PetscCheck(Sdense, PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "S should dense"); 280 } 281 PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D)); 282 PetscCall(MatSchurComplementGetKSP(M, &ksp)); 283 PetscCall(KSPGetPC(ksp, &pc)); 284 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCLU, &isLU)); 285 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCILU, &isILU)); 286 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCCHOLESKY, &isCHOL)); 287 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &Bdense)); 288 PetscCall(PetscObjectTypeCompare((PetscObject)C, MATSEQDENSE, &Cdense)); 289 PetscCall(MatGetSize(B, &n_I, NULL)); 290 if (n_I) { 291 if (!Bdense) { 292 PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd)); 293 } else { 294 Bd = B; 295 } 296 297 if (isLU || isILU || isCHOL) { 298 Mat fact; 299 PetscCall(KSPSetUp(ksp)); 300 PetscCall(PCFactorGetMatrix(pc, &fact)); 301 PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 302 PetscCall(MatMatSolve(fact, Bd, AinvBd)); 303 } else { 304 PetscBool ex = PETSC_TRUE; 305 306 if (ex) { 307 Mat Ainvd; 308 309 PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd)); 310 PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd)); 311 PetscCall(MatDestroy(&Ainvd)); 312 } else { 313 Vec sol, rhs; 314 PetscScalar *arrayrhs, *arraysol; 315 PetscInt i, nrhs, n; 316 317 PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 318 PetscCall(MatGetSize(Bd, &n, &nrhs)); 319 PetscCall(MatDenseGetArray(Bd, &arrayrhs)); 320 PetscCall(MatDenseGetArray(AinvBd, &arraysol)); 321 PetscCall(KSPGetSolution(ksp, &sol)); 322 PetscCall(KSPGetRhs(ksp, &rhs)); 323 for (i = 0; i < nrhs; i++) { 324 PetscCall(VecPlaceArray(rhs, arrayrhs + i * n)); 325 PetscCall(VecPlaceArray(sol, arraysol + i * n)); 326 PetscCall(KSPSolve(ksp, rhs, sol)); 327 PetscCall(VecResetArray(rhs)); 328 PetscCall(VecResetArray(sol)); 329 } 330 PetscCall(MatDenseRestoreArray(Bd, &arrayrhs)); 331 PetscCall(MatDenseRestoreArray(AinvBd, &arrayrhs)); 332 } 333 } 334 if (!Bdense & !issym) { PetscCall(MatDestroy(&Bd)); } 335 336 if (!issym) { 337 if (!Cdense) { 338 PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd)); 339 } else { 340 Cd = C; 341 } 342 PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S)); 343 if (!Cdense) { PetscCall(MatDestroy(&Cd)); } 344 } else { 345 PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S)); 346 if (!Bdense) { PetscCall(MatDestroy(&Bd)); } 347 } 348 PetscCall(MatDestroy(&AinvBd)); 349 } 350 351 if (D) { 352 Mat Dd; 353 PetscBool Ddense; 354 355 PetscCall(PetscObjectTypeCompare((PetscObject)D, MATSEQDENSE, &Ddense)); 356 if (!Ddense) { 357 PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd)); 358 } else { 359 Dd = D; 360 } 361 if (n_I) { 362 PetscCall(MatAYPX(*S, -1.0, Dd, SAME_NONZERO_PATTERN)); 363 } else { 364 if (reuse == MAT_INITIAL_MATRIX) { 365 PetscCall(MatDuplicate(Dd, MAT_COPY_VALUES, S)); 366 } else { 367 PetscCall(MatCopy(Dd, *S, SAME_NONZERO_PATTERN)); 368 } 369 } 370 if (!Ddense) { PetscCall(MatDestroy(&Dd)); } 371 } else { 372 PetscCall(MatScale(*S, -1.0)); 373 } 374 PetscFunctionReturn(0); 375 } 376 377 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal) { 378 Mat F, A_II, A_IB, A_BI, A_BB, AE_II; 379 Mat S_all; 380 Vec gstash, lstash; 381 VecScatter sstash; 382 IS is_I, is_I_layer; 383 IS all_subsets, all_subsets_mult, all_subsets_n; 384 PetscScalar *stasharray, *Bwork; 385 PetscInt *nnz, *all_local_idx_N; 386 PetscInt *auxnum1, *auxnum2; 387 PetscInt i, subset_size, max_subset_size; 388 PetscInt n_B, extra, local_size, global_size; 389 PetscInt local_stash_size; 390 PetscBLASInt B_N, B_ierr, B_lwork, *pivots; 391 MPI_Comm comm_n; 392 PetscBool deluxe = PETSC_TRUE; 393 PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE; 394 PetscViewer matl_dbg_viewer = NULL; 395 PetscBool flg; 396 397 PetscFunctionBegin; 398 PetscCall(MatDestroy(&sub_schurs->A)); 399 PetscCall(MatDestroy(&sub_schurs->S)); 400 if (Ain) { 401 PetscCall(PetscObjectReference((PetscObject)Ain)); 402 sub_schurs->A = Ain; 403 } 404 405 PetscCall(PetscObjectReference((PetscObject)Sin)); 406 sub_schurs->S = Sin; 407 if (sub_schurs->schur_explicit) { sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A); } 408 409 /* preliminary checks */ 410 PetscCheck(sub_schurs->schur_explicit || !compute_Stilda, PetscObjectComm((PetscObject)sub_schurs->l2gmap), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO"); 411 412 if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE; 413 414 /* debug (MATLAB) */ 415 if (sub_schurs->debug) { 416 PetscMPIInt size, rank; 417 PetscInt nr, *print_schurs_ranks, print_schurs = PETSC_FALSE; 418 419 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &size)); 420 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank)); 421 nr = size; 422 PetscCall(PetscMalloc1(nr, &print_schurs_ranks)); 423 PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 424 PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks", "Ranks to debug (all if the option is not used)", NULL, print_schurs_ranks, &nr, &flg)); 425 if (!flg) print_schurs = PETSC_TRUE; 426 else { 427 print_schurs = PETSC_FALSE; 428 for (i = 0; i < nr; i++) 429 if (print_schurs_ranks[i] == (PetscInt)rank) { 430 print_schurs = PETSC_TRUE; 431 break; 432 } 433 } 434 PetscOptionsEnd(); 435 PetscCall(PetscFree(print_schurs_ranks)); 436 if (print_schurs) { 437 char filename[256]; 438 439 PetscCall(PetscSNPrintf(filename, sizeof(filename), "sub_schurs_Schur_r%d.m", PetscGlobalRank)); 440 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &matl_dbg_viewer)); 441 PetscCall(PetscViewerPushFormat(matl_dbg_viewer, PETSC_VIEWER_ASCII_MATLAB)); 442 } 443 } 444 445 /* restrict work on active processes */ 446 if (sub_schurs->restrict_comm) { 447 PetscSubcomm subcomm; 448 PetscMPIInt color, rank; 449 450 color = 0; 451 if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */ 452 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank)); 453 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &subcomm)); 454 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 455 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 456 PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm), &comm_n, NULL)); 457 PetscCall(PetscSubcommDestroy(&subcomm)); 458 if (!sub_schurs->n_subs) { 459 PetscCall(PetscCommDestroy(&comm_n)); 460 PetscFunctionReturn(0); 461 } 462 } else { 463 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &comm_n, NULL)); 464 } 465 466 /* get Schur complement matrices */ 467 if (!sub_schurs->schur_explicit) { 468 Mat tA_IB, tA_BI, tA_BB; 469 PetscBool isseqsbaij; 470 PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, NULL, &tA_IB, &tA_BI, &tA_BB)); 471 PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB, MATSEQSBAIJ, &isseqsbaij)); 472 if (isseqsbaij) { 473 PetscCall(MatConvert(tA_BB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BB)); 474 PetscCall(MatConvert(tA_IB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_IB)); 475 PetscCall(MatConvert(tA_BI, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BI)); 476 } else { 477 PetscCall(PetscObjectReference((PetscObject)tA_BB)); 478 A_BB = tA_BB; 479 PetscCall(PetscObjectReference((PetscObject)tA_IB)); 480 A_IB = tA_IB; 481 PetscCall(PetscObjectReference((PetscObject)tA_BI)); 482 A_BI = tA_BI; 483 } 484 } else { 485 A_II = NULL; 486 A_IB = NULL; 487 A_BI = NULL; 488 A_BB = NULL; 489 } 490 S_all = NULL; 491 492 /* determine interior problems */ 493 PetscCall(ISGetLocalSize(sub_schurs->is_I, &i)); 494 if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */ 495 PetscBT touched; 496 const PetscInt *idx_B; 497 PetscInt n_I, n_B, n_local_dofs, n_prev_added, j, layer, *local_numbering; 498 499 PetscCheck(xadj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot request layering without adjacency"); 500 /* get sizes */ 501 PetscCall(ISGetLocalSize(sub_schurs->is_I, &n_I)); 502 PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B)); 503 504 PetscCall(PetscMalloc1(n_I + n_B, &local_numbering)); 505 PetscCall(PetscBTCreate(n_I + n_B, &touched)); 506 PetscCall(PetscBTMemzero(n_I + n_B, touched)); 507 508 /* all boundary dofs must be skipped when adding layers */ 509 PetscCall(ISGetIndices(sub_schurs->is_B, &idx_B)); 510 for (j = 0; j < n_B; j++) { PetscCall(PetscBTSet(touched, idx_B[j])); } 511 PetscCall(PetscArraycpy(local_numbering, idx_B, n_B)); 512 PetscCall(ISRestoreIndices(sub_schurs->is_B, &idx_B)); 513 514 /* add prescribed number of layers of dofs */ 515 n_local_dofs = n_B; 516 n_prev_added = n_B; 517 for (layer = 0; layer < nlayers; layer++) { 518 PetscInt n_added = 0; 519 if (n_local_dofs == n_I + n_B) break; 520 PetscCheck(n_local_dofs <= n_I + n_B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")", layer, n_local_dofs, n_I + n_B); 521 PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering + n_local_dofs, n_prev_added, touched, xadj, adjncy, &n_added)); 522 n_prev_added = n_added; 523 n_local_dofs += n_added; 524 if (!n_added) break; 525 } 526 PetscCall(PetscBTDestroy(&touched)); 527 528 /* IS for I layer dofs in original numbering */ 529 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I), n_local_dofs - n_B, local_numbering + n_B, PETSC_COPY_VALUES, &is_I_layer)); 530 PetscCall(PetscFree(local_numbering)); 531 PetscCall(ISSort(is_I_layer)); 532 /* IS for I layer dofs in I numbering */ 533 if (!sub_schurs->schur_explicit) { 534 ISLocalToGlobalMapping ItoNmap; 535 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I, &ItoNmap)); 536 PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap, IS_GTOLM_DROP, is_I_layer, &is_I)); 537 PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap)); 538 539 /* II block */ 540 PetscCall(MatCreateSubMatrix(A_II, is_I, is_I, MAT_INITIAL_MATRIX, &AE_II)); 541 } 542 } else { 543 PetscInt n_I; 544 545 /* IS for I dofs in original numbering */ 546 PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I)); 547 is_I_layer = sub_schurs->is_I; 548 549 /* IS for I dofs in I numbering (strided 1) */ 550 if (!sub_schurs->schur_explicit) { 551 PetscCall(ISGetSize(sub_schurs->is_I, &n_I)); 552 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I), n_I, 0, 1, &is_I)); 553 554 /* II block is the same */ 555 PetscCall(PetscObjectReference((PetscObject)A_II)); 556 AE_II = A_II; 557 } 558 } 559 560 /* Get info on subset sizes and sum of all subsets sizes */ 561 max_subset_size = 0; 562 local_size = 0; 563 for (i = 0; i < sub_schurs->n_subs; i++) { 564 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 565 max_subset_size = PetscMax(subset_size, max_subset_size); 566 local_size += subset_size; 567 } 568 569 /* Work arrays for local indices */ 570 extra = 0; 571 PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B)); 572 if (sub_schurs->schur_explicit && is_I_layer) { PetscCall(ISGetLocalSize(is_I_layer, &extra)); } 573 PetscCall(PetscMalloc1(n_B + extra, &all_local_idx_N)); 574 if (extra) { 575 const PetscInt *idxs; 576 PetscCall(ISGetIndices(is_I_layer, &idxs)); 577 PetscCall(PetscArraycpy(all_local_idx_N, idxs, extra)); 578 PetscCall(ISRestoreIndices(is_I_layer, &idxs)); 579 } 580 PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum1)); 581 PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum2)); 582 583 /* Get local indices in local numbering */ 584 local_size = 0; 585 local_stash_size = 0; 586 for (i = 0; i < sub_schurs->n_subs; i++) { 587 const PetscInt *idxs; 588 589 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 590 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 591 /* start (smallest in global ordering) and multiplicity */ 592 auxnum1[i] = idxs[0]; 593 auxnum2[i] = subset_size * subset_size; 594 /* subset indices in local numbering */ 595 PetscCall(PetscArraycpy(all_local_idx_N + local_size + extra, idxs, subset_size)); 596 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 597 local_size += subset_size; 598 local_stash_size += subset_size * subset_size; 599 } 600 601 /* allocate extra workspace needed only for GETRI or SYTRF */ 602 use_potr = use_sytr = PETSC_FALSE; 603 if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) { 604 use_potr = PETSC_TRUE; 605 } else if (sub_schurs->is_symmetric) { 606 use_sytr = PETSC_TRUE; 607 } 608 if (local_size && !use_potr) { 609 PetscScalar lwork, dummyscalar = 0.; 610 PetscBLASInt dummyint = 0; 611 612 B_lwork = -1; 613 PetscCall(PetscBLASIntCast(local_size, &B_N)); 614 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 615 if (use_sytr) { 616 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr)); 617 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYTRF Lapack routine %d", (int)B_ierr); 618 } else { 619 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr)); 620 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GETRI Lapack routine %d", (int)B_ierr); 621 } 622 PetscCall(PetscFPTrapPop()); 623 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 624 PetscCall(PetscMalloc2(B_lwork, &Bwork, B_N, &pivots)); 625 } else { 626 Bwork = NULL; 627 pivots = NULL; 628 } 629 630 /* prepare data for summing up properly schurs on subsets */ 631 PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum1, PETSC_OWN_POINTER, &all_subsets_n)); 632 PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap, all_subsets_n, &all_subsets)); 633 PetscCall(ISDestroy(&all_subsets_n)); 634 PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum2, PETSC_OWN_POINTER, &all_subsets_mult)); 635 PetscCall(ISRenumber(all_subsets, all_subsets_mult, &global_size, &all_subsets_n)); 636 PetscCall(ISDestroy(&all_subsets)); 637 PetscCall(ISDestroy(&all_subsets_mult)); 638 PetscCall(ISGetLocalSize(all_subsets_n, &i)); 639 PetscCheck(i == local_stash_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT, i, local_stash_size); 640 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, local_stash_size, NULL, &lstash)); 641 PetscCall(VecCreateMPI(comm_n, PETSC_DECIDE, global_size, &gstash)); 642 PetscCall(VecScatterCreate(lstash, NULL, gstash, all_subsets_n, &sstash)); 643 PetscCall(ISDestroy(&all_subsets_n)); 644 645 /* subset indices in local boundary numbering */ 646 if (!sub_schurs->is_Ej_all) { 647 PetscInt *all_local_idx_B; 648 649 PetscCall(PetscMalloc1(local_size, &all_local_idx_B)); 650 PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap, IS_GTOLM_DROP, local_size, all_local_idx_N + extra, &subset_size, all_local_idx_B)); 651 PetscCheck(subset_size == local_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT, subset_size, local_size); 652 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, local_size, all_local_idx_B, PETSC_OWN_POINTER, &sub_schurs->is_Ej_all)); 653 } 654 655 if (change) { 656 ISLocalToGlobalMapping BtoS; 657 IS change_primal_B; 658 IS change_primal_all; 659 660 PetscCheck(!sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 661 PetscCheck(!sub_schurs->change, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 662 PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change_primal_sub)); 663 for (i = 0; i < sub_schurs->n_subs; i++) { 664 ISLocalToGlobalMapping NtoS; 665 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i], &NtoS)); 666 PetscCall(ISGlobalToLocalMappingApplyIS(NtoS, IS_GTOLM_DROP, change_primal, &sub_schurs->change_primal_sub[i])); 667 PetscCall(ISLocalToGlobalMappingDestroy(&NtoS)); 668 } 669 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, change_primal, &change_primal_B)); 670 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all, &BtoS)); 671 PetscCall(ISGlobalToLocalMappingApplyIS(BtoS, IS_GTOLM_DROP, change_primal_B, &change_primal_all)); 672 PetscCall(ISLocalToGlobalMappingDestroy(&BtoS)); 673 PetscCall(ISDestroy(&change_primal_B)); 674 PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change)); 675 for (i = 0; i < sub_schurs->n_subs; i++) { 676 Mat change_sub; 677 678 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 679 PetscCall(KSPCreate(PETSC_COMM_SELF, &sub_schurs->change[i])); 680 PetscCall(KSPSetType(sub_schurs->change[i], KSPPREONLY)); 681 if (!sub_schurs->change_with_qr) { 682 PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_sub)); 683 } else { 684 Mat change_subt; 685 PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_subt)); 686 PetscCall(MatConvert(change_subt, MATSEQDENSE, MAT_INITIAL_MATRIX, &change_sub)); 687 PetscCall(MatDestroy(&change_subt)); 688 } 689 PetscCall(KSPSetOperators(sub_schurs->change[i], change_sub, change_sub)); 690 PetscCall(MatDestroy(&change_sub)); 691 PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i], sub_schurs->prefix)); 692 PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i], "sub_schurs_change_")); 693 } 694 PetscCall(ISDestroy(&change_primal_all)); 695 } 696 697 /* Local matrix of all local Schur on subsets (transposed) */ 698 if (!sub_schurs->S_Ej_all) { 699 Mat T; 700 PetscScalar *v; 701 PetscInt *ii, *jj; 702 PetscInt cum, i, j, k; 703 704 /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks) 705 Allocate properly a representative matrix and duplicate */ 706 PetscCall(PetscMalloc3(local_size + 1, &ii, local_stash_size, &jj, local_stash_size, &v)); 707 ii[0] = 0; 708 cum = 0; 709 for (i = 0; i < sub_schurs->n_subs; i++) { 710 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 711 for (j = 0; j < subset_size; j++) { 712 const PetscInt row = cum + j; 713 PetscInt col = cum; 714 715 ii[row + 1] = ii[row] + subset_size; 716 for (k = ii[row]; k < ii[row + 1]; k++) { 717 jj[k] = col; 718 col++; 719 } 720 } 721 cum += subset_size; 722 } 723 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, local_size, local_size, ii, jj, v, &T)); 724 PetscCall(MatDuplicate(T, MAT_DO_NOT_COPY_VALUES, &sub_schurs->S_Ej_all)); 725 PetscCall(MatDestroy(&T)); 726 PetscCall(PetscFree3(ii, jj, v)); 727 } 728 /* matrices for deluxe scaling and adaptive selection */ 729 if (compute_Stilda) { 730 if (!sub_schurs->sum_S_Ej_tilda_all) { PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_tilda_all)); } 731 if (!sub_schurs->sum_S_Ej_inv_all && deluxe) { PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_inv_all)); } 732 } 733 734 /* Compute Schur complements explicitly */ 735 F = NULL; 736 if (!sub_schurs->schur_explicit) { 737 /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested; 738 it is not efficient, unless the economic version of the scaling is used */ 739 Mat S_Ej_expl; 740 PetscScalar *work; 741 PetscInt j, *dummy_idx; 742 PetscBool Sdense; 743 744 PetscCall(PetscMalloc2(max_subset_size, &dummy_idx, max_subset_size * max_subset_size, &work)); 745 local_size = 0; 746 for (i = 0; i < sub_schurs->n_subs; i++) { 747 IS is_subset_B; 748 Mat AE_EE, AE_IE, AE_EI, S_Ej; 749 750 /* subsets in original and boundary numbering */ 751 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, sub_schurs->is_subs[i], &is_subset_B)); 752 /* EE block */ 753 PetscCall(MatCreateSubMatrix(A_BB, is_subset_B, is_subset_B, MAT_INITIAL_MATRIX, &AE_EE)); 754 /* IE block */ 755 PetscCall(MatCreateSubMatrix(A_IB, is_I, is_subset_B, MAT_INITIAL_MATRIX, &AE_IE)); 756 /* EI block */ 757 if (sub_schurs->is_symmetric) { 758 PetscCall(MatCreateTranspose(AE_IE, &AE_EI)); 759 } else if (sub_schurs->is_hermitian) { 760 PetscCall(MatCreateHermitianTranspose(AE_IE, &AE_EI)); 761 } else { 762 PetscCall(MatCreateSubMatrix(A_BI, is_subset_B, is_I, MAT_INITIAL_MATRIX, &AE_EI)); 763 } 764 PetscCall(ISDestroy(&is_subset_B)); 765 PetscCall(MatCreateSchurComplement(AE_II, AE_II, AE_IE, AE_EI, AE_EE, &S_Ej)); 766 PetscCall(MatDestroy(&AE_EE)); 767 PetscCall(MatDestroy(&AE_IE)); 768 PetscCall(MatDestroy(&AE_EI)); 769 if (AE_II == A_II) { /* we can reuse the same ksp */ 770 KSP ksp; 771 PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &ksp)); 772 PetscCall(MatSchurComplementSetKSP(S_Ej, ksp)); 773 } else { /* build new ksp object which inherits ksp and pc types from the original one */ 774 KSP origksp, schurksp; 775 PC origpc, schurpc; 776 KSPType ksp_type; 777 PetscInt n_internal; 778 PetscBool ispcnone; 779 780 PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &origksp)); 781 PetscCall(MatSchurComplementGetKSP(S_Ej, &schurksp)); 782 PetscCall(KSPGetType(origksp, &ksp_type)); 783 PetscCall(KSPSetType(schurksp, ksp_type)); 784 PetscCall(KSPGetPC(schurksp, &schurpc)); 785 PetscCall(KSPGetPC(origksp, &origpc)); 786 PetscCall(PetscObjectTypeCompare((PetscObject)origpc, PCNONE, &ispcnone)); 787 if (!ispcnone) { 788 PCType pc_type; 789 PetscCall(PCGetType(origpc, &pc_type)); 790 PetscCall(PCSetType(schurpc, pc_type)); 791 } else { 792 PetscCall(PCSetType(schurpc, PCLU)); 793 } 794 PetscCall(ISGetSize(is_I, &n_internal)); 795 if (!n_internal) { /* UMFPACK gives error with 0 sized problems */ 796 MatSolverType solver = NULL; 797 PetscCall(PCFactorGetMatSolverType(origpc, (MatSolverType *)&solver)); 798 if (solver) PetscCall(PCFactorSetMatSolverType(schurpc, solver)); 799 } 800 PetscCall(KSPSetUp(schurksp)); 801 } 802 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 803 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &S_Ej_expl)); 804 PetscCall(PCBDDCComputeExplicitSchur(S_Ej, sub_schurs->is_symmetric, MAT_REUSE_MATRIX, &S_Ej_expl)); 805 PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl, MATSEQDENSE, &Sdense)); 806 if (Sdense) { 807 for (j = 0; j < subset_size; j++) { dummy_idx[j] = local_size + j; } 808 PetscCall(MatSetValues(sub_schurs->S_Ej_all, subset_size, dummy_idx, subset_size, dummy_idx, work, INSERT_VALUES)); 809 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented for sparse matrices"); 810 PetscCall(MatDestroy(&S_Ej)); 811 PetscCall(MatDestroy(&S_Ej_expl)); 812 local_size += subset_size; 813 } 814 PetscCall(PetscFree2(dummy_idx, work)); 815 /* free */ 816 PetscCall(ISDestroy(&is_I)); 817 PetscCall(MatDestroy(&AE_II)); 818 PetscCall(PetscFree(all_local_idx_N)); 819 } else { 820 Mat A, cs_AIB_mat = NULL, benign_AIIm1_ones_mat = NULL; 821 Mat *gdswA; 822 Vec Dall = NULL; 823 IS is_A_all, *is_p_r = NULL; 824 MatType Stype; 825 PetscScalar *work, *S_data, *schur_factor, infty = PETSC_MAX_REAL; 826 PetscScalar *SEj_arr = NULL, *SEjinv_arr = NULL; 827 const PetscScalar *rS_data; 828 PetscInt n, n_I, size_schur, size_active_schur, cum, cum2; 829 PetscBool economic, solver_S, S_lower_triangular = PETSC_FALSE; 830 PetscBool schur_has_vertices, factor_workaround; 831 PetscBool use_cholesky; 832 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 833 PetscBool oldpin; 834 #endif 835 836 /* get sizes */ 837 n_I = 0; 838 if (is_I_layer) { PetscCall(ISGetLocalSize(is_I_layer, &n_I)); } 839 economic = PETSC_FALSE; 840 PetscCall(ISGetLocalSize(sub_schurs->is_I, &cum)); 841 if (cum != n_I) economic = PETSC_TRUE; 842 PetscCall(MatGetLocalSize(sub_schurs->A, &n, NULL)); 843 size_active_schur = local_size; 844 845 /* import scaling vector (wrong formulation if we have 3D edges) */ 846 if (scaling && compute_Stilda) { 847 const PetscScalar *array; 848 PetscScalar *array2; 849 const PetscInt *idxs; 850 PetscInt i; 851 852 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs)); 853 PetscCall(VecCreateSeq(PETSC_COMM_SELF, size_active_schur, &Dall)); 854 PetscCall(VecGetArrayRead(scaling, &array)); 855 PetscCall(VecGetArray(Dall, &array2)); 856 for (i = 0; i < size_active_schur; i++) array2[i] = array[idxs[i]]; 857 PetscCall(VecRestoreArray(Dall, &array2)); 858 PetscCall(VecRestoreArrayRead(scaling, &array)); 859 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs)); 860 deluxe = PETSC_FALSE; 861 } 862 863 /* size active schurs does not count any dirichlet or vertex dof on the interface */ 864 factor_workaround = PETSC_FALSE; 865 schur_has_vertices = PETSC_FALSE; 866 cum = n_I + size_active_schur; 867 if (sub_schurs->is_dir) { 868 const PetscInt *idxs; 869 PetscInt n_dir; 870 871 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir)); 872 PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs)); 873 PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_dir)); 874 PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs)); 875 cum += n_dir; 876 if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE; 877 } 878 /* include the primal vertices in the Schur complement */ 879 if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) { 880 PetscInt n_v; 881 882 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v)); 883 if (n_v) { 884 const PetscInt *idxs; 885 886 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 887 PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_v)); 888 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 889 cum += n_v; 890 if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE; 891 schur_has_vertices = PETSC_TRUE; 892 } 893 } 894 size_schur = cum - n_I; 895 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, all_local_idx_N, PETSC_OWN_POINTER, &is_A_all)); 896 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 897 oldpin = sub_schurs->A->boundtocpu; 898 PetscCall(MatBindToCPU(sub_schurs->A, PETSC_TRUE)); 899 #endif 900 if (cum == n) { 901 PetscCall(ISSetPermutation(is_A_all)); 902 PetscCall(MatPermute(sub_schurs->A, is_A_all, is_A_all, &A)); 903 } else { 904 PetscCall(MatCreateSubMatrix(sub_schurs->A, is_A_all, is_A_all, MAT_INITIAL_MATRIX, &A)); 905 } 906 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 907 PetscCall(MatBindToCPU(sub_schurs->A, oldpin)); 908 #endif 909 PetscCall(MatSetOptionsPrefixFactor(A, sub_schurs->prefix)); 910 PetscCall(MatAppendOptionsPrefixFactor(A, "sub_schurs_")); 911 912 /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory 913 this is a workaround */ 914 if (benign_n) { 915 Vec v, benign_AIIm1_ones; 916 ISLocalToGlobalMapping N_to_reor; 917 IS is_p0, is_p0_p; 918 PetscScalar *cs_AIB, *AIIm1_data; 919 PetscInt sizeA; 920 921 PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all, &N_to_reor)); 922 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, benign_n, benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 923 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, is_p0, &is_p0_p)); 924 PetscCall(ISDestroy(&is_p0)); 925 PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones)); 926 PetscCall(VecGetSize(v, &sizeA)); 927 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, sizeA, benign_n, NULL, &benign_AIIm1_ones_mat)); 928 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, benign_n, NULL, &cs_AIB_mat)); 929 PetscCall(MatDenseGetArray(cs_AIB_mat, &cs_AIB)); 930 PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data)); 931 PetscCall(PetscMalloc1(benign_n, &is_p_r)); 932 /* compute colsum of A_IB restricted to pressures */ 933 for (i = 0; i < benign_n; i++) { 934 const PetscScalar *array; 935 const PetscInt *idxs; 936 PetscInt j, nz; 937 938 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, benign_zerodiag_subs[i], &is_p_r[i])); 939 PetscCall(ISGetLocalSize(is_p_r[i], &nz)); 940 PetscCall(ISGetIndices(is_p_r[i], &idxs)); 941 for (j = 0; j < nz; j++) AIIm1_data[idxs[j] + sizeA * i] = 1.; 942 PetscCall(ISRestoreIndices(is_p_r[i], &idxs)); 943 PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i)); 944 PetscCall(MatMult(A, benign_AIIm1_ones, v)); 945 PetscCall(VecResetArray(benign_AIIm1_ones)); 946 PetscCall(VecGetArrayRead(v, &array)); 947 for (j = 0; j < size_schur; j++) { 948 #if defined(PETSC_USE_COMPLEX) 949 cs_AIB[i * size_schur + j] = (PetscRealPart(array[j + n_I]) / nz + PETSC_i * (PetscImaginaryPart(array[j + n_I]) / nz)); 950 #else 951 cs_AIB[i * size_schur + j] = array[j + n_I] / nz; 952 #endif 953 } 954 PetscCall(VecRestoreArrayRead(v, &array)); 955 } 956 PetscCall(MatDenseRestoreArray(cs_AIB_mat, &cs_AIB)); 957 PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data)); 958 PetscCall(VecDestroy(&v)); 959 PetscCall(VecDestroy(&benign_AIIm1_ones)); 960 PetscCall(MatSetOption(A, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 961 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 962 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 963 PetscCall(MatZeroRowsColumnsIS(A, is_p0_p, 1.0, NULL, NULL)); 964 PetscCall(ISDestroy(&is_p0_p)); 965 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor)); 966 } 967 PetscCall(MatSetOption(A, MAT_SYMMETRIC, sub_schurs->is_symmetric)); 968 PetscCall(MatSetOption(A, MAT_HERMITIAN, sub_schurs->is_hermitian)); 969 PetscCall(MatSetOption(A, MAT_SPD, sub_schurs->is_posdef)); 970 971 /* for complexes, symmetric and hermitian at the same time implies null imaginary part */ 972 use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric); 973 974 /* when using the benign subspace trick, the local Schur complements are SPD */ 975 /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization 976 Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */ 977 if (benign_trick) { 978 sub_schurs->is_posdef = PETSC_TRUE; 979 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &flg)); 980 if (flg) use_cholesky = PETSC_FALSE; 981 } 982 983 if (n_I) { 984 IS is_schur; 985 char stype[64]; 986 PetscBool gpu = PETSC_FALSE; 987 988 if (use_cholesky) { 989 PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_CHOLESKY, &F)); 990 } else { 991 PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_LU, &F)); 992 } 993 PetscCall(MatSetErrorIfFailure(A, PETSC_TRUE)); 994 #if defined(PETSC_HAVE_MKL_PARDISO) 995 if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10)); 996 #endif 997 /* subsets ordered last */ 998 PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is_schur)); 999 PetscCall(MatFactorSetSchurIS(F, is_schur)); 1000 PetscCall(ISDestroy(&is_schur)); 1001 1002 /* factorization step */ 1003 if (use_cholesky) { 1004 PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL)); 1005 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */ 1006 PetscCall(MatMumpsSetIcntl(F, 19, 2)); 1007 #endif 1008 PetscCall(MatCholeskyFactorNumeric(F, A, NULL)); 1009 S_lower_triangular = PETSC_TRUE; 1010 } else { 1011 PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL)); 1012 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */ 1013 PetscCall(MatMumpsSetIcntl(F, 19, 3)); 1014 #endif 1015 PetscCall(MatLUFactorNumeric(F, A, NULL)); 1016 } 1017 PetscCall(MatViewFromOptions(F, (PetscObject)A, "-mat_factor_view")); 1018 1019 if (matl_dbg_viewer) { 1020 Mat S; 1021 IS is; 1022 1023 PetscCall(PetscObjectSetName((PetscObject)A, "A")); 1024 PetscCall(MatView(A, matl_dbg_viewer)); 1025 PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); 1026 PetscCall(PetscObjectSetName((PetscObject)S, "S")); 1027 PetscCall(MatView(S, matl_dbg_viewer)); 1028 PetscCall(MatDestroy(&S)); 1029 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_I, 0, 1, &is)); 1030 PetscCall(PetscObjectSetName((PetscObject)is, "I")); 1031 PetscCall(ISView(is, matl_dbg_viewer)); 1032 PetscCall(ISDestroy(&is)); 1033 PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is)); 1034 PetscCall(PetscObjectSetName((PetscObject)is, "B")); 1035 PetscCall(ISView(is, matl_dbg_viewer)); 1036 PetscCall(ISDestroy(&is)); 1037 PetscCall(PetscObjectSetName((PetscObject)is_A_all, "IA")); 1038 PetscCall(ISView(is_A_all, matl_dbg_viewer)); 1039 for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { 1040 IS is; 1041 char name[16]; 1042 1043 PetscCall(PetscSNPrintf(name, sizeof(name), "IE%" PetscInt_FMT, i)); 1044 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1045 PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &is)); 1046 PetscCall(PetscObjectSetName((PetscObject)is, name)); 1047 PetscCall(ISView(is, matl_dbg_viewer)); 1048 PetscCall(ISDestroy(&is)); 1049 if (sub_schurs->change) { 1050 Mat T; 1051 1052 PetscCall(PetscSNPrintf(name, sizeof(name), "TE%" PetscInt_FMT, i)); 1053 PetscCall(KSPGetOperators(sub_schurs->change[i], &T, NULL)); 1054 PetscCall(PetscObjectSetName((PetscObject)T, name)); 1055 PetscCall(MatView(T, matl_dbg_viewer)); 1056 PetscCall(PetscSNPrintf(name, sizeof(name), "ITE%" PetscInt_FMT, i)); 1057 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i], name)); 1058 PetscCall(ISView(sub_schurs->change_primal_sub[i], matl_dbg_viewer)); 1059 } 1060 cum += subset_size; 1061 } 1062 PetscCall(PetscViewerFlush(matl_dbg_viewer)); 1063 } 1064 1065 /* get explicit Schur Complement computed during numeric factorization */ 1066 PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 1067 PetscCall(PetscStrncpy(stype, MATSEQDENSE, sizeof(stype))); 1068 #if defined(PETSC_HAVE_CUDA) 1069 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &gpu, MATSEQAIJVIENNACL, MATSEQAIJCUSPARSE, "")); 1070 #endif 1071 if (gpu) PetscCall(PetscStrncpy(stype, MATSEQDENSECUDA, sizeof(stype))); 1072 PetscCall(PetscOptionsGetString(NULL, sub_schurs->prefix, "-sub_schurs_schur_mat_type", stype, sizeof(stype), NULL)); 1073 PetscCall(MatConvert(S_all, stype, MAT_INPLACE_MATRIX, &S_all)); 1074 PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef)); 1075 PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian)); 1076 PetscCall(MatGetType(S_all, &Stype)); 1077 1078 /* we can reuse the solvers if we are not using the economic version */ 1079 reuse_solvers = (PetscBool)(reuse_solvers && !economic); 1080 if (!sub_schurs->gdsw) { 1081 factor_workaround = (PetscBool)(reuse_solvers && factor_workaround); 1082 if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) reuse_solvers = factor_workaround = PETSC_FALSE; 1083 } 1084 solver_S = PETSC_TRUE; 1085 1086 /* update the Schur complement with the change of basis on the pressures */ 1087 if (benign_n) { 1088 const PetscScalar *cs_AIB; 1089 PetscScalar *S_data, *AIIm1_data; 1090 Mat S2 = NULL, S3 = NULL; /* dbg */ 1091 PetscScalar *S2_data, *S3_data; /* dbg */ 1092 Vec v, benign_AIIm1_ones; 1093 PetscInt sizeA; 1094 1095 PetscCall(MatDenseGetArray(S_all, &S_data)); 1096 PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones)); 1097 PetscCall(VecGetSize(v, &sizeA)); 1098 #if defined(PETSC_HAVE_MUMPS) 1099 PetscCall(MatMumpsSetIcntl(F, 26, 0)); 1100 #endif 1101 #if defined(PETSC_HAVE_MKL_PARDISO) 1102 PetscCall(MatMkl_PardisoSetCntl(F, 70, 1)); 1103 #endif 1104 PetscCall(MatDenseGetArrayRead(cs_AIB_mat, &cs_AIB)); 1105 PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data)); 1106 if (matl_dbg_viewer) { 1107 PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S2)); 1108 PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S3)); 1109 PetscCall(MatDenseGetArray(S2, &S2_data)); 1110 PetscCall(MatDenseGetArray(S3, &S3_data)); 1111 } 1112 for (i = 0; i < benign_n; i++) { 1113 PetscScalar *array, sum = 0., one = 1., *sums; 1114 const PetscInt *idxs; 1115 PetscInt k, j, nz; 1116 PetscBLASInt B_k, B_n; 1117 1118 PetscCall(PetscCalloc1(benign_n, &sums)); 1119 PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i)); 1120 PetscCall(VecCopy(benign_AIIm1_ones, v)); 1121 PetscCall(MatSolve(F, v, benign_AIIm1_ones)); 1122 PetscCall(MatMult(A, benign_AIIm1_ones, v)); 1123 PetscCall(VecResetArray(benign_AIIm1_ones)); 1124 /* p0 dofs (eliminated) are excluded from the sums */ 1125 for (k = 0; k < benign_n; k++) { 1126 PetscCall(ISGetLocalSize(is_p_r[k], &nz)); 1127 PetscCall(ISGetIndices(is_p_r[k], &idxs)); 1128 for (j = 0; j < nz - 1; j++) sums[k] -= AIIm1_data[idxs[j] + sizeA * i]; 1129 PetscCall(ISRestoreIndices(is_p_r[k], &idxs)); 1130 } 1131 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 1132 if (matl_dbg_viewer) { 1133 Vec vv; 1134 char name[16]; 1135 1136 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array + n_I, &vv)); 1137 PetscCall(PetscSNPrintf(name, sizeof(name), "Pvs%" PetscInt_FMT, i)); 1138 PetscCall(PetscObjectSetName((PetscObject)vv, name)); 1139 PetscCall(VecView(vv, matl_dbg_viewer)); 1140 } 1141 /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */ 1142 /* cs_AIB already scaled by 1./nz */ 1143 B_k = 1; 1144 for (k = 0; k < benign_n; k++) { 1145 sum = sums[k]; 1146 PetscCall(PetscBLASIntCast(size_schur, &B_n)); 1147 1148 if (PetscAbsScalar(sum) == 0.0) continue; 1149 if (k == i) { 1150 PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1151 if (matl_dbg_viewer) { PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n)); } 1152 } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */ 1153 sum /= 2.0; 1154 PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1155 if (matl_dbg_viewer) { PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n)); } 1156 } 1157 } 1158 sum = 1.; 1159 PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1160 if (matl_dbg_viewer) { PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S2_data, &B_n)); } 1161 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 1162 /* set p0 entry of AIIm1_ones to zero */ 1163 PetscCall(ISGetLocalSize(is_p_r[i], &nz)); 1164 PetscCall(ISGetIndices(is_p_r[i], &idxs)); 1165 for (j = 0; j < benign_n; j++) AIIm1_data[idxs[nz - 1] + sizeA * j] = 0.; 1166 PetscCall(ISRestoreIndices(is_p_r[i], &idxs)); 1167 PetscCall(PetscFree(sums)); 1168 } 1169 PetscCall(VecDestroy(&benign_AIIm1_ones)); 1170 if (matl_dbg_viewer) { 1171 PetscCall(MatDenseRestoreArray(S2, &S2_data)); 1172 PetscCall(MatDenseRestoreArray(S3, &S3_data)); 1173 } 1174 if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */ 1175 PetscInt k, j; 1176 for (k = 0; k < size_schur; k++) { 1177 for (j = k; j < size_schur; j++) { S_data[j * size_schur + k] = PetscConj(S_data[k * size_schur + j]); } 1178 } 1179 } 1180 1181 /* restore defaults */ 1182 #if defined(PETSC_HAVE_MUMPS) 1183 PetscCall(MatMumpsSetIcntl(F, 26, -1)); 1184 #endif 1185 #if defined(PETSC_HAVE_MKL_PARDISO) 1186 PetscCall(MatMkl_PardisoSetCntl(F, 70, 0)); 1187 #endif 1188 PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat, &cs_AIB)); 1189 PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data)); 1190 PetscCall(VecDestroy(&v)); 1191 PetscCall(MatDenseRestoreArray(S_all, &S_data)); 1192 if (matl_dbg_viewer) { 1193 Mat S; 1194 1195 PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); 1196 PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); 1197 PetscCall(PetscObjectSetName((PetscObject)S, "Sb")); 1198 PetscCall(MatView(S, matl_dbg_viewer)); 1199 PetscCall(MatDestroy(&S)); 1200 PetscCall(PetscObjectSetName((PetscObject)S2, "S2P")); 1201 PetscCall(MatView(S2, matl_dbg_viewer)); 1202 PetscCall(PetscObjectSetName((PetscObject)S3, "S3P")); 1203 PetscCall(MatView(S3, matl_dbg_viewer)); 1204 PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat, "cs")); 1205 PetscCall(MatView(cs_AIB_mat, matl_dbg_viewer)); 1206 PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 1207 } 1208 PetscCall(MatDestroy(&S2)); 1209 PetscCall(MatDestroy(&S3)); 1210 } 1211 if (!reuse_solvers) { 1212 for (i = 0; i < benign_n; i++) { PetscCall(ISDestroy(&is_p_r[i])); } 1213 PetscCall(PetscFree(is_p_r)); 1214 PetscCall(MatDestroy(&cs_AIB_mat)); 1215 PetscCall(MatDestroy(&benign_AIIm1_ones_mat)); 1216 } 1217 } else { /* we can't use MatFactor when size_schur == size_of_the_problem */ 1218 PetscCall(MatConvert(A, MATSEQDENSE, MAT_INITIAL_MATRIX, &S_all)); 1219 PetscCall(MatGetType(S_all, &Stype)); 1220 reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */ 1221 factor_workaround = PETSC_FALSE; 1222 solver_S = PETSC_FALSE; 1223 } 1224 1225 if (reuse_solvers) { 1226 Mat A_II, pA_II, Afake; 1227 Vec vec1_B; 1228 PCBDDCReuseSolvers msolv_ctx; 1229 PetscInt n_R; 1230 1231 if (sub_schurs->reuse_solver) { 1232 PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1233 } else { 1234 PetscCall(PetscNew(&sub_schurs->reuse_solver)); 1235 } 1236 msolv_ctx = sub_schurs->reuse_solver; 1237 PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, &pA_II, NULL, NULL, NULL)); 1238 PetscCall(PetscObjectReference((PetscObject)F)); 1239 msolv_ctx->F = F; 1240 PetscCall(MatCreateVecs(F, &msolv_ctx->sol, NULL)); 1241 /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */ 1242 { 1243 PetscScalar *array; 1244 PetscInt n; 1245 1246 PetscCall(VecGetLocalSize(msolv_ctx->sol, &n)); 1247 PetscCall(VecGetArray(msolv_ctx->sol, &array)); 1248 PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol), 1, n, array, &msolv_ctx->rhs)); 1249 PetscCall(VecRestoreArray(msolv_ctx->sol, &array)); 1250 } 1251 msolv_ctx->has_vertices = schur_has_vertices; 1252 1253 /* interior solver */ 1254 PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->interior_solver)); 1255 PetscCall(PCSetOperators(msolv_ctx->interior_solver, A_II, pA_II)); 1256 PetscCall(PCSetType(msolv_ctx->interior_solver, PCSHELL)); 1257 PetscCall(PCShellSetName(msolv_ctx->interior_solver, "Interior solver (w/o Schur factorization)")); 1258 PetscCall(PCShellSetContext(msolv_ctx->interior_solver, msolv_ctx)); 1259 PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View)); 1260 PetscCall(PCShellSetApply(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Interior)); 1261 PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver, PCBDDCReuseSolvers_InteriorTranspose)); 1262 if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Destroy)); 1263 1264 /* correction solver */ 1265 if (!sub_schurs->gdsw) { 1266 PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->correction_solver)); 1267 PetscCall(PCSetType(msolv_ctx->correction_solver, PCSHELL)); 1268 PetscCall(PCShellSetName(msolv_ctx->correction_solver, "Correction solver (with Schur factorization)")); 1269 PetscCall(PCShellSetContext(msolv_ctx->correction_solver, msolv_ctx)); 1270 PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View)); 1271 PetscCall(PCShellSetApply(msolv_ctx->correction_solver, PCBDDCReuseSolvers_Correction)); 1272 PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver, PCBDDCReuseSolvers_CorrectionTranspose)); 1273 1274 /* scatter and vecs for Schur complement solver */ 1275 PetscCall(MatCreateVecs(S_all, &msolv_ctx->sol_B, &msolv_ctx->rhs_B)); 1276 PetscCall(MatCreateVecs(sub_schurs->S, &vec1_B, NULL)); 1277 if (!schur_has_vertices) { 1278 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_A_all, &msolv_ctx->is_B)); 1279 PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, NULL, &msolv_ctx->correction_scatter_B)); 1280 PetscCall(PetscObjectReference((PetscObject)is_A_all)); 1281 msolv_ctx->is_R = is_A_all; 1282 } else { 1283 IS is_B_all; 1284 const PetscInt *idxs; 1285 PetscInt dual, n_v, n; 1286 1287 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v)); 1288 dual = size_schur - n_v; 1289 PetscCall(ISGetLocalSize(is_A_all, &n)); 1290 PetscCall(ISGetIndices(is_A_all, &idxs)); 1291 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), dual, idxs + n_I, PETSC_COPY_VALUES, &is_B_all)); 1292 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_B_all, &msolv_ctx->is_B)); 1293 PetscCall(ISDestroy(&is_B_all)); 1294 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all), dual, 0, 1, &is_B_all)); 1295 PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, is_B_all, &msolv_ctx->correction_scatter_B)); 1296 PetscCall(ISDestroy(&is_B_all)); 1297 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), n - n_v, idxs, PETSC_COPY_VALUES, &msolv_ctx->is_R)); 1298 PetscCall(ISRestoreIndices(is_A_all, &idxs)); 1299 } 1300 PetscCall(ISGetLocalSize(msolv_ctx->is_R, &n_R)); 1301 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_R, n_R, 0, NULL, &Afake)); 1302 PetscCall(MatAssemblyBegin(Afake, MAT_FINAL_ASSEMBLY)); 1303 PetscCall(MatAssemblyEnd(Afake, MAT_FINAL_ASSEMBLY)); 1304 PetscCall(PCSetOperators(msolv_ctx->correction_solver, Afake, Afake)); 1305 PetscCall(MatDestroy(&Afake)); 1306 PetscCall(VecDestroy(&vec1_B)); 1307 } 1308 /* communicate benign info to solver context */ 1309 if (benign_n) { 1310 PetscScalar *array; 1311 1312 msolv_ctx->benign_n = benign_n; 1313 msolv_ctx->benign_zerodiag_subs = is_p_r; 1314 PetscCall(PetscMalloc1(benign_n, &msolv_ctx->benign_save_vals)); 1315 msolv_ctx->benign_csAIB = cs_AIB_mat; 1316 PetscCall(MatCreateVecs(cs_AIB_mat, &msolv_ctx->benign_corr_work, NULL)); 1317 PetscCall(VecGetArray(msolv_ctx->benign_corr_work, &array)); 1318 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array, &msolv_ctx->benign_dummy_schur_vec)); 1319 PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work, &array)); 1320 msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat; 1321 } 1322 } else { 1323 if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1324 PetscCall(PetscFree(sub_schurs->reuse_solver)); 1325 } 1326 PetscCall(MatDestroy(&A)); 1327 PetscCall(ISDestroy(&is_A_all)); 1328 1329 /* Work arrays */ 1330 PetscCall(PetscMalloc1(max_subset_size * max_subset_size, &work)); 1331 1332 /* S_Ej_all */ 1333 cum = cum2 = 0; 1334 PetscCall(MatDenseGetArrayRead(S_all, &rS_data)); 1335 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &SEj_arr)); 1336 if (sub_schurs->sum_S_Ej_inv_all) { PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr)); } 1337 if (sub_schurs->gdsw) { PetscCall(MatCreateSubMatrices(sub_schurs->A, sub_schurs->n_subs, sub_schurs->is_subs, sub_schurs->is_subs, MAT_INITIAL_MATRIX, &gdswA)); } 1338 for (i = 0; i < sub_schurs->n_subs; i++) { 1339 PetscInt j; 1340 1341 /* get S_E (or K^i_EE for GDSW) */ 1342 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1343 if (sub_schurs->gdsw) { 1344 Mat T; 1345 1346 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &T)); 1347 PetscCall(MatConvert(gdswA[i], MATDENSE, MAT_REUSE_MATRIX, &T)); 1348 PetscCall(MatDestroy(&T)); 1349 } else { 1350 if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */ 1351 PetscInt k; 1352 for (k = 0; k < subset_size; k++) { 1353 for (j = k; j < subset_size; j++) { 1354 work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 1355 work[j * subset_size + k] = PetscConj(rS_data[cum2 + k * size_schur + j]); 1356 } 1357 } 1358 } else { /* just copy to workspace */ 1359 PetscInt k; 1360 for (k = 0; k < subset_size; k++) { 1361 for (j = 0; j < subset_size; j++) { work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; } 1362 } 1363 } 1364 } 1365 /* insert S_E values */ 1366 if (sub_schurs->change) { 1367 Mat change_sub, SEj, T; 1368 1369 /* change basis */ 1370 PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL)); 1371 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj)); 1372 if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 1373 Mat T2; 1374 PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2)); 1375 PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 1376 PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T)); 1377 PetscCall(MatDestroy(&T2)); 1378 } else { 1379 PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 1380 } 1381 PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN)); 1382 PetscCall(MatDestroy(&T)); 1383 PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], 1.0, NULL, NULL)); 1384 PetscCall(MatDestroy(&SEj)); 1385 } 1386 PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size)); 1387 if (compute_Stilda) { 1388 if (deluxe) { /* if adaptivity is requested, invert S_E blocks */ 1389 Mat M; 1390 const PetscScalar *vals; 1391 PetscBool isdense, isdensecuda; 1392 1393 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &M)); 1394 PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef)); 1395 PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian)); 1396 if (!PetscBTLookup(sub_schurs->is_edge, i)) { PetscCall(MatSetType(M, Stype)); } 1397 PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSE, &isdense)); 1398 PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSECUDA, &isdensecuda)); 1399 if (use_cholesky) { 1400 PetscCall(MatCholeskyFactor(M, NULL, NULL)); 1401 } else { 1402 PetscCall(MatLUFactor(M, NULL, NULL, NULL)); 1403 } 1404 if (isdense) { 1405 PetscCall(MatSeqDenseInvertFactors_Private(M)); 1406 #if defined(PETSC_HAVE_CUDA) 1407 } else if (isdensecuda) { 1408 PetscCall(MatSeqDenseCUDAInvertFactors_Private(M)); 1409 #endif 1410 } else SETERRQ(PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "Not implemented for type %s", Stype); 1411 PetscCall(MatDenseGetArrayRead(M, &vals)); 1412 PetscCall(PetscArraycpy(SEjinv_arr, vals, subset_size * subset_size)); 1413 PetscCall(MatDenseRestoreArrayRead(M, &vals)); 1414 PetscCall(MatDestroy(&M)); 1415 } else if (scaling) { /* not using deluxe */ 1416 Mat SEj; 1417 Vec D; 1418 PetscScalar *array; 1419 1420 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj)); 1421 PetscCall(VecGetArray(Dall, &array)); 1422 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, array + cum, &D)); 1423 PetscCall(VecRestoreArray(Dall, &array)); 1424 PetscCall(VecShift(D, -1.)); 1425 PetscCall(MatDiagonalScale(SEj, D, D)); 1426 PetscCall(MatDestroy(&SEj)); 1427 PetscCall(VecDestroy(&D)); 1428 PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size)); 1429 } 1430 } 1431 cum += subset_size; 1432 cum2 += subset_size * (size_schur + 1); 1433 SEj_arr += subset_size * subset_size; 1434 if (SEjinv_arr) SEjinv_arr += subset_size * subset_size; 1435 } 1436 if (sub_schurs->gdsw) { PetscCall(MatDestroySubMatrices(sub_schurs->n_subs, &gdswA)); } 1437 PetscCall(MatDenseRestoreArrayRead(S_all, &rS_data)); 1438 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &SEj_arr)); 1439 if (sub_schurs->sum_S_Ej_inv_all) { PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr)); } 1440 if (solver_S) { PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); } 1441 1442 /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory 1443 however, preliminary tests indicate using GPUs is still faster in the solve phase */ 1444 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 1445 if (reuse_solvers) { 1446 Mat St; 1447 MatFactorSchurStatus st; 1448 1449 flg = PETSC_FALSE; 1450 PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_schur_pin_to_cpu", &flg, NULL)); 1451 PetscCall(MatFactorGetSchurComplement(F, &St, &st)); 1452 PetscCall(MatBindToCPU(St, flg)); 1453 PetscCall(MatFactorRestoreSchurComplement(F, &St, st)); 1454 } 1455 #endif 1456 1457 schur_factor = NULL; 1458 if (compute_Stilda && size_active_schur) { 1459 if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */ 1460 PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1461 PetscCall(PetscArraycpy(SEjinv_arr, work, size_schur * size_schur)); 1462 PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1463 } else { 1464 Mat S_all_inv = NULL; 1465 1466 if (solver_S && !sub_schurs->gdsw) { 1467 /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1. 1468 The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */ 1469 if (factor_workaround) { /* invert without calling MatFactorInvertSchurComplement, since we are hacking */ 1470 PetscScalar *data; 1471 PetscInt nd = 0; 1472 1473 PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices"); 1474 PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL)); 1475 PetscCall(MatDenseGetArray(S_all_inv, &data)); 1476 if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 1477 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 1478 } 1479 1480 /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */ 1481 if (schur_has_vertices) { 1482 Mat M; 1483 PetscScalar *tdata; 1484 PetscInt nv = 0, news; 1485 1486 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 1487 news = size_active_schur + nv; 1488 PetscCall(PetscCalloc1(news * news, &tdata)); 1489 for (i = 0; i < size_active_schur; i++) { 1490 PetscCall(PetscArraycpy(tdata + i * (news + 1), data + i * (size_schur + 1), size_active_schur - i)); 1491 PetscCall(PetscArraycpy(tdata + i * (news + 1) + size_active_schur - i, data + i * size_schur + size_active_schur + nd, nv)); 1492 } 1493 for (i = 0; i < nv; i++) { 1494 PetscInt k = i + size_active_schur; 1495 PetscCall(PetscArraycpy(tdata + k * (news + 1), data + (k + nd) * (size_schur + 1), nv - i)); 1496 } 1497 1498 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, news, news, tdata, &M)); 1499 PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE)); 1500 PetscCall(MatCholeskyFactor(M, NULL, NULL)); 1501 /* save the factors */ 1502 cum = 0; 1503 PetscCall(PetscMalloc1((size_active_schur * (size_active_schur + 1)) / 2 + nd, &schur_factor)); 1504 for (i = 0; i < size_active_schur; i++) { 1505 PetscCall(PetscArraycpy(schur_factor + cum, tdata + i * (news + 1), size_active_schur - i)); 1506 cum += size_active_schur - i; 1507 } 1508 for (i = 0; i < nd; i++) schur_factor[cum + i] = PetscSqrtReal(PetscRealPart(data[(i + size_active_schur) * (size_schur + 1)])); 1509 PetscCall(MatSeqDenseInvertFactors_Private(M)); 1510 /* move back just the active dofs to the Schur complement */ 1511 for (i = 0; i < size_active_schur; i++) { PetscCall(PetscArraycpy(data + i * size_schur, tdata + i * news, size_active_schur)); } 1512 PetscCall(PetscFree(tdata)); 1513 PetscCall(MatDestroy(&M)); 1514 } else { /* we can factorize and invert just the activedofs */ 1515 Mat M; 1516 PetscScalar *aux; 1517 1518 PetscCall(PetscMalloc1(nd, &aux)); 1519 for (i = 0; i < nd; i++) aux[i] = 1.0 / data[(i + size_active_schur) * (size_schur + 1)]; 1520 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_active_schur, size_active_schur, data, &M)); 1521 PetscCall(MatDenseSetLDA(M, size_schur)); 1522 PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE)); 1523 PetscCall(MatCholeskyFactor(M, NULL, NULL)); 1524 PetscCall(MatSeqDenseInvertFactors_Private(M)); 1525 PetscCall(MatDestroy(&M)); 1526 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, nd, data + size_active_schur * size_schur, &M)); 1527 PetscCall(MatZeroEntries(M)); 1528 PetscCall(MatDestroy(&M)); 1529 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nd, size_schur, data + size_active_schur, &M)); 1530 PetscCall(MatDenseSetLDA(M, size_schur)); 1531 PetscCall(MatZeroEntries(M)); 1532 PetscCall(MatDestroy(&M)); 1533 for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = aux[i]; 1534 PetscCall(PetscFree(aux)); 1535 } 1536 PetscCall(MatDenseRestoreArray(S_all_inv, &data)); 1537 } else { /* use MatFactor calls to invert S */ 1538 PetscCall(MatFactorInvertSchurComplement(F)); 1539 PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL)); 1540 } 1541 } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */ 1542 PetscCall(PetscObjectReference((PetscObject)S_all)); 1543 S_all_inv = S_all; 1544 PetscCall(MatDenseGetArray(S_all_inv, &S_data)); 1545 PetscCall(PetscBLASIntCast(size_schur, &B_N)); 1546 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1547 if (use_potr) { 1548 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, S_data, &B_N, &B_ierr)); 1549 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %d", (int)B_ierr); 1550 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, S_data, &B_N, &B_ierr)); 1551 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %d", (int)B_ierr); 1552 } else if (use_sytr) { 1553 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 1554 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %d", (int)B_ierr); 1555 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_ierr)); 1556 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %d", (int)B_ierr); 1557 } else { 1558 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, S_data, &B_N, pivots, &B_ierr)); 1559 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %d", (int)B_ierr); 1560 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 1561 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %d", (int)B_ierr); 1562 } 1563 PetscCall(PetscLogFlops(1.0 * size_schur * size_schur * size_schur)); 1564 PetscCall(PetscFPTrapPop()); 1565 PetscCall(MatDenseRestoreArray(S_all_inv, &S_data)); 1566 } else if (sub_schurs->gdsw) { 1567 Mat tS, tX, SEj, S_II, S_IE, S_EE; 1568 KSP pS_II; 1569 PC pS_II_pc; 1570 IS EE, II; 1571 PetscInt nS; 1572 1573 PetscCall(MatFactorCreateSchurComplement(F, &tS, NULL)); 1574 PetscCall(MatGetSize(tS, &nS, NULL)); 1575 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1576 for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { /* naive implementation */ 1577 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1578 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, SEjinv_arr, &SEj)); 1579 1580 PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &EE)); 1581 PetscCall(ISComplement(EE, 0, nS, &II)); 1582 PetscCall(MatCreateSubMatrix(tS, II, II, MAT_INITIAL_MATRIX, &S_II)); 1583 PetscCall(MatCreateSubMatrix(tS, II, EE, MAT_INITIAL_MATRIX, &S_IE)); 1584 PetscCall(MatCreateSubMatrix(tS, EE, EE, MAT_INITIAL_MATRIX, &S_EE)); 1585 PetscCall(ISDestroy(&II)); 1586 PetscCall(ISDestroy(&EE)); 1587 1588 PetscCall(KSPCreate(PETSC_COMM_SELF, &pS_II)); 1589 PetscCall(KSPSetType(pS_II, KSPPREONLY)); 1590 PetscCall(KSPGetPC(pS_II, &pS_II_pc)); 1591 PetscCall(PCSetType(pS_II_pc, PCSVD)); 1592 PetscCall(KSPSetOptionsPrefix(pS_II, sub_schurs->prefix)); 1593 PetscCall(KSPAppendOptionsPrefix(pS_II, "pseudo_")); 1594 PetscCall(KSPSetOperators(pS_II, S_II, S_II)); 1595 PetscCall(MatDestroy(&S_II)); 1596 PetscCall(KSPSetFromOptions(pS_II)); 1597 PetscCall(KSPSetUp(pS_II)); 1598 PetscCall(MatDuplicate(S_IE, MAT_DO_NOT_COPY_VALUES, &tX)); 1599 PetscCall(KSPMatSolve(pS_II, S_IE, tX)); 1600 PetscCall(KSPDestroy(&pS_II)); 1601 1602 PetscCall(MatTransposeMatMult(S_IE, tX, MAT_REUSE_MATRIX, PETSC_DEFAULT, &SEj)); 1603 PetscCall(MatDestroy(&S_IE)); 1604 PetscCall(MatDestroy(&tX)); 1605 PetscCall(MatAYPX(SEj, -1, S_EE, SAME_NONZERO_PATTERN)); 1606 PetscCall(MatDestroy(&S_EE)); 1607 1608 PetscCall(MatDestroy(&SEj)); 1609 cum += subset_size; 1610 SEjinv_arr += subset_size * subset_size; 1611 } 1612 PetscCall(MatDestroy(&tS)); 1613 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1614 } 1615 /* S_Ej_tilda_all */ 1616 cum = cum2 = 0; 1617 rS_data = NULL; 1618 if (S_all_inv) PetscCall(MatDenseGetArrayRead(S_all_inv, &rS_data)); 1619 PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1620 for (i = 0; i < sub_schurs->n_subs; i++) { 1621 PetscInt j; 1622 1623 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1624 /* get (St^-1)_E */ 1625 /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1 1626 will be properly accessed later during adaptive selection */ 1627 if (rS_data) { 1628 if (S_lower_triangular) { 1629 PetscInt k; 1630 if (sub_schurs->change) { 1631 for (k = 0; k < subset_size; k++) { 1632 for (j = k; j < subset_size; j++) { 1633 work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 1634 work[j * subset_size + k] = work[k * subset_size + j]; 1635 } 1636 } 1637 } else { 1638 for (k = 0; k < subset_size; k++) { 1639 for (j = k; j < subset_size; j++) { work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; } 1640 } 1641 } 1642 } else { 1643 PetscInt k; 1644 for (k = 0; k < subset_size; k++) { 1645 for (j = 0; j < subset_size; j++) { work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; } 1646 } 1647 } 1648 } 1649 if (sub_schurs->change) { 1650 Mat change_sub, SEj, T; 1651 PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1. / PETSC_SMALL; 1652 1653 /* change basis */ 1654 PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL)); 1655 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, rS_data ? work : SEjinv_arr, &SEj)); 1656 if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 1657 Mat T2; 1658 PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2)); 1659 PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 1660 PetscCall(MatDestroy(&T2)); 1661 PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T)); 1662 } else { 1663 PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 1664 } 1665 PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN)); 1666 PetscCall(MatDestroy(&T)); 1667 PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], val, NULL, NULL)); 1668 PetscCall(MatDestroy(&SEj)); 1669 } 1670 if (rS_data) PetscCall(PetscArraycpy(SEjinv_arr, work, subset_size * subset_size)); 1671 cum += subset_size; 1672 cum2 += subset_size * (size_schur + 1); 1673 SEjinv_arr += subset_size * subset_size; 1674 } 1675 PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1676 if (S_all_inv) { 1677 PetscCall(MatDenseRestoreArrayRead(S_all_inv, &rS_data)); 1678 if (solver_S) { 1679 if (schur_has_vertices) { 1680 PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_FACTORED)); 1681 } else { 1682 PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_INVERTED)); 1683 } 1684 } 1685 } 1686 PetscCall(MatDestroy(&S_all_inv)); 1687 } 1688 1689 /* move back factors if needed */ 1690 if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) { 1691 Mat S_tmp; 1692 PetscInt nd = 0; 1693 1694 PetscCheck(solver_S, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 1695 PetscCall(MatFactorGetSchurComplement(F, &S_tmp, NULL)); 1696 if (use_potr) { 1697 PetscScalar *data; 1698 1699 PetscCall(MatDenseGetArray(S_tmp, &data)); 1700 PetscCall(PetscArrayzero(data, size_schur * size_schur)); 1701 1702 if (S_lower_triangular) { 1703 cum = 0; 1704 for (i = 0; i < size_active_schur; i++) { 1705 PetscCall(PetscArraycpy(data + i * (size_schur + 1), schur_factor + cum, size_active_schur - i)); 1706 cum += size_active_schur - i; 1707 } 1708 } else { 1709 PetscCall(PetscArraycpy(data, schur_factor, size_schur * size_schur)); 1710 } 1711 if (sub_schurs->is_dir) { 1712 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 1713 for (i = 0; i < nd; i++) { data[(i + size_active_schur) * (size_schur + 1)] = schur_factor[cum + i]; } 1714 } 1715 /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions, 1716 set the diagonal entry of the Schur factor to a very large value */ 1717 for (i = size_active_schur + nd; i < size_schur; i++) { data[i * (size_schur + 1)] = infty; } 1718 PetscCall(MatDenseRestoreArray(S_tmp, &data)); 1719 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices"); 1720 PetscCall(MatFactorRestoreSchurComplement(F, &S_tmp, MAT_FACTOR_SCHUR_FACTORED)); 1721 } 1722 } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */ 1723 PetscScalar *data; 1724 PetscInt nd = 0; 1725 1726 if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 1727 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 1728 } 1729 PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 1730 PetscCall(MatDenseGetArray(S_all, &data)); 1731 for (i = 0; i < size_active_schur; i++) { PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur)); } 1732 for (i = size_active_schur + nd; i < size_schur; i++) { 1733 PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur)); 1734 data[i * (size_schur + 1)] = infty; 1735 } 1736 PetscCall(MatDenseRestoreArray(S_all, &data)); 1737 PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); 1738 } 1739 PetscCall(PetscFree(work)); 1740 PetscCall(PetscFree(schur_factor)); 1741 PetscCall(VecDestroy(&Dall)); 1742 } 1743 PetscCall(ISDestroy(&is_I_layer)); 1744 PetscCall(MatDestroy(&S_all)); 1745 PetscCall(MatDestroy(&A_BB)); 1746 PetscCall(MatDestroy(&A_IB)); 1747 PetscCall(MatDestroy(&A_BI)); 1748 PetscCall(MatDestroy(&F)); 1749 1750 PetscCall(PetscMalloc1(sub_schurs->n_subs, &nnz)); 1751 for (i = 0; i < sub_schurs->n_subs; i++) { PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &nnz[i])); } 1752 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, sub_schurs->n_subs, nnz, PETSC_OWN_POINTER, &is_I_layer)); 1753 PetscCall(MatSetVariableBlockSizes(sub_schurs->S_Ej_all, sub_schurs->n_subs, nnz)); 1754 PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY)); 1755 PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY)); 1756 if (compute_Stilda) { 1757 PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all, sub_schurs->n_subs, nnz)); 1758 PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY)); 1759 PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY)); 1760 if (deluxe) { 1761 PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all, sub_schurs->n_subs, nnz)); 1762 PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY)); 1763 PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY)); 1764 } 1765 } 1766 PetscCall(ISDestroy(&is_I_layer)); 1767 1768 /* Get local part of (\sum_j S_Ej) */ 1769 if (!sub_schurs->sum_S_Ej_all) { PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_all)); } 1770 PetscCall(VecSet(gstash, 0.0)); 1771 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &stasharray)); 1772 PetscCall(VecPlaceArray(lstash, stasharray)); 1773 PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1774 PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1775 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &stasharray)); 1776 PetscCall(VecResetArray(lstash)); 1777 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &stasharray)); 1778 PetscCall(VecPlaceArray(lstash, stasharray)); 1779 PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1780 PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1781 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &stasharray)); 1782 PetscCall(VecResetArray(lstash)); 1783 1784 /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */ 1785 if (compute_Stilda) { 1786 PetscCall(VecSet(gstash, 0.0)); 1787 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray)); 1788 PetscCall(VecPlaceArray(lstash, stasharray)); 1789 PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1790 PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1791 PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1792 PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1793 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray)); 1794 PetscCall(VecResetArray(lstash)); 1795 if (deluxe) { 1796 PetscCall(VecSet(gstash, 0.0)); 1797 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &stasharray)); 1798 PetscCall(VecPlaceArray(lstash, stasharray)); 1799 PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1800 PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1801 PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1802 PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1803 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &stasharray)); 1804 PetscCall(VecResetArray(lstash)); 1805 } else if (!sub_schurs->gdsw) { 1806 PetscScalar *array; 1807 PetscInt cum; 1808 1809 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &array)); 1810 cum = 0; 1811 for (i = 0; i < sub_schurs->n_subs; i++) { 1812 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1813 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 1814 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1815 if (use_potr) { 1816 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, array + cum, &B_N, &B_ierr)); 1817 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %d", (int)B_ierr); 1818 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, array + cum, &B_N, &B_ierr)); 1819 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %d", (int)B_ierr); 1820 } else if (use_sytr) { 1821 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 1822 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %d", (int)B_ierr); 1823 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_ierr)); 1824 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %d", (int)B_ierr); 1825 } else { 1826 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, array + cum, &B_N, pivots, &B_ierr)); 1827 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %d", (int)B_ierr); 1828 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 1829 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %d", (int)B_ierr); 1830 } 1831 PetscCall(PetscLogFlops(1.0 * subset_size * subset_size * subset_size)); 1832 PetscCall(PetscFPTrapPop()); 1833 cum += subset_size * subset_size; 1834 } 1835 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &array)); 1836 PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all)); 1837 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 1838 sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all; 1839 } 1840 } 1841 PetscCall(VecDestroy(&lstash)); 1842 PetscCall(VecDestroy(&gstash)); 1843 PetscCall(VecScatterDestroy(&sstash)); 1844 1845 if (matl_dbg_viewer) { 1846 if (sub_schurs->S_Ej_all) { 1847 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all, "SE")); 1848 PetscCall(MatView(sub_schurs->S_Ej_all, matl_dbg_viewer)); 1849 } 1850 if (sub_schurs->sum_S_Ej_all) { 1851 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all, "SSE")); 1852 PetscCall(MatView(sub_schurs->sum_S_Ej_all, matl_dbg_viewer)); 1853 } 1854 if (sub_schurs->sum_S_Ej_inv_all) { 1855 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all, "SSEm")); 1856 PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all, matl_dbg_viewer)); 1857 } 1858 if (sub_schurs->sum_S_Ej_tilda_all) { 1859 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all, "SSEt")); 1860 PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all, matl_dbg_viewer)); 1861 } 1862 } 1863 1864 /* free workspace */ 1865 if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer)); 1866 if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n)); 1867 PetscCall(PetscViewerDestroy(&matl_dbg_viewer)); 1868 PetscCall(PetscFree2(Bwork, pivots)); 1869 PetscCall(PetscCommDestroy(&comm_n)); 1870 PetscFunctionReturn(0); 1871 } 1872 1873 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char *prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw) { 1874 IS *faces, *edges, *all_cc, vertices; 1875 PetscInt s, i, n_faces, n_edges, n_all_cc; 1876 PetscBool is_sorted, ispardiso, ismumps; 1877 1878 PetscFunctionBegin; 1879 PetscCall(ISSorted(is_I, &is_sorted)); 1880 PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_I), PETSC_ERR_PLIB, "IS for I dofs should be shorted"); 1881 PetscCall(ISSorted(is_B, &is_sorted)); 1882 PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_B), PETSC_ERR_PLIB, "IS for B dofs should be shorted"); 1883 1884 /* reset any previous data */ 1885 PetscCall(PCBDDCSubSchursReset(sub_schurs)); 1886 1887 sub_schurs->gdsw = gdsw; 1888 1889 /* get index sets for faces and edges (already sorted by global ordering) */ 1890 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices)); 1891 n_all_cc = n_faces + n_edges; 1892 PetscCall(PetscBTCreate(n_all_cc, &sub_schurs->is_edge)); 1893 PetscCall(PetscMalloc1(n_all_cc, &all_cc)); 1894 n_all_cc = 0; 1895 for (i = 0; i < n_faces; i++) { 1896 PetscCall(ISGetSize(faces[i], &s)); 1897 if (!s) continue; 1898 if (copycc) { 1899 PetscCall(ISDuplicate(faces[i], &all_cc[n_all_cc])); 1900 } else { 1901 PetscCall(PetscObjectReference((PetscObject)faces[i])); 1902 all_cc[n_all_cc] = faces[i]; 1903 } 1904 n_all_cc++; 1905 } 1906 for (i = 0; i < n_edges; i++) { 1907 PetscCall(ISGetSize(edges[i], &s)); 1908 if (!s) continue; 1909 if (copycc) { 1910 PetscCall(ISDuplicate(edges[i], &all_cc[n_all_cc])); 1911 } else { 1912 PetscCall(PetscObjectReference((PetscObject)edges[i])); 1913 all_cc[n_all_cc] = edges[i]; 1914 } 1915 PetscCall(PetscBTSet(sub_schurs->is_edge, n_all_cc)); 1916 n_all_cc++; 1917 } 1918 PetscCall(PetscObjectReference((PetscObject)vertices)); 1919 sub_schurs->is_vertices = vertices; 1920 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices)); 1921 sub_schurs->is_dir = NULL; 1922 PetscCall(PCBDDCGraphGetDirichletDofsB(graph, &sub_schurs->is_dir)); 1923 1924 /* Determine if MatFactor can be used */ 1925 PetscCall(PetscStrallocpy(prefix, &sub_schurs->prefix)); 1926 #if defined(PETSC_HAVE_MUMPS) 1927 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMUMPS, sizeof(sub_schurs->mat_solver_type))); 1928 #elif defined(PETSC_HAVE_MKL_PARDISO) 1929 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, sizeof(sub_schurs->mat_solver_type))); 1930 #else 1931 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERPETSC, sizeof(sub_schurs->mat_solver_type))); 1932 #endif 1933 #if defined(PETSC_USE_COMPLEX) 1934 sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */ 1935 #else 1936 sub_schurs->is_hermitian = PETSC_TRUE; 1937 #endif 1938 sub_schurs->is_posdef = PETSC_TRUE; 1939 sub_schurs->is_symmetric = PETSC_TRUE; 1940 sub_schurs->debug = PETSC_FALSE; 1941 sub_schurs->restrict_comm = PETSC_FALSE; 1942 PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 1943 PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type", "Specific direct solver to use", NULL, sub_schurs->mat_solver_type, sub_schurs->mat_solver_type, sizeof(sub_schurs->mat_solver_type), NULL)); 1944 PetscCall(PetscOptionsBool("-sub_schurs_symmetric", "Symmetric problem", NULL, sub_schurs->is_symmetric, &sub_schurs->is_symmetric, NULL)); 1945 PetscCall(PetscOptionsBool("-sub_schurs_hermitian", "Hermitian problem", NULL, sub_schurs->is_hermitian, &sub_schurs->is_hermitian, NULL)); 1946 PetscCall(PetscOptionsBool("-sub_schurs_posdef", "Positive definite problem", NULL, sub_schurs->is_posdef, &sub_schurs->is_posdef, NULL)); 1947 PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm", "Restrict communicator on active processes only", NULL, sub_schurs->restrict_comm, &sub_schurs->restrict_comm, NULL)); 1948 PetscCall(PetscOptionsBool("-sub_schurs_debug", "Debug output", NULL, sub_schurs->debug, &sub_schurs->debug, NULL)); 1949 PetscOptionsEnd(); 1950 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMUMPS, &ismumps)); 1951 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &ispardiso)); 1952 sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps); 1953 1954 /* for reals, symmetric and hermitian are synonims */ 1955 #if !defined(PETSC_USE_COMPLEX) 1956 sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian); 1957 sub_schurs->is_hermitian = sub_schurs->is_symmetric; 1958 #endif 1959 1960 PetscCall(PetscObjectReference((PetscObject)is_I)); 1961 sub_schurs->is_I = is_I; 1962 PetscCall(PetscObjectReference((PetscObject)is_B)); 1963 sub_schurs->is_B = is_B; 1964 PetscCall(PetscObjectReference((PetscObject)graph->l2gmap)); 1965 sub_schurs->l2gmap = graph->l2gmap; 1966 PetscCall(PetscObjectReference((PetscObject)BtoNmap)); 1967 sub_schurs->BtoNmap = BtoNmap; 1968 sub_schurs->n_subs = n_all_cc; 1969 sub_schurs->is_subs = all_cc; 1970 sub_schurs->S_Ej_all = NULL; 1971 sub_schurs->sum_S_Ej_all = NULL; 1972 sub_schurs->sum_S_Ej_inv_all = NULL; 1973 sub_schurs->sum_S_Ej_tilda_all = NULL; 1974 sub_schurs->is_Ej_all = NULL; 1975 PetscFunctionReturn(0); 1976 } 1977 1978 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs) { 1979 PCBDDCSubSchurs schurs_ctx; 1980 1981 PetscFunctionBegin; 1982 PetscCall(PetscNew(&schurs_ctx)); 1983 schurs_ctx->n_subs = 0; 1984 *sub_schurs = schurs_ctx; 1985 PetscFunctionReturn(0); 1986 } 1987 1988 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs) { 1989 PetscInt i; 1990 1991 PetscFunctionBegin; 1992 if (!sub_schurs) PetscFunctionReturn(0); 1993 PetscCall(PetscFree(sub_schurs->prefix)); 1994 PetscCall(MatDestroy(&sub_schurs->A)); 1995 PetscCall(MatDestroy(&sub_schurs->S)); 1996 PetscCall(ISDestroy(&sub_schurs->is_I)); 1997 PetscCall(ISDestroy(&sub_schurs->is_B)); 1998 PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap)); 1999 PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap)); 2000 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 2001 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 2002 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 2003 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 2004 PetscCall(ISDestroy(&sub_schurs->is_Ej_all)); 2005 PetscCall(ISDestroy(&sub_schurs->is_vertices)); 2006 PetscCall(ISDestroy(&sub_schurs->is_dir)); 2007 PetscCall(PetscBTDestroy(&sub_schurs->is_edge)); 2008 for (i = 0; i < sub_schurs->n_subs; i++) { PetscCall(ISDestroy(&sub_schurs->is_subs[i])); } 2009 if (sub_schurs->n_subs) PetscCall(PetscFree(sub_schurs->is_subs)); 2010 if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 2011 PetscCall(PetscFree(sub_schurs->reuse_solver)); 2012 if (sub_schurs->change) { 2013 for (i = 0; i < sub_schurs->n_subs; i++) { 2014 PetscCall(KSPDestroy(&sub_schurs->change[i])); 2015 PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i])); 2016 } 2017 } 2018 PetscCall(PetscFree(sub_schurs->change)); 2019 PetscCall(PetscFree(sub_schurs->change_primal_sub)); 2020 sub_schurs->n_subs = 0; 2021 PetscFunctionReturn(0); 2022 } 2023 2024 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs) { 2025 PetscFunctionBegin; 2026 PetscCall(PCBDDCSubSchursReset(*sub_schurs)); 2027 PetscCall(PetscFree(*sub_schurs)); 2028 PetscFunctionReturn(0); 2029 } 2030 2031 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *queue_tip, PetscInt n_prev, PetscBT touched, PetscInt *xadj, PetscInt *adjncy, PetscInt *n_added) { 2032 PetscInt i, j, n; 2033 2034 PetscFunctionBegin; 2035 n = 0; 2036 for (i = -n_prev; i < 0; i++) { 2037 PetscInt start_dof = queue_tip[i]; 2038 for (j = xadj[start_dof]; j < xadj[start_dof + 1]; j++) { 2039 PetscInt dof = adjncy[j]; 2040 if (!PetscBTLookup(touched, dof)) { 2041 PetscCall(PetscBTSet(touched, dof)); 2042 queue_tip[n] = dof; 2043 n++; 2044 } 2045 } 2046 } 2047 *n_added = n; 2048 PetscFunctionReturn(0); 2049 } 2050