1 #include <petsc/private/pcbddcimpl.h> 2 #include <petsc/private/pcbddcprivateimpl.h> 3 #include <../src/mat/impls/dense/seq/dense.h> 4 #include <petscblaslapack.h> 5 6 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *, PetscInt, PetscBT, PetscInt *, PetscInt *, PetscInt *); 7 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat, PetscBool, MatReuse, Mat *); 8 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC, Vec, Vec); 9 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC, Vec, Vec); 10 11 /* if v2 is not present, correction is done in-place */ 12 PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full) 13 { 14 PetscScalar *array; 15 PetscScalar *array2; 16 17 PetscFunctionBegin; 18 if (!ctx->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 19 if (sol && full) { 20 PetscInt n_I, size_schur; 21 22 /* get sizes */ 23 PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL)); 24 PetscCall(VecGetSize(v, &n_I)); 25 n_I = n_I - size_schur; 26 /* get schur sol from array */ 27 PetscCall(VecGetArray(v, &array)); 28 PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I)); 29 PetscCall(VecRestoreArray(v, &array)); 30 /* apply interior sol correction */ 31 PetscCall(MatMultTranspose(ctx->benign_csAIB, ctx->benign_dummy_schur_vec, ctx->benign_corr_work)); 32 PetscCall(VecResetArray(ctx->benign_dummy_schur_vec)); 33 PetscCall(MatMultAdd(ctx->benign_AIIm1ones, ctx->benign_corr_work, v, v)); 34 } 35 if (v2) { 36 PetscInt nl; 37 38 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 39 PetscCall(VecGetLocalSize(v2, &nl)); 40 PetscCall(VecGetArray(v2, &array2)); 41 PetscCall(PetscArraycpy(array2, array, nl)); 42 } else { 43 PetscCall(VecGetArray(v, &array)); 44 array2 = array; 45 } 46 if (!sol) { /* change rhs */ 47 PetscInt n; 48 for (n = 0; n < ctx->benign_n; n++) { 49 PetscScalar sum = 0.; 50 const PetscInt *cols; 51 PetscInt nz, i; 52 53 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz)); 54 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols)); 55 for (i = 0; i < nz - 1; i++) sum += array[cols[i]]; 56 #if defined(PETSC_USE_COMPLEX) 57 sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz)); 58 #else 59 sum = -sum / nz; 60 #endif 61 for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum; 62 ctx->benign_save_vals[n] = array2[cols[nz - 1]]; 63 array2[cols[nz - 1]] = sum; 64 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols)); 65 } 66 } else { 67 PetscInt n; 68 for (n = 0; n < ctx->benign_n; n++) { 69 PetscScalar sum = 0.; 70 const PetscInt *cols; 71 PetscInt nz, i; 72 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz)); 73 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols)); 74 for (i = 0; i < nz - 1; i++) sum += array[cols[i]]; 75 #if defined(PETSC_USE_COMPLEX) 76 sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz)); 77 #else 78 sum = -sum / nz; 79 #endif 80 for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum; 81 array2[cols[nz - 1]] = ctx->benign_save_vals[n]; 82 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols)); 83 } 84 } 85 if (v2) { 86 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 87 PetscCall(VecRestoreArray(v2, &array2)); 88 } else { 89 PetscCall(VecRestoreArray(v, &array)); 90 } 91 if (!sol && full) { 92 Vec usedv; 93 PetscInt n_I, size_schur; 94 95 /* get sizes */ 96 PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL)); 97 PetscCall(VecGetSize(v, &n_I)); 98 n_I = n_I - size_schur; 99 /* compute schur rhs correction */ 100 if (v2) { 101 usedv = v2; 102 } else { 103 usedv = v; 104 } 105 /* apply schur rhs correction */ 106 PetscCall(MatMultTranspose(ctx->benign_AIIm1ones, usedv, ctx->benign_corr_work)); 107 PetscCall(VecGetArrayRead(usedv, (const PetscScalar **)&array)); 108 PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I)); 109 PetscCall(VecRestoreArrayRead(usedv, (const PetscScalar **)&array)); 110 PetscCall(MatMultAdd(ctx->benign_csAIB, ctx->benign_corr_work, ctx->benign_dummy_schur_vec, ctx->benign_dummy_schur_vec)); 111 PetscCall(VecResetArray(ctx->benign_dummy_schur_vec)); 112 } 113 PetscFunctionReturn(PETSC_SUCCESS); 114 } 115 116 static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full) 117 { 118 PCBDDCReuseSolvers ctx; 119 PetscBool copy = PETSC_FALSE; 120 121 PetscFunctionBegin; 122 PetscCall(PCShellGetContext(pc, &ctx)); 123 if (full) { 124 #if defined(PETSC_HAVE_MUMPS) 125 PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1)); 126 #endif 127 #if defined(PETSC_HAVE_MKL_PARDISO) 128 PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0)); 129 #endif 130 copy = ctx->has_vertices; 131 } else { /* interior solver */ 132 #if defined(PETSC_HAVE_MUMPS) 133 PetscCall(MatMumpsSetIcntl(ctx->F, 26, 0)); 134 #endif 135 #if defined(PETSC_HAVE_MKL_PARDISO) 136 PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 1)); 137 #endif 138 copy = PETSC_TRUE; 139 } 140 /* copy rhs into factored matrix workspace */ 141 if (copy) { 142 PetscInt n; 143 PetscScalar *array, *array_solver; 144 145 PetscCall(VecGetLocalSize(rhs, &n)); 146 PetscCall(VecGetArrayRead(rhs, (const PetscScalar **)&array)); 147 PetscCall(VecGetArray(ctx->rhs, &array_solver)); 148 PetscCall(PetscArraycpy(array_solver, array, n)); 149 PetscCall(VecRestoreArray(ctx->rhs, &array_solver)); 150 PetscCall(VecRestoreArrayRead(rhs, (const PetscScalar **)&array)); 151 152 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->rhs, NULL, PETSC_FALSE, full)); 153 if (transpose) { 154 PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, ctx->sol)); 155 } else { 156 PetscCall(MatSolve(ctx->F, ctx->rhs, ctx->sol)); 157 } 158 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->sol, NULL, PETSC_TRUE, full)); 159 160 /* get back data to caller worskpace */ 161 PetscCall(VecGetArrayRead(ctx->sol, (const PetscScalar **)&array_solver)); 162 PetscCall(VecGetArray(sol, &array)); 163 PetscCall(PetscArraycpy(array, array_solver, n)); 164 PetscCall(VecRestoreArray(sol, &array)); 165 PetscCall(VecRestoreArrayRead(ctx->sol, (const PetscScalar **)&array_solver)); 166 } else { 167 if (ctx->benign_n) { 168 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, rhs, ctx->rhs, PETSC_FALSE, full)); 169 if (transpose) { 170 PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, sol)); 171 } else { 172 PetscCall(MatSolve(ctx->F, ctx->rhs, sol)); 173 } 174 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, sol, NULL, PETSC_TRUE, full)); 175 } else { 176 if (transpose) { 177 PetscCall(MatSolveTranspose(ctx->F, rhs, sol)); 178 } else { 179 PetscCall(MatSolve(ctx->F, rhs, sol)); 180 } 181 } 182 } 183 /* restore defaults */ 184 #if defined(PETSC_HAVE_MUMPS) 185 PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1)); 186 #endif 187 #if defined(PETSC_HAVE_MKL_PARDISO) 188 PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0)); 189 #endif 190 PetscFunctionReturn(PETSC_SUCCESS); 191 } 192 193 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol) 194 { 195 PetscFunctionBegin; 196 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_TRUE)); 197 PetscFunctionReturn(PETSC_SUCCESS); 198 } 199 200 static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol) 201 { 202 PetscFunctionBegin; 203 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_TRUE)); 204 PetscFunctionReturn(PETSC_SUCCESS); 205 } 206 207 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol) 208 { 209 PetscFunctionBegin; 210 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_FALSE)); 211 PetscFunctionReturn(PETSC_SUCCESS); 212 } 213 214 static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol) 215 { 216 PetscFunctionBegin; 217 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_FALSE)); 218 PetscFunctionReturn(PETSC_SUCCESS); 219 } 220 221 static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer) 222 { 223 PCBDDCReuseSolvers ctx; 224 PetscBool iascii; 225 226 PetscFunctionBegin; 227 PetscCall(PCShellGetContext(pc, &ctx)); 228 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 229 if (iascii) PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO)); 230 PetscCall(MatView(ctx->F, viewer)); 231 if (iascii) PetscCall(PetscViewerPopFormat(viewer)); 232 PetscFunctionReturn(PETSC_SUCCESS); 233 } 234 235 static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse) 236 { 237 PetscInt i; 238 239 PetscFunctionBegin; 240 PetscCall(MatDestroy(&reuse->F)); 241 PetscCall(VecDestroy(&reuse->sol)); 242 PetscCall(VecDestroy(&reuse->rhs)); 243 PetscCall(PCDestroy(&reuse->interior_solver)); 244 PetscCall(PCDestroy(&reuse->correction_solver)); 245 PetscCall(ISDestroy(&reuse->is_R)); 246 PetscCall(ISDestroy(&reuse->is_B)); 247 PetscCall(VecScatterDestroy(&reuse->correction_scatter_B)); 248 PetscCall(VecDestroy(&reuse->sol_B)); 249 PetscCall(VecDestroy(&reuse->rhs_B)); 250 for (i = 0; i < reuse->benign_n; i++) PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i])); 251 PetscCall(PetscFree(reuse->benign_zerodiag_subs)); 252 PetscCall(PetscFree(reuse->benign_save_vals)); 253 PetscCall(MatDestroy(&reuse->benign_csAIB)); 254 PetscCall(MatDestroy(&reuse->benign_AIIm1ones)); 255 PetscCall(VecDestroy(&reuse->benign_corr_work)); 256 PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec)); 257 PetscFunctionReturn(PETSC_SUCCESS); 258 } 259 260 static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc) 261 { 262 PCBDDCReuseSolvers ctx; 263 264 PetscFunctionBegin; 265 PetscCall(PCShellGetContext(pc, &ctx)); 266 PetscCall(PCBDDCReuseSolversReset(ctx)); 267 PetscCall(PetscFree(ctx)); 268 PetscCall(PCShellSetContext(pc, NULL)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S) 273 { 274 Mat B, C, D, Bd, Cd, AinvBd; 275 KSP ksp; 276 PC pc; 277 PetscBool isLU, isILU, isCHOL, Bdense, Cdense; 278 PetscReal fill = 2.0; 279 PetscInt n_I; 280 PetscMPIInt size; 281 282 PetscFunctionBegin; 283 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M), &size)); 284 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for parallel matrices"); 285 if (reuse == MAT_REUSE_MATRIX) { 286 PetscBool Sdense; 287 288 PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense)); 289 PetscCheck(Sdense, PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "S should dense"); 290 } 291 PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D)); 292 PetscCall(MatSchurComplementGetKSP(M, &ksp)); 293 PetscCall(KSPGetPC(ksp, &pc)); 294 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCLU, &isLU)); 295 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCILU, &isILU)); 296 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCCHOLESKY, &isCHOL)); 297 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &Bdense)); 298 PetscCall(PetscObjectTypeCompare((PetscObject)C, MATSEQDENSE, &Cdense)); 299 PetscCall(MatGetSize(B, &n_I, NULL)); 300 if (n_I) { 301 if (!Bdense) { 302 PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd)); 303 } else { 304 Bd = B; 305 } 306 307 if (isLU || isILU || isCHOL) { 308 Mat fact; 309 PetscCall(KSPSetUp(ksp)); 310 PetscCall(PCFactorGetMatrix(pc, &fact)); 311 PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 312 PetscCall(MatMatSolve(fact, Bd, AinvBd)); 313 } else { 314 PetscBool ex = PETSC_TRUE; 315 316 if (ex) { 317 Mat Ainvd; 318 319 PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd)); 320 PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd)); 321 PetscCall(MatDestroy(&Ainvd)); 322 } else { 323 Vec sol, rhs; 324 PetscScalar *arrayrhs, *arraysol; 325 PetscInt i, nrhs, n; 326 327 PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 328 PetscCall(MatGetSize(Bd, &n, &nrhs)); 329 PetscCall(MatDenseGetArray(Bd, &arrayrhs)); 330 PetscCall(MatDenseGetArray(AinvBd, &arraysol)); 331 PetscCall(KSPGetSolution(ksp, &sol)); 332 PetscCall(KSPGetRhs(ksp, &rhs)); 333 for (i = 0; i < nrhs; i++) { 334 PetscCall(VecPlaceArray(rhs, arrayrhs + i * n)); 335 PetscCall(VecPlaceArray(sol, arraysol + i * n)); 336 PetscCall(KSPSolve(ksp, rhs, sol)); 337 PetscCall(VecResetArray(rhs)); 338 PetscCall(VecResetArray(sol)); 339 } 340 PetscCall(MatDenseRestoreArray(Bd, &arrayrhs)); 341 PetscCall(MatDenseRestoreArray(AinvBd, &arrayrhs)); 342 } 343 } 344 if (!Bdense & !issym) PetscCall(MatDestroy(&Bd)); 345 346 if (!issym) { 347 if (!Cdense) { 348 PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd)); 349 } else { 350 Cd = C; 351 } 352 PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S)); 353 if (!Cdense) PetscCall(MatDestroy(&Cd)); 354 } else { 355 PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S)); 356 if (!Bdense) PetscCall(MatDestroy(&Bd)); 357 } 358 PetscCall(MatDestroy(&AinvBd)); 359 } 360 361 if (D) { 362 Mat Dd; 363 PetscBool Ddense; 364 365 PetscCall(PetscObjectTypeCompare((PetscObject)D, MATSEQDENSE, &Ddense)); 366 if (!Ddense) { 367 PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd)); 368 } else { 369 Dd = D; 370 } 371 if (n_I) { 372 PetscCall(MatAYPX(*S, -1.0, Dd, SAME_NONZERO_PATTERN)); 373 } else { 374 if (reuse == MAT_INITIAL_MATRIX) { 375 PetscCall(MatDuplicate(Dd, MAT_COPY_VALUES, S)); 376 } else { 377 PetscCall(MatCopy(Dd, *S, SAME_NONZERO_PATTERN)); 378 } 379 } 380 if (!Ddense) PetscCall(MatDestroy(&Dd)); 381 } else { 382 PetscCall(MatScale(*S, -1.0)); 383 } 384 PetscFunctionReturn(PETSC_SUCCESS); 385 } 386 387 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal) 388 { 389 Mat F, A_II, A_IB, A_BI, A_BB, AE_II; 390 Mat S_all; 391 Vec gstash, lstash; 392 VecScatter sstash; 393 IS is_I, is_I_layer; 394 IS all_subsets, all_subsets_mult, all_subsets_n; 395 PetscScalar *stasharray, *Bwork; 396 PetscInt *nnz, *all_local_idx_N; 397 PetscInt *auxnum1, *auxnum2; 398 PetscInt i, subset_size, max_subset_size; 399 PetscInt n_B, extra, local_size, global_size; 400 PetscInt local_stash_size; 401 PetscBLASInt B_N, B_ierr, B_lwork, *pivots; 402 MPI_Comm comm_n; 403 PetscBool deluxe = PETSC_TRUE; 404 PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE; 405 PetscViewer matl_dbg_viewer = NULL; 406 PetscBool flg; 407 408 PetscFunctionBegin; 409 PetscCall(MatDestroy(&sub_schurs->A)); 410 PetscCall(MatDestroy(&sub_schurs->S)); 411 if (Ain) { 412 PetscCall(PetscObjectReference((PetscObject)Ain)); 413 sub_schurs->A = Ain; 414 } 415 416 PetscCall(PetscObjectReference((PetscObject)Sin)); 417 sub_schurs->S = Sin; 418 if (sub_schurs->schur_explicit) sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A); 419 420 /* preliminary checks */ 421 PetscCheck(sub_schurs->schur_explicit || !compute_Stilda, PetscObjectComm((PetscObject)sub_schurs->l2gmap), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO"); 422 423 if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE; 424 425 /* debug (MATLAB) */ 426 if (sub_schurs->debug) { 427 PetscMPIInt size, rank; 428 PetscInt nr, *print_schurs_ranks, print_schurs = PETSC_FALSE; 429 430 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &size)); 431 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank)); 432 nr = size; 433 PetscCall(PetscMalloc1(nr, &print_schurs_ranks)); 434 PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 435 PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks", "Ranks to debug (all if the option is not used)", NULL, print_schurs_ranks, &nr, &flg)); 436 if (!flg) print_schurs = PETSC_TRUE; 437 else { 438 print_schurs = PETSC_FALSE; 439 for (i = 0; i < nr; i++) 440 if (print_schurs_ranks[i] == rank) { 441 print_schurs = PETSC_TRUE; 442 break; 443 } 444 } 445 PetscOptionsEnd(); 446 PetscCall(PetscFree(print_schurs_ranks)); 447 if (print_schurs) { 448 char filename[256]; 449 450 PetscCall(PetscSNPrintf(filename, sizeof(filename), "sub_schurs_Schur_r%d.m", PetscGlobalRank)); 451 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &matl_dbg_viewer)); 452 PetscCall(PetscViewerPushFormat(matl_dbg_viewer, PETSC_VIEWER_ASCII_MATLAB)); 453 } 454 } 455 456 /* restrict work on active processes */ 457 if (sub_schurs->restrict_comm) { 458 PetscSubcomm subcomm; 459 PetscMPIInt color, rank; 460 461 color = 0; 462 if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */ 463 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank)); 464 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &subcomm)); 465 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 466 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 467 PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm), &comm_n, NULL)); 468 PetscCall(PetscSubcommDestroy(&subcomm)); 469 if (!sub_schurs->n_subs) { 470 PetscCall(PetscCommDestroy(&comm_n)); 471 PetscFunctionReturn(PETSC_SUCCESS); 472 } 473 } else { 474 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &comm_n, NULL)); 475 } 476 477 /* get Schur complement matrices */ 478 if (!sub_schurs->schur_explicit) { 479 Mat tA_IB, tA_BI, tA_BB; 480 PetscBool isseqsbaij; 481 PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, NULL, &tA_IB, &tA_BI, &tA_BB)); 482 PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB, MATSEQSBAIJ, &isseqsbaij)); 483 if (isseqsbaij) { 484 PetscCall(MatConvert(tA_BB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BB)); 485 PetscCall(MatConvert(tA_IB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_IB)); 486 PetscCall(MatConvert(tA_BI, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BI)); 487 } else { 488 PetscCall(PetscObjectReference((PetscObject)tA_BB)); 489 A_BB = tA_BB; 490 PetscCall(PetscObjectReference((PetscObject)tA_IB)); 491 A_IB = tA_IB; 492 PetscCall(PetscObjectReference((PetscObject)tA_BI)); 493 A_BI = tA_BI; 494 } 495 } else { 496 A_II = NULL; 497 A_IB = NULL; 498 A_BI = NULL; 499 A_BB = NULL; 500 } 501 S_all = NULL; 502 503 /* determine interior problems */ 504 PetscCall(ISGetLocalSize(sub_schurs->is_I, &i)); 505 if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */ 506 PetscBT touched; 507 const PetscInt *idx_B; 508 PetscInt n_I, n_B, n_local_dofs, n_prev_added, j, layer, *local_numbering; 509 510 PetscCheck(xadj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot request layering without adjacency"); 511 /* get sizes */ 512 PetscCall(ISGetLocalSize(sub_schurs->is_I, &n_I)); 513 PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B)); 514 515 PetscCall(PetscMalloc1(n_I + n_B, &local_numbering)); 516 PetscCall(PetscBTCreate(n_I + n_B, &touched)); 517 PetscCall(PetscBTMemzero(n_I + n_B, touched)); 518 519 /* all boundary dofs must be skipped when adding layers */ 520 PetscCall(ISGetIndices(sub_schurs->is_B, &idx_B)); 521 for (j = 0; j < n_B; j++) PetscCall(PetscBTSet(touched, idx_B[j])); 522 PetscCall(PetscArraycpy(local_numbering, idx_B, n_B)); 523 PetscCall(ISRestoreIndices(sub_schurs->is_B, &idx_B)); 524 525 /* add prescribed number of layers of dofs */ 526 n_local_dofs = n_B; 527 n_prev_added = n_B; 528 for (layer = 0; layer < nlayers; layer++) { 529 PetscInt n_added = 0; 530 if (n_local_dofs == n_I + n_B) break; 531 PetscCheck(n_local_dofs <= n_I + n_B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")", layer, n_local_dofs, n_I + n_B); 532 PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering + n_local_dofs, n_prev_added, touched, xadj, adjncy, &n_added)); 533 n_prev_added = n_added; 534 n_local_dofs += n_added; 535 if (!n_added) break; 536 } 537 PetscCall(PetscBTDestroy(&touched)); 538 539 /* IS for I layer dofs in original numbering */ 540 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I), n_local_dofs - n_B, local_numbering + n_B, PETSC_COPY_VALUES, &is_I_layer)); 541 PetscCall(PetscFree(local_numbering)); 542 PetscCall(ISSort(is_I_layer)); 543 /* IS for I layer dofs in I numbering */ 544 if (!sub_schurs->schur_explicit) { 545 ISLocalToGlobalMapping ItoNmap; 546 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I, &ItoNmap)); 547 PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap, IS_GTOLM_DROP, is_I_layer, &is_I)); 548 PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap)); 549 550 /* II block */ 551 PetscCall(MatCreateSubMatrix(A_II, is_I, is_I, MAT_INITIAL_MATRIX, &AE_II)); 552 } 553 } else { 554 PetscInt n_I; 555 556 /* IS for I dofs in original numbering */ 557 PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I)); 558 is_I_layer = sub_schurs->is_I; 559 560 /* IS for I dofs in I numbering (strided 1) */ 561 if (!sub_schurs->schur_explicit) { 562 PetscCall(ISGetSize(sub_schurs->is_I, &n_I)); 563 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I), n_I, 0, 1, &is_I)); 564 565 /* II block is the same */ 566 PetscCall(PetscObjectReference((PetscObject)A_II)); 567 AE_II = A_II; 568 } 569 } 570 571 /* Get info on subset sizes and sum of all subsets sizes */ 572 max_subset_size = 0; 573 local_size = 0; 574 for (i = 0; i < sub_schurs->n_subs; i++) { 575 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 576 max_subset_size = PetscMax(subset_size, max_subset_size); 577 local_size += subset_size; 578 } 579 580 /* Work arrays for local indices */ 581 extra = 0; 582 PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B)); 583 if (sub_schurs->schur_explicit && is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &extra)); 584 PetscCall(PetscMalloc1(n_B + extra, &all_local_idx_N)); 585 if (extra) { 586 const PetscInt *idxs; 587 PetscCall(ISGetIndices(is_I_layer, &idxs)); 588 PetscCall(PetscArraycpy(all_local_idx_N, idxs, extra)); 589 PetscCall(ISRestoreIndices(is_I_layer, &idxs)); 590 } 591 PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum1)); 592 PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum2)); 593 594 /* Get local indices in local numbering */ 595 local_size = 0; 596 local_stash_size = 0; 597 for (i = 0; i < sub_schurs->n_subs; i++) { 598 const PetscInt *idxs; 599 600 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 601 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 602 /* start (smallest in global ordering) and multiplicity */ 603 auxnum1[i] = idxs[0]; 604 auxnum2[i] = subset_size * subset_size; 605 /* subset indices in local numbering */ 606 PetscCall(PetscArraycpy(all_local_idx_N + local_size + extra, idxs, subset_size)); 607 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 608 local_size += subset_size; 609 local_stash_size += subset_size * subset_size; 610 } 611 612 /* allocate extra workspace needed only for GETRI or SYTRF */ 613 use_potr = use_sytr = PETSC_FALSE; 614 if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) { 615 use_potr = PETSC_TRUE; 616 } else if (sub_schurs->is_symmetric) { 617 use_sytr = PETSC_TRUE; 618 } 619 if (local_size && !use_potr) { 620 PetscScalar lwork, dummyscalar = 0.; 621 PetscBLASInt dummyint = 0; 622 623 B_lwork = -1; 624 PetscCall(PetscBLASIntCast(local_size, &B_N)); 625 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 626 if (use_sytr) { 627 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr)); 628 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 629 } else { 630 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr)); 631 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 632 } 633 PetscCall(PetscFPTrapPop()); 634 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 635 PetscCall(PetscMalloc2(B_lwork, &Bwork, B_N, &pivots)); 636 } else { 637 Bwork = NULL; 638 pivots = NULL; 639 } 640 641 /* prepare data for summing up properly schurs on subsets */ 642 PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum1, PETSC_OWN_POINTER, &all_subsets_n)); 643 PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap, all_subsets_n, &all_subsets)); 644 PetscCall(ISDestroy(&all_subsets_n)); 645 PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum2, PETSC_OWN_POINTER, &all_subsets_mult)); 646 PetscCall(ISRenumber(all_subsets, all_subsets_mult, &global_size, &all_subsets_n)); 647 PetscCall(ISDestroy(&all_subsets)); 648 PetscCall(ISDestroy(&all_subsets_mult)); 649 PetscCall(ISGetLocalSize(all_subsets_n, &i)); 650 PetscCheck(i == local_stash_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT, i, local_stash_size); 651 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, local_stash_size, NULL, &lstash)); 652 PetscCall(VecCreateMPI(comm_n, PETSC_DECIDE, global_size, &gstash)); 653 PetscCall(VecScatterCreate(lstash, NULL, gstash, all_subsets_n, &sstash)); 654 PetscCall(ISDestroy(&all_subsets_n)); 655 656 /* subset indices in local boundary numbering */ 657 if (!sub_schurs->is_Ej_all) { 658 PetscInt *all_local_idx_B; 659 660 PetscCall(PetscMalloc1(local_size, &all_local_idx_B)); 661 PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap, IS_GTOLM_DROP, local_size, all_local_idx_N + extra, &subset_size, all_local_idx_B)); 662 PetscCheck(subset_size == local_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT, subset_size, local_size); 663 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, local_size, all_local_idx_B, PETSC_OWN_POINTER, &sub_schurs->is_Ej_all)); 664 } 665 666 if (change) { 667 ISLocalToGlobalMapping BtoS; 668 IS change_primal_B; 669 IS change_primal_all; 670 671 PetscCheck(!sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 672 PetscCheck(!sub_schurs->change, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 673 PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change_primal_sub)); 674 for (i = 0; i < sub_schurs->n_subs; i++) { 675 ISLocalToGlobalMapping NtoS; 676 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i], &NtoS)); 677 PetscCall(ISGlobalToLocalMappingApplyIS(NtoS, IS_GTOLM_DROP, change_primal, &sub_schurs->change_primal_sub[i])); 678 PetscCall(ISLocalToGlobalMappingDestroy(&NtoS)); 679 } 680 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, change_primal, &change_primal_B)); 681 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all, &BtoS)); 682 PetscCall(ISGlobalToLocalMappingApplyIS(BtoS, IS_GTOLM_DROP, change_primal_B, &change_primal_all)); 683 PetscCall(ISLocalToGlobalMappingDestroy(&BtoS)); 684 PetscCall(ISDestroy(&change_primal_B)); 685 PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change)); 686 for (i = 0; i < sub_schurs->n_subs; i++) { 687 Mat change_sub; 688 689 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 690 PetscCall(KSPCreate(PETSC_COMM_SELF, &sub_schurs->change[i])); 691 PetscCall(KSPSetNestLevel(sub_schurs->change[i], 1)); /* do not seem to have direct access to a PC from which to get the level of nests */ 692 PetscCall(KSPSetType(sub_schurs->change[i], KSPPREONLY)); 693 if (!sub_schurs->change_with_qr) { 694 PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_sub)); 695 } else { 696 Mat change_subt; 697 PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_subt)); 698 PetscCall(MatConvert(change_subt, MATSEQDENSE, MAT_INITIAL_MATRIX, &change_sub)); 699 PetscCall(MatDestroy(&change_subt)); 700 } 701 PetscCall(KSPSetOperators(sub_schurs->change[i], change_sub, change_sub)); 702 PetscCall(MatDestroy(&change_sub)); 703 PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i], sub_schurs->prefix)); 704 PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i], "sub_schurs_change_")); 705 } 706 PetscCall(ISDestroy(&change_primal_all)); 707 } 708 709 /* Local matrix of all local Schur on subsets (transposed) */ 710 if (!sub_schurs->S_Ej_all) { 711 Mat T; 712 PetscScalar *v; 713 PetscInt *ii, *jj; 714 PetscInt cum, i, j, k; 715 716 /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks) 717 Allocate properly a representative matrix and duplicate */ 718 PetscCall(PetscMalloc3(local_size + 1, &ii, local_stash_size, &jj, local_stash_size, &v)); 719 ii[0] = 0; 720 cum = 0; 721 for (i = 0; i < sub_schurs->n_subs; i++) { 722 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 723 for (j = 0; j < subset_size; j++) { 724 const PetscInt row = cum + j; 725 PetscInt col = cum; 726 727 ii[row + 1] = ii[row] + subset_size; 728 for (k = ii[row]; k < ii[row + 1]; k++) { 729 jj[k] = col; 730 col++; 731 } 732 } 733 cum += subset_size; 734 } 735 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, local_size, local_size, ii, jj, v, &T)); 736 PetscCall(MatDuplicate(T, MAT_DO_NOT_COPY_VALUES, &sub_schurs->S_Ej_all)); 737 PetscCall(MatDestroy(&T)); 738 PetscCall(PetscFree3(ii, jj, v)); 739 } 740 /* matrices for deluxe scaling and adaptive selection */ 741 if (compute_Stilda) { 742 if (!sub_schurs->sum_S_Ej_tilda_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_tilda_all)); 743 if (!sub_schurs->sum_S_Ej_inv_all && deluxe) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_inv_all)); 744 } 745 746 /* Compute Schur complements explicitly */ 747 F = NULL; 748 if (!sub_schurs->schur_explicit) { 749 /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested; 750 it is not efficient, unless the economic version of the scaling is used */ 751 Mat S_Ej_expl; 752 PetscScalar *work; 753 PetscInt j, *dummy_idx; 754 PetscBool Sdense; 755 756 PetscCall(PetscMalloc2(max_subset_size, &dummy_idx, max_subset_size * max_subset_size, &work)); 757 local_size = 0; 758 for (i = 0; i < sub_schurs->n_subs; i++) { 759 IS is_subset_B; 760 Mat AE_EE, AE_IE, AE_EI, S_Ej; 761 762 /* subsets in original and boundary numbering */ 763 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, sub_schurs->is_subs[i], &is_subset_B)); 764 /* EE block */ 765 PetscCall(MatCreateSubMatrix(A_BB, is_subset_B, is_subset_B, MAT_INITIAL_MATRIX, &AE_EE)); 766 /* IE block */ 767 PetscCall(MatCreateSubMatrix(A_IB, is_I, is_subset_B, MAT_INITIAL_MATRIX, &AE_IE)); 768 /* EI block */ 769 if (sub_schurs->is_symmetric) { 770 PetscCall(MatCreateTranspose(AE_IE, &AE_EI)); 771 } else if (sub_schurs->is_hermitian) { 772 PetscCall(MatCreateHermitianTranspose(AE_IE, &AE_EI)); 773 } else { 774 PetscCall(MatCreateSubMatrix(A_BI, is_subset_B, is_I, MAT_INITIAL_MATRIX, &AE_EI)); 775 } 776 PetscCall(ISDestroy(&is_subset_B)); 777 PetscCall(MatCreateSchurComplement(AE_II, AE_II, AE_IE, AE_EI, AE_EE, &S_Ej)); 778 PetscCall(MatDestroy(&AE_EE)); 779 PetscCall(MatDestroy(&AE_IE)); 780 PetscCall(MatDestroy(&AE_EI)); 781 if (AE_II == A_II) { /* we can reuse the same ksp */ 782 KSP ksp; 783 PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &ksp)); 784 PetscCall(MatSchurComplementSetKSP(S_Ej, ksp)); 785 } else { /* build new ksp object which inherits ksp and pc types from the original one */ 786 KSP origksp, schurksp; 787 PC origpc, schurpc; 788 KSPType ksp_type; 789 PetscInt n_internal; 790 PetscBool ispcnone; 791 792 PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &origksp)); 793 PetscCall(MatSchurComplementGetKSP(S_Ej, &schurksp)); 794 PetscCall(KSPGetType(origksp, &ksp_type)); 795 PetscCall(KSPSetType(schurksp, ksp_type)); 796 PetscCall(KSPGetPC(schurksp, &schurpc)); 797 PetscCall(KSPGetPC(origksp, &origpc)); 798 PetscCall(PetscObjectTypeCompare((PetscObject)origpc, PCNONE, &ispcnone)); 799 if (!ispcnone) { 800 PCType pc_type; 801 PetscCall(PCGetType(origpc, &pc_type)); 802 PetscCall(PCSetType(schurpc, pc_type)); 803 } else { 804 PetscCall(PCSetType(schurpc, PCLU)); 805 } 806 PetscCall(ISGetSize(is_I, &n_internal)); 807 if (!n_internal) { /* UMFPACK gives error with 0 sized problems */ 808 MatSolverType solver = NULL; 809 PetscCall(PCFactorGetMatSolverType(origpc, &solver)); 810 if (solver) PetscCall(PCFactorSetMatSolverType(schurpc, solver)); 811 } 812 PetscCall(KSPSetUp(schurksp)); 813 } 814 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 815 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &S_Ej_expl)); 816 PetscCall(PCBDDCComputeExplicitSchur(S_Ej, sub_schurs->is_symmetric, MAT_REUSE_MATRIX, &S_Ej_expl)); 817 PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl, MATSEQDENSE, &Sdense)); 818 PetscCheck(Sdense, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented for sparse matrices"); 819 for (j = 0; j < subset_size; j++) dummy_idx[j] = local_size + j; 820 PetscCall(MatSetValues(sub_schurs->S_Ej_all, subset_size, dummy_idx, subset_size, dummy_idx, work, INSERT_VALUES)); 821 PetscCall(MatDestroy(&S_Ej)); 822 PetscCall(MatDestroy(&S_Ej_expl)); 823 local_size += subset_size; 824 } 825 PetscCall(PetscFree2(dummy_idx, work)); 826 /* free */ 827 PetscCall(ISDestroy(&is_I)); 828 PetscCall(MatDestroy(&AE_II)); 829 PetscCall(PetscFree(all_local_idx_N)); 830 } else { 831 Mat A, cs_AIB_mat = NULL, benign_AIIm1_ones_mat = NULL; 832 Mat *gdswA; 833 Vec Dall = NULL; 834 IS is_A_all, *is_p_r = NULL; 835 MatType Stype; 836 PetscScalar *work, *S_data, *schur_factor, infty = PETSC_MAX_REAL; 837 PetscScalar *SEj_arr = NULL, *SEjinv_arr = NULL; 838 const PetscScalar *rS_data; 839 PetscInt n, n_I, size_schur, size_active_schur, cum, cum2; 840 PetscBool economic, solver_S, S_lower_triangular = PETSC_FALSE; 841 PetscBool schur_has_vertices, factor_workaround; 842 PetscBool use_cholesky; 843 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 844 PetscBool oldpin; 845 #endif 846 847 /* get sizes */ 848 n_I = 0; 849 if (is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &n_I)); 850 economic = PETSC_FALSE; 851 PetscCall(ISGetLocalSize(sub_schurs->is_I, &cum)); 852 if (cum != n_I) economic = PETSC_TRUE; 853 PetscCall(MatGetLocalSize(sub_schurs->A, &n, NULL)); 854 size_active_schur = local_size; 855 856 /* import scaling vector (wrong formulation if we have 3D edges) */ 857 if (scaling && compute_Stilda) { 858 const PetscScalar *array; 859 PetscScalar *array2; 860 const PetscInt *idxs; 861 PetscInt i; 862 863 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs)); 864 PetscCall(VecCreateSeq(PETSC_COMM_SELF, size_active_schur, &Dall)); 865 PetscCall(VecGetArrayRead(scaling, &array)); 866 PetscCall(VecGetArray(Dall, &array2)); 867 for (i = 0; i < size_active_schur; i++) array2[i] = array[idxs[i]]; 868 PetscCall(VecRestoreArray(Dall, &array2)); 869 PetscCall(VecRestoreArrayRead(scaling, &array)); 870 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs)); 871 deluxe = PETSC_FALSE; 872 } 873 874 /* size active schurs does not count any dirichlet or vertex dof on the interface */ 875 factor_workaround = PETSC_FALSE; 876 schur_has_vertices = PETSC_FALSE; 877 cum = n_I + size_active_schur; 878 if (sub_schurs->is_dir) { 879 const PetscInt *idxs; 880 PetscInt n_dir; 881 882 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir)); 883 PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs)); 884 PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_dir)); 885 PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs)); 886 cum += n_dir; 887 if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE; 888 } 889 /* include the primal vertices in the Schur complement */ 890 if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) { 891 PetscInt n_v; 892 893 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v)); 894 if (n_v) { 895 const PetscInt *idxs; 896 897 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 898 PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_v)); 899 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 900 cum += n_v; 901 if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE; 902 schur_has_vertices = PETSC_TRUE; 903 } 904 } 905 size_schur = cum - n_I; 906 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, all_local_idx_N, PETSC_OWN_POINTER, &is_A_all)); 907 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 908 oldpin = sub_schurs->A->boundtocpu; 909 PetscCall(MatBindToCPU(sub_schurs->A, PETSC_TRUE)); 910 #endif 911 if (cum == n) { 912 PetscCall(ISSetPermutation(is_A_all)); 913 PetscCall(MatPermute(sub_schurs->A, is_A_all, is_A_all, &A)); 914 } else { 915 PetscCall(MatCreateSubMatrix(sub_schurs->A, is_A_all, is_A_all, MAT_INITIAL_MATRIX, &A)); 916 } 917 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 918 PetscCall(MatBindToCPU(sub_schurs->A, oldpin)); 919 #endif 920 PetscCall(MatSetOptionsPrefixFactor(A, sub_schurs->prefix)); 921 PetscCall(MatAppendOptionsPrefixFactor(A, "sub_schurs_")); 922 923 /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory 924 this is a workaround */ 925 if (benign_n) { 926 Vec v, benign_AIIm1_ones; 927 ISLocalToGlobalMapping N_to_reor; 928 IS is_p0, is_p0_p; 929 PetscScalar *cs_AIB, *AIIm1_data; 930 PetscInt sizeA; 931 932 PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all, &N_to_reor)); 933 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, benign_n, benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 934 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, is_p0, &is_p0_p)); 935 PetscCall(ISDestroy(&is_p0)); 936 PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones)); 937 PetscCall(VecGetSize(v, &sizeA)); 938 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, sizeA, benign_n, NULL, &benign_AIIm1_ones_mat)); 939 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, benign_n, NULL, &cs_AIB_mat)); 940 PetscCall(MatDenseGetArray(cs_AIB_mat, &cs_AIB)); 941 PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data)); 942 PetscCall(PetscMalloc1(benign_n, &is_p_r)); 943 /* compute colsum of A_IB restricted to pressures */ 944 for (i = 0; i < benign_n; i++) { 945 const PetscScalar *array; 946 const PetscInt *idxs; 947 PetscInt j, nz; 948 949 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, benign_zerodiag_subs[i], &is_p_r[i])); 950 PetscCall(ISGetLocalSize(is_p_r[i], &nz)); 951 PetscCall(ISGetIndices(is_p_r[i], &idxs)); 952 for (j = 0; j < nz; j++) AIIm1_data[idxs[j] + sizeA * i] = 1.; 953 PetscCall(ISRestoreIndices(is_p_r[i], &idxs)); 954 PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i)); 955 PetscCall(MatMult(A, benign_AIIm1_ones, v)); 956 PetscCall(VecResetArray(benign_AIIm1_ones)); 957 PetscCall(VecGetArrayRead(v, &array)); 958 for (j = 0; j < size_schur; j++) { 959 #if defined(PETSC_USE_COMPLEX) 960 cs_AIB[i * size_schur + j] = (PetscRealPart(array[j + n_I]) / nz + PETSC_i * (PetscImaginaryPart(array[j + n_I]) / nz)); 961 #else 962 cs_AIB[i * size_schur + j] = array[j + n_I] / nz; 963 #endif 964 } 965 PetscCall(VecRestoreArrayRead(v, &array)); 966 } 967 PetscCall(MatDenseRestoreArray(cs_AIB_mat, &cs_AIB)); 968 PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data)); 969 PetscCall(VecDestroy(&v)); 970 PetscCall(VecDestroy(&benign_AIIm1_ones)); 971 PetscCall(MatSetOption(A, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 972 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 973 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 974 PetscCall(MatZeroRowsColumnsIS(A, is_p0_p, 1.0, NULL, NULL)); 975 PetscCall(ISDestroy(&is_p0_p)); 976 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor)); 977 } 978 PetscCall(MatSetOption(A, MAT_SYMMETRIC, sub_schurs->is_symmetric)); 979 PetscCall(MatSetOption(A, MAT_HERMITIAN, sub_schurs->is_hermitian)); 980 PetscCall(MatSetOption(A, MAT_SPD, sub_schurs->is_posdef)); 981 982 /* for complexes, symmetric and hermitian at the same time implies null imaginary part */ 983 use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric); 984 985 /* when using the benign subspace trick, the local Schur complements are SPD */ 986 /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization 987 Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */ 988 if (benign_trick) { 989 sub_schurs->is_posdef = PETSC_TRUE; 990 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &flg)); 991 if (flg) use_cholesky = PETSC_FALSE; 992 } 993 994 if (n_I) { 995 IS is_schur; 996 char stype[64]; 997 PetscBool gpu = PETSC_FALSE; 998 999 if (use_cholesky) { 1000 PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_CHOLESKY, &F)); 1001 } else { 1002 PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_LU, &F)); 1003 } 1004 PetscCheck(F, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatGetFactor not supported by matrix instance of type %s. Rerun with \"-info :mat | grep MatGetFactor_\" for additional information", ((PetscObject)A)->type_name); 1005 PetscCall(MatSetErrorIfFailure(A, PETSC_TRUE)); 1006 #if defined(PETSC_HAVE_MKL_PARDISO) 1007 if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10)); 1008 #endif 1009 /* subsets ordered last */ 1010 PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is_schur)); 1011 PetscCall(MatFactorSetSchurIS(F, is_schur)); 1012 PetscCall(ISDestroy(&is_schur)); 1013 1014 /* factorization step */ 1015 if (use_cholesky) { 1016 PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL)); 1017 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */ 1018 PetscCall(MatMumpsSetIcntl(F, 19, 2)); 1019 #endif 1020 PetscCall(MatCholeskyFactorNumeric(F, A, NULL)); 1021 S_lower_triangular = PETSC_TRUE; 1022 } else { 1023 PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL)); 1024 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */ 1025 PetscCall(MatMumpsSetIcntl(F, 19, 3)); 1026 #endif 1027 PetscCall(MatLUFactorNumeric(F, A, NULL)); 1028 } 1029 PetscCall(MatViewFromOptions(F, (PetscObject)A, "-mat_factor_view")); 1030 1031 if (matl_dbg_viewer) { 1032 Mat S; 1033 IS is; 1034 1035 PetscCall(PetscObjectSetName((PetscObject)A, "A")); 1036 PetscCall(MatView(A, matl_dbg_viewer)); 1037 PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); 1038 PetscCall(PetscObjectSetName((PetscObject)S, "S")); 1039 PetscCall(MatView(S, matl_dbg_viewer)); 1040 PetscCall(MatDestroy(&S)); 1041 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_I, 0, 1, &is)); 1042 PetscCall(PetscObjectSetName((PetscObject)is, "I")); 1043 PetscCall(ISView(is, matl_dbg_viewer)); 1044 PetscCall(ISDestroy(&is)); 1045 PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is)); 1046 PetscCall(PetscObjectSetName((PetscObject)is, "B")); 1047 PetscCall(ISView(is, matl_dbg_viewer)); 1048 PetscCall(ISDestroy(&is)); 1049 PetscCall(PetscObjectSetName((PetscObject)is_A_all, "IA")); 1050 PetscCall(ISView(is_A_all, matl_dbg_viewer)); 1051 for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { 1052 IS is; 1053 char name[16]; 1054 1055 PetscCall(PetscSNPrintf(name, sizeof(name), "IE%" PetscInt_FMT, i)); 1056 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1057 PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &is)); 1058 PetscCall(PetscObjectSetName((PetscObject)is, name)); 1059 PetscCall(ISView(is, matl_dbg_viewer)); 1060 PetscCall(ISDestroy(&is)); 1061 if (sub_schurs->change) { 1062 Mat T; 1063 1064 PetscCall(PetscSNPrintf(name, sizeof(name), "TE%" PetscInt_FMT, i)); 1065 PetscCall(KSPGetOperators(sub_schurs->change[i], &T, NULL)); 1066 PetscCall(PetscObjectSetName((PetscObject)T, name)); 1067 PetscCall(MatView(T, matl_dbg_viewer)); 1068 PetscCall(PetscSNPrintf(name, sizeof(name), "ITE%" PetscInt_FMT, i)); 1069 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i], name)); 1070 PetscCall(ISView(sub_schurs->change_primal_sub[i], matl_dbg_viewer)); 1071 } 1072 cum += subset_size; 1073 } 1074 PetscCall(PetscViewerFlush(matl_dbg_viewer)); 1075 } 1076 1077 /* get explicit Schur Complement computed during numeric factorization */ 1078 PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 1079 PetscCall(PetscStrncpy(stype, MATSEQDENSE, sizeof(stype))); 1080 #if defined(PETSC_HAVE_CUDA) 1081 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &gpu, MATSEQAIJVIENNACL, MATSEQAIJCUSPARSE, "")); 1082 #endif 1083 if (gpu) PetscCall(PetscStrncpy(stype, MATSEQDENSECUDA, sizeof(stype))); 1084 PetscCall(PetscOptionsGetString(NULL, sub_schurs->prefix, "-sub_schurs_schur_mat_type", stype, sizeof(stype), NULL)); 1085 PetscCall(MatConvert(S_all, stype, MAT_INPLACE_MATRIX, &S_all)); 1086 PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef)); 1087 PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian)); 1088 PetscCall(MatGetType(S_all, &Stype)); 1089 1090 /* we can reuse the solvers if we are not using the economic version */ 1091 reuse_solvers = (PetscBool)(reuse_solvers && !economic); 1092 if (!sub_schurs->gdsw) { 1093 factor_workaround = (PetscBool)(reuse_solvers && factor_workaround); 1094 if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) reuse_solvers = factor_workaround = PETSC_FALSE; 1095 } 1096 solver_S = PETSC_TRUE; 1097 1098 /* update the Schur complement with the change of basis on the pressures */ 1099 if (benign_n) { 1100 const PetscScalar *cs_AIB; 1101 PetscScalar *S_data, *AIIm1_data; 1102 Mat S2 = NULL, S3 = NULL; /* dbg */ 1103 PetscScalar *S2_data, *S3_data; /* dbg */ 1104 Vec v, benign_AIIm1_ones; 1105 PetscInt sizeA; 1106 1107 PetscCall(MatDenseGetArray(S_all, &S_data)); 1108 PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones)); 1109 PetscCall(VecGetSize(v, &sizeA)); 1110 #if defined(PETSC_HAVE_MUMPS) 1111 PetscCall(MatMumpsSetIcntl(F, 26, 0)); 1112 #endif 1113 #if defined(PETSC_HAVE_MKL_PARDISO) 1114 PetscCall(MatMkl_PardisoSetCntl(F, 70, 1)); 1115 #endif 1116 PetscCall(MatDenseGetArrayRead(cs_AIB_mat, &cs_AIB)); 1117 PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data)); 1118 if (matl_dbg_viewer) { 1119 PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S2)); 1120 PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S3)); 1121 PetscCall(MatDenseGetArray(S2, &S2_data)); 1122 PetscCall(MatDenseGetArray(S3, &S3_data)); 1123 } 1124 for (i = 0; i < benign_n; i++) { 1125 PetscScalar *array, sum = 0., one = 1., *sums; 1126 const PetscInt *idxs; 1127 PetscInt k, j, nz; 1128 PetscBLASInt B_k, B_n; 1129 1130 PetscCall(PetscCalloc1(benign_n, &sums)); 1131 PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i)); 1132 PetscCall(VecCopy(benign_AIIm1_ones, v)); 1133 PetscCall(MatSolve(F, v, benign_AIIm1_ones)); 1134 PetscCall(MatMult(A, benign_AIIm1_ones, v)); 1135 PetscCall(VecResetArray(benign_AIIm1_ones)); 1136 /* p0 dofs (eliminated) are excluded from the sums */ 1137 for (k = 0; k < benign_n; k++) { 1138 PetscCall(ISGetLocalSize(is_p_r[k], &nz)); 1139 PetscCall(ISGetIndices(is_p_r[k], &idxs)); 1140 for (j = 0; j < nz - 1; j++) sums[k] -= AIIm1_data[idxs[j] + sizeA * i]; 1141 PetscCall(ISRestoreIndices(is_p_r[k], &idxs)); 1142 } 1143 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 1144 if (matl_dbg_viewer) { 1145 Vec vv; 1146 char name[16]; 1147 1148 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array + n_I, &vv)); 1149 PetscCall(PetscSNPrintf(name, sizeof(name), "Pvs%" PetscInt_FMT, i)); 1150 PetscCall(PetscObjectSetName((PetscObject)vv, name)); 1151 PetscCall(VecView(vv, matl_dbg_viewer)); 1152 } 1153 /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */ 1154 /* cs_AIB already scaled by 1./nz */ 1155 B_k = 1; 1156 PetscCall(PetscBLASIntCast(size_schur, &B_n)); 1157 for (k = 0; k < benign_n; k++) { 1158 sum = sums[k]; 1159 1160 if (PetscAbsScalar(sum) == 0.0) continue; 1161 if (k == i) { 1162 if (B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1163 if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n)); 1164 } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */ 1165 sum /= 2.0; 1166 if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1167 if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n)); 1168 } 1169 } 1170 sum = 1.; 1171 if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1172 if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S2_data, &B_n)); 1173 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 1174 /* set p0 entry of AIIm1_ones to zero */ 1175 PetscCall(ISGetLocalSize(is_p_r[i], &nz)); 1176 PetscCall(ISGetIndices(is_p_r[i], &idxs)); 1177 for (j = 0; j < benign_n; j++) AIIm1_data[idxs[nz - 1] + sizeA * j] = 0.; 1178 PetscCall(ISRestoreIndices(is_p_r[i], &idxs)); 1179 PetscCall(PetscFree(sums)); 1180 } 1181 PetscCall(VecDestroy(&benign_AIIm1_ones)); 1182 if (matl_dbg_viewer) { 1183 PetscCall(MatDenseRestoreArray(S2, &S2_data)); 1184 PetscCall(MatDenseRestoreArray(S3, &S3_data)); 1185 } 1186 if (!S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */ 1187 PetscInt k, j; 1188 for (k = 0; k < size_schur; k++) { 1189 for (j = k; j < size_schur; j++) S_data[j * size_schur + k] = PetscConj(S_data[k * size_schur + j]); 1190 } 1191 } 1192 1193 /* restore defaults */ 1194 #if defined(PETSC_HAVE_MUMPS) 1195 PetscCall(MatMumpsSetIcntl(F, 26, -1)); 1196 #endif 1197 #if defined(PETSC_HAVE_MKL_PARDISO) 1198 PetscCall(MatMkl_PardisoSetCntl(F, 70, 0)); 1199 #endif 1200 PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat, &cs_AIB)); 1201 PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data)); 1202 PetscCall(VecDestroy(&v)); 1203 PetscCall(MatDenseRestoreArray(S_all, &S_data)); 1204 if (matl_dbg_viewer) { 1205 Mat S; 1206 1207 PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); 1208 PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); 1209 PetscCall(PetscObjectSetName((PetscObject)S, "Sb")); 1210 PetscCall(MatView(S, matl_dbg_viewer)); 1211 PetscCall(MatDestroy(&S)); 1212 PetscCall(PetscObjectSetName((PetscObject)S2, "S2P")); 1213 PetscCall(MatView(S2, matl_dbg_viewer)); 1214 PetscCall(PetscObjectSetName((PetscObject)S3, "S3P")); 1215 PetscCall(MatView(S3, matl_dbg_viewer)); 1216 PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat, "cs")); 1217 PetscCall(MatView(cs_AIB_mat, matl_dbg_viewer)); 1218 PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 1219 } 1220 PetscCall(MatDestroy(&S2)); 1221 PetscCall(MatDestroy(&S3)); 1222 } 1223 if (!reuse_solvers) { 1224 for (i = 0; i < benign_n; i++) PetscCall(ISDestroy(&is_p_r[i])); 1225 PetscCall(PetscFree(is_p_r)); 1226 PetscCall(MatDestroy(&cs_AIB_mat)); 1227 PetscCall(MatDestroy(&benign_AIIm1_ones_mat)); 1228 } 1229 } else { /* we can't use MatFactor when size_schur == size_of_the_problem */ 1230 PetscCall(MatConvert(A, MATSEQDENSE, MAT_INITIAL_MATRIX, &S_all)); 1231 PetscCall(MatGetType(S_all, &Stype)); 1232 reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */ 1233 factor_workaround = PETSC_FALSE; 1234 solver_S = PETSC_FALSE; 1235 } 1236 1237 if (reuse_solvers) { 1238 Mat A_II, pA_II, Afake; 1239 Vec vec1_B; 1240 PCBDDCReuseSolvers msolv_ctx; 1241 PetscInt n_R; 1242 1243 if (sub_schurs->reuse_solver) { 1244 PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1245 } else { 1246 PetscCall(PetscNew(&sub_schurs->reuse_solver)); 1247 } 1248 msolv_ctx = sub_schurs->reuse_solver; 1249 PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, &pA_II, NULL, NULL, NULL)); 1250 PetscCall(PetscObjectReference((PetscObject)F)); 1251 msolv_ctx->F = F; 1252 PetscCall(MatCreateVecs(F, &msolv_ctx->sol, NULL)); 1253 /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */ 1254 { 1255 PetscScalar *array; 1256 PetscInt n; 1257 1258 PetscCall(VecGetLocalSize(msolv_ctx->sol, &n)); 1259 PetscCall(VecGetArray(msolv_ctx->sol, &array)); 1260 PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol), 1, n, array, &msolv_ctx->rhs)); 1261 PetscCall(VecRestoreArray(msolv_ctx->sol, &array)); 1262 } 1263 msolv_ctx->has_vertices = schur_has_vertices; 1264 1265 /* interior solver */ 1266 PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->interior_solver)); 1267 PetscCall(PCSetOperators(msolv_ctx->interior_solver, A_II, pA_II)); 1268 PetscCall(PCSetType(msolv_ctx->interior_solver, PCSHELL)); 1269 PetscCall(PCShellSetName(msolv_ctx->interior_solver, "Interior solver (w/o Schur factorization)")); 1270 PetscCall(PCShellSetContext(msolv_ctx->interior_solver, msolv_ctx)); 1271 PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View)); 1272 PetscCall(PCShellSetApply(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Interior)); 1273 PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver, PCBDDCReuseSolvers_InteriorTranspose)); 1274 if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Destroy)); 1275 1276 /* correction solver */ 1277 if (!sub_schurs->gdsw) { 1278 PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->correction_solver)); 1279 PetscCall(PCSetType(msolv_ctx->correction_solver, PCSHELL)); 1280 PetscCall(PCShellSetName(msolv_ctx->correction_solver, "Correction solver (with Schur factorization)")); 1281 PetscCall(PCShellSetContext(msolv_ctx->correction_solver, msolv_ctx)); 1282 PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View)); 1283 PetscCall(PCShellSetApply(msolv_ctx->correction_solver, PCBDDCReuseSolvers_Correction)); 1284 PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver, PCBDDCReuseSolvers_CorrectionTranspose)); 1285 1286 /* scatter and vecs for Schur complement solver */ 1287 PetscCall(MatCreateVecs(S_all, &msolv_ctx->sol_B, &msolv_ctx->rhs_B)); 1288 PetscCall(MatCreateVecs(sub_schurs->S, &vec1_B, NULL)); 1289 if (!schur_has_vertices) { 1290 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_A_all, &msolv_ctx->is_B)); 1291 PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, NULL, &msolv_ctx->correction_scatter_B)); 1292 PetscCall(PetscObjectReference((PetscObject)is_A_all)); 1293 msolv_ctx->is_R = is_A_all; 1294 } else { 1295 IS is_B_all; 1296 const PetscInt *idxs; 1297 PetscInt dual, n_v, n; 1298 1299 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v)); 1300 dual = size_schur - n_v; 1301 PetscCall(ISGetLocalSize(is_A_all, &n)); 1302 PetscCall(ISGetIndices(is_A_all, &idxs)); 1303 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), dual, idxs + n_I, PETSC_COPY_VALUES, &is_B_all)); 1304 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_B_all, &msolv_ctx->is_B)); 1305 PetscCall(ISDestroy(&is_B_all)); 1306 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all), dual, 0, 1, &is_B_all)); 1307 PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, is_B_all, &msolv_ctx->correction_scatter_B)); 1308 PetscCall(ISDestroy(&is_B_all)); 1309 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), n - n_v, idxs, PETSC_COPY_VALUES, &msolv_ctx->is_R)); 1310 PetscCall(ISRestoreIndices(is_A_all, &idxs)); 1311 } 1312 PetscCall(ISGetLocalSize(msolv_ctx->is_R, &n_R)); 1313 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_R, n_R, 0, NULL, &Afake)); 1314 PetscCall(MatAssemblyBegin(Afake, MAT_FINAL_ASSEMBLY)); 1315 PetscCall(MatAssemblyEnd(Afake, MAT_FINAL_ASSEMBLY)); 1316 PetscCall(PCSetOperators(msolv_ctx->correction_solver, Afake, Afake)); 1317 PetscCall(MatDestroy(&Afake)); 1318 PetscCall(VecDestroy(&vec1_B)); 1319 } 1320 /* communicate benign info to solver context */ 1321 if (benign_n) { 1322 PetscScalar *array; 1323 1324 msolv_ctx->benign_n = benign_n; 1325 msolv_ctx->benign_zerodiag_subs = is_p_r; 1326 PetscCall(PetscMalloc1(benign_n, &msolv_ctx->benign_save_vals)); 1327 msolv_ctx->benign_csAIB = cs_AIB_mat; 1328 PetscCall(MatCreateVecs(cs_AIB_mat, &msolv_ctx->benign_corr_work, NULL)); 1329 PetscCall(VecGetArray(msolv_ctx->benign_corr_work, &array)); 1330 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array, &msolv_ctx->benign_dummy_schur_vec)); 1331 PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work, &array)); 1332 msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat; 1333 } 1334 } else { 1335 if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1336 PetscCall(PetscFree(sub_schurs->reuse_solver)); 1337 } 1338 PetscCall(MatDestroy(&A)); 1339 PetscCall(ISDestroy(&is_A_all)); 1340 1341 /* Work arrays */ 1342 PetscCall(PetscMalloc1(max_subset_size * max_subset_size, &work)); 1343 1344 /* S_Ej_all */ 1345 cum = cum2 = 0; 1346 PetscCall(MatDenseGetArrayRead(S_all, &rS_data)); 1347 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &SEj_arr)); 1348 if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr)); 1349 if (sub_schurs->gdsw) PetscCall(MatCreateSubMatrices(sub_schurs->A, sub_schurs->n_subs, sub_schurs->is_subs, sub_schurs->is_subs, MAT_INITIAL_MATRIX, &gdswA)); 1350 for (i = 0; i < sub_schurs->n_subs; i++) { 1351 PetscInt j; 1352 1353 /* get S_E (or K^i_EE for GDSW) */ 1354 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1355 if (sub_schurs->gdsw) { 1356 Mat T; 1357 1358 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &T)); 1359 PetscCall(MatConvert(gdswA[i], MATDENSE, MAT_REUSE_MATRIX, &T)); 1360 PetscCall(MatDestroy(&T)); 1361 } else { 1362 if (S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */ 1363 PetscInt k; 1364 for (k = 0; k < subset_size; k++) { 1365 for (j = k; j < subset_size; j++) { 1366 work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 1367 work[j * subset_size + k] = PetscConj(rS_data[cum2 + k * size_schur + j]); 1368 } 1369 } 1370 } else { /* just copy to workspace */ 1371 PetscInt k; 1372 for (k = 0; k < subset_size; k++) { 1373 for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 1374 } 1375 } 1376 } 1377 /* insert S_E values */ 1378 if (sub_schurs->change) { 1379 Mat change_sub, SEj, T; 1380 1381 /* change basis */ 1382 PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL)); 1383 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj)); 1384 if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 1385 Mat T2; 1386 PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2)); 1387 PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 1388 PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T)); 1389 PetscCall(MatDestroy(&T2)); 1390 } else { 1391 PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 1392 } 1393 PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN)); 1394 PetscCall(MatDestroy(&T)); 1395 PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], 1.0, NULL, NULL)); 1396 PetscCall(MatDestroy(&SEj)); 1397 } 1398 PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size)); 1399 if (compute_Stilda) { 1400 if (deluxe) { /* if adaptivity is requested, invert S_E blocks */ 1401 Mat M; 1402 const PetscScalar *vals; 1403 PetscBool isdense, isdensecuda; 1404 1405 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &M)); 1406 PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef)); 1407 PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian)); 1408 if (!PetscBTLookup(sub_schurs->is_edge, i)) PetscCall(MatSetType(M, Stype)); 1409 PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSE, &isdense)); 1410 PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSECUDA, &isdensecuda)); 1411 if (use_cholesky) { 1412 PetscCall(MatCholeskyFactor(M, NULL, NULL)); 1413 } else { 1414 PetscCall(MatLUFactor(M, NULL, NULL, NULL)); 1415 } 1416 if (isdense) { 1417 PetscCall(MatSeqDenseInvertFactors_Private(M)); 1418 #if defined(PETSC_HAVE_CUDA) 1419 } else if (isdensecuda) { 1420 PetscCall(MatSeqDenseCUDAInvertFactors_Internal(M)); 1421 #endif 1422 } else SETERRQ(PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "Not implemented for type %s", Stype); 1423 PetscCall(MatDenseGetArrayRead(M, &vals)); 1424 PetscCall(PetscArraycpy(SEjinv_arr, vals, subset_size * subset_size)); 1425 PetscCall(MatDenseRestoreArrayRead(M, &vals)); 1426 PetscCall(MatDestroy(&M)); 1427 } else if (scaling) { /* not using deluxe */ 1428 Mat SEj; 1429 Vec D; 1430 PetscScalar *array; 1431 1432 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj)); 1433 PetscCall(VecGetArray(Dall, &array)); 1434 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, array + cum, &D)); 1435 PetscCall(VecRestoreArray(Dall, &array)); 1436 PetscCall(VecShift(D, -1.)); 1437 PetscCall(MatDiagonalScale(SEj, D, D)); 1438 PetscCall(MatDestroy(&SEj)); 1439 PetscCall(VecDestroy(&D)); 1440 PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size)); 1441 } 1442 } 1443 cum += subset_size; 1444 cum2 += subset_size * (size_schur + 1); 1445 SEj_arr += subset_size * subset_size; 1446 if (SEjinv_arr) SEjinv_arr += subset_size * subset_size; 1447 } 1448 if (sub_schurs->gdsw) PetscCall(MatDestroySubMatrices(sub_schurs->n_subs, &gdswA)); 1449 PetscCall(MatDenseRestoreArrayRead(S_all, &rS_data)); 1450 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &SEj_arr)); 1451 if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr)); 1452 if (solver_S) PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); 1453 1454 /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory 1455 however, preliminary tests indicate using GPUs is still faster in the solve phase */ 1456 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 1457 if (reuse_solvers) { 1458 Mat St; 1459 MatFactorSchurStatus st; 1460 1461 flg = PETSC_FALSE; 1462 PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_schur_pin_to_cpu", &flg, NULL)); 1463 PetscCall(MatFactorGetSchurComplement(F, &St, &st)); 1464 PetscCall(MatBindToCPU(St, flg)); 1465 PetscCall(MatFactorRestoreSchurComplement(F, &St, st)); 1466 } 1467 #endif 1468 1469 schur_factor = NULL; 1470 if (compute_Stilda && size_active_schur) { 1471 if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */ 1472 PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1473 PetscCall(PetscArraycpy(SEjinv_arr, work, size_schur * size_schur)); 1474 PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1475 } else { 1476 Mat S_all_inv = NULL; 1477 1478 if (solver_S && !sub_schurs->gdsw) { 1479 /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1. 1480 The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */ 1481 if (factor_workaround) { /* invert without calling MatFactorInvertSchurComplement, since we are hacking */ 1482 PetscScalar *data; 1483 PetscInt nd = 0; 1484 1485 PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices"); 1486 PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL)); 1487 PetscCall(MatDenseGetArray(S_all_inv, &data)); 1488 if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 1489 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 1490 } 1491 1492 /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */ 1493 if (schur_has_vertices) { 1494 Mat M; 1495 PetscScalar *tdata; 1496 PetscInt nv = 0, news; 1497 1498 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 1499 news = size_active_schur + nv; 1500 PetscCall(PetscCalloc1(news * news, &tdata)); 1501 for (i = 0; i < size_active_schur; i++) { 1502 PetscCall(PetscArraycpy(tdata + i * (news + 1), data + i * (size_schur + 1), size_active_schur - i)); 1503 PetscCall(PetscArraycpy(tdata + i * (news + 1) + size_active_schur - i, data + i * size_schur + size_active_schur + nd, nv)); 1504 } 1505 for (i = 0; i < nv; i++) { 1506 PetscInt k = i + size_active_schur; 1507 PetscCall(PetscArraycpy(tdata + k * (news + 1), data + (k + nd) * (size_schur + 1), nv - i)); 1508 } 1509 1510 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, news, news, tdata, &M)); 1511 PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE)); 1512 PetscCall(MatCholeskyFactor(M, NULL, NULL)); 1513 /* save the factors */ 1514 cum = 0; 1515 PetscCall(PetscMalloc1((size_active_schur * (size_active_schur + 1)) / 2 + nd, &schur_factor)); 1516 for (i = 0; i < size_active_schur; i++) { 1517 PetscCall(PetscArraycpy(schur_factor + cum, tdata + i * (news + 1), size_active_schur - i)); 1518 cum += size_active_schur - i; 1519 } 1520 for (i = 0; i < nd; i++) schur_factor[cum + i] = PetscSqrtReal(PetscRealPart(data[(i + size_active_schur) * (size_schur + 1)])); 1521 PetscCall(MatSeqDenseInvertFactors_Private(M)); 1522 /* move back just the active dofs to the Schur complement */ 1523 for (i = 0; i < size_active_schur; i++) PetscCall(PetscArraycpy(data + i * size_schur, tdata + i * news, size_active_schur)); 1524 PetscCall(PetscFree(tdata)); 1525 PetscCall(MatDestroy(&M)); 1526 } else { /* we can factorize and invert just the activedofs */ 1527 Mat M; 1528 PetscScalar *aux; 1529 1530 PetscCall(PetscMalloc1(nd, &aux)); 1531 for (i = 0; i < nd; i++) aux[i] = 1.0 / data[(i + size_active_schur) * (size_schur + 1)]; 1532 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_active_schur, size_active_schur, data, &M)); 1533 PetscCall(MatDenseSetLDA(M, size_schur)); 1534 PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE)); 1535 PetscCall(MatCholeskyFactor(M, NULL, NULL)); 1536 PetscCall(MatSeqDenseInvertFactors_Private(M)); 1537 PetscCall(MatDestroy(&M)); 1538 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, nd, data + size_active_schur * size_schur, &M)); 1539 PetscCall(MatZeroEntries(M)); 1540 PetscCall(MatDestroy(&M)); 1541 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nd, size_schur, data + size_active_schur, &M)); 1542 PetscCall(MatDenseSetLDA(M, size_schur)); 1543 PetscCall(MatZeroEntries(M)); 1544 PetscCall(MatDestroy(&M)); 1545 for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = aux[i]; 1546 PetscCall(PetscFree(aux)); 1547 } 1548 PetscCall(MatDenseRestoreArray(S_all_inv, &data)); 1549 } else { /* use MatFactor calls to invert S */ 1550 PetscCall(MatFactorInvertSchurComplement(F)); 1551 PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL)); 1552 } 1553 } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */ 1554 PetscCall(PetscObjectReference((PetscObject)S_all)); 1555 S_all_inv = S_all; 1556 PetscCall(MatDenseGetArray(S_all_inv, &S_data)); 1557 PetscCall(PetscBLASIntCast(size_schur, &B_N)); 1558 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1559 if (use_potr) { 1560 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, S_data, &B_N, &B_ierr)); 1561 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 1562 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, S_data, &B_N, &B_ierr)); 1563 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 1564 } else if (use_sytr) { 1565 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 1566 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 1567 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_ierr)); 1568 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 1569 } else { 1570 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, S_data, &B_N, pivots, &B_ierr)); 1571 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 1572 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 1573 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 1574 } 1575 PetscCall(PetscLogFlops(1.0 * size_schur * size_schur * size_schur)); 1576 PetscCall(PetscFPTrapPop()); 1577 PetscCall(MatDenseRestoreArray(S_all_inv, &S_data)); 1578 } else if (sub_schurs->gdsw) { 1579 Mat tS, tX, SEj, S_II, S_IE, S_EE; 1580 KSP pS_II; 1581 PC pS_II_pc; 1582 IS EE, II; 1583 PetscInt nS; 1584 1585 PetscCall(MatFactorCreateSchurComplement(F, &tS, NULL)); 1586 PetscCall(MatGetSize(tS, &nS, NULL)); 1587 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1588 for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { /* naive implementation */ 1589 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1590 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, SEjinv_arr, &SEj)); 1591 1592 PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &EE)); 1593 PetscCall(ISComplement(EE, 0, nS, &II)); 1594 PetscCall(MatCreateSubMatrix(tS, II, II, MAT_INITIAL_MATRIX, &S_II)); 1595 PetscCall(MatCreateSubMatrix(tS, II, EE, MAT_INITIAL_MATRIX, &S_IE)); 1596 PetscCall(MatCreateSubMatrix(tS, EE, EE, MAT_INITIAL_MATRIX, &S_EE)); 1597 PetscCall(ISDestroy(&II)); 1598 PetscCall(ISDestroy(&EE)); 1599 1600 PetscCall(KSPCreate(PETSC_COMM_SELF, &pS_II)); 1601 PetscCall(KSPSetNestLevel(pS_II, 1)); /* do not have direct access to a PC to provide the level of nesting of the KSP */ 1602 PetscCall(KSPSetType(pS_II, KSPPREONLY)); 1603 PetscCall(KSPGetPC(pS_II, &pS_II_pc)); 1604 PetscCall(PCSetType(pS_II_pc, PCSVD)); 1605 PetscCall(KSPSetOptionsPrefix(pS_II, sub_schurs->prefix)); 1606 PetscCall(KSPAppendOptionsPrefix(pS_II, "pseudo_")); 1607 PetscCall(KSPSetOperators(pS_II, S_II, S_II)); 1608 PetscCall(MatDestroy(&S_II)); 1609 PetscCall(KSPSetFromOptions(pS_II)); 1610 PetscCall(KSPSetUp(pS_II)); 1611 PetscCall(MatDuplicate(S_IE, MAT_DO_NOT_COPY_VALUES, &tX)); 1612 PetscCall(KSPMatSolve(pS_II, S_IE, tX)); 1613 PetscCall(KSPDestroy(&pS_II)); 1614 1615 PetscCall(MatTransposeMatMult(S_IE, tX, MAT_REUSE_MATRIX, PETSC_DETERMINE, &SEj)); 1616 PetscCall(MatDestroy(&S_IE)); 1617 PetscCall(MatDestroy(&tX)); 1618 PetscCall(MatAYPX(SEj, -1, S_EE, SAME_NONZERO_PATTERN)); 1619 PetscCall(MatDestroy(&S_EE)); 1620 1621 PetscCall(MatDestroy(&SEj)); 1622 cum += subset_size; 1623 SEjinv_arr += subset_size * subset_size; 1624 } 1625 PetscCall(MatDestroy(&tS)); 1626 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1627 } 1628 /* S_Ej_tilda_all */ 1629 cum = cum2 = 0; 1630 rS_data = NULL; 1631 if (S_all_inv) PetscCall(MatDenseGetArrayRead(S_all_inv, &rS_data)); 1632 PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1633 for (i = 0; i < sub_schurs->n_subs; i++) { 1634 PetscInt j; 1635 1636 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1637 /* get (St^-1)_E */ 1638 /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1 1639 will be properly accessed later during adaptive selection */ 1640 if (rS_data) { 1641 if (S_lower_triangular) { 1642 PetscInt k; 1643 if (sub_schurs->change) { 1644 for (k = 0; k < subset_size; k++) { 1645 for (j = k; j < subset_size; j++) { 1646 work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 1647 work[j * subset_size + k] = work[k * subset_size + j]; 1648 } 1649 } 1650 } else { 1651 for (k = 0; k < subset_size; k++) { 1652 for (j = k; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 1653 } 1654 } 1655 } else { 1656 PetscInt k; 1657 for (k = 0; k < subset_size; k++) { 1658 for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 1659 } 1660 } 1661 } 1662 if (sub_schurs->change) { 1663 Mat change_sub, SEj, T; 1664 PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1. / PETSC_SMALL; 1665 1666 /* change basis */ 1667 PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL)); 1668 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, rS_data ? work : SEjinv_arr, &SEj)); 1669 if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 1670 Mat T2; 1671 PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2)); 1672 PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 1673 PetscCall(MatDestroy(&T2)); 1674 PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T)); 1675 } else { 1676 PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 1677 } 1678 PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN)); 1679 PetscCall(MatDestroy(&T)); 1680 PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], val, NULL, NULL)); 1681 PetscCall(MatDestroy(&SEj)); 1682 } 1683 if (rS_data) PetscCall(PetscArraycpy(SEjinv_arr, work, subset_size * subset_size)); 1684 cum += subset_size; 1685 cum2 += subset_size * (size_schur + 1); 1686 SEjinv_arr += subset_size * subset_size; 1687 } 1688 PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1689 if (S_all_inv) { 1690 PetscCall(MatDenseRestoreArrayRead(S_all_inv, &rS_data)); 1691 if (solver_S) { 1692 if (schur_has_vertices) { 1693 PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_FACTORED)); 1694 } else { 1695 PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_INVERTED)); 1696 } 1697 } 1698 } 1699 PetscCall(MatDestroy(&S_all_inv)); 1700 } 1701 1702 /* move back factors if needed */ 1703 if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) { 1704 Mat S_tmp; 1705 PetscInt nd = 0; 1706 PetscScalar *data; 1707 1708 PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices"); 1709 PetscCheck(solver_S, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 1710 PetscCall(MatFactorGetSchurComplement(F, &S_tmp, NULL)); 1711 PetscCall(MatDenseGetArray(S_tmp, &data)); 1712 PetscCall(PetscArrayzero(data, size_schur * size_schur)); 1713 1714 if (S_lower_triangular) { 1715 cum = 0; 1716 for (i = 0; i < size_active_schur; i++) { 1717 PetscCall(PetscArraycpy(data + i * (size_schur + 1), schur_factor + cum, size_active_schur - i)); 1718 cum += size_active_schur - i; 1719 } 1720 } else { 1721 PetscCall(PetscArraycpy(data, schur_factor, size_schur * size_schur)); 1722 } 1723 if (sub_schurs->is_dir) { 1724 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 1725 for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = schur_factor[cum + i]; 1726 } 1727 /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions, 1728 set the diagonal entry of the Schur factor to a very large value */ 1729 for (i = size_active_schur + nd; i < size_schur; i++) data[i * (size_schur + 1)] = infty; 1730 PetscCall(MatDenseRestoreArray(S_tmp, &data)); 1731 PetscCall(MatFactorRestoreSchurComplement(F, &S_tmp, MAT_FACTOR_SCHUR_FACTORED)); 1732 } 1733 } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */ 1734 PetscScalar *data; 1735 PetscInt nd = 0; 1736 1737 if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 1738 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 1739 } 1740 PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 1741 PetscCall(MatDenseGetArray(S_all, &data)); 1742 for (i = 0; i < size_active_schur; i++) PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur)); 1743 for (i = size_active_schur + nd; i < size_schur; i++) { 1744 PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur)); 1745 data[i * (size_schur + 1)] = infty; 1746 } 1747 PetscCall(MatDenseRestoreArray(S_all, &data)); 1748 PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); 1749 } 1750 PetscCall(PetscFree(work)); 1751 PetscCall(PetscFree(schur_factor)); 1752 PetscCall(VecDestroy(&Dall)); 1753 } 1754 PetscCall(ISDestroy(&is_I_layer)); 1755 PetscCall(MatDestroy(&S_all)); 1756 PetscCall(MatDestroy(&A_BB)); 1757 PetscCall(MatDestroy(&A_IB)); 1758 PetscCall(MatDestroy(&A_BI)); 1759 PetscCall(MatDestroy(&F)); 1760 1761 PetscCall(PetscMalloc1(sub_schurs->n_subs, &nnz)); 1762 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &nnz[i])); 1763 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, sub_schurs->n_subs, nnz, PETSC_OWN_POINTER, &is_I_layer)); 1764 PetscCall(MatSetVariableBlockSizes(sub_schurs->S_Ej_all, sub_schurs->n_subs, nnz)); 1765 PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY)); 1766 PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY)); 1767 if (compute_Stilda) { 1768 PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all, sub_schurs->n_subs, nnz)); 1769 PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY)); 1770 PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY)); 1771 if (deluxe) { 1772 PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all, sub_schurs->n_subs, nnz)); 1773 PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY)); 1774 PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY)); 1775 } 1776 } 1777 PetscCall(ISDestroy(&is_I_layer)); 1778 1779 /* Get local part of (\sum_j S_Ej) */ 1780 if (!sub_schurs->sum_S_Ej_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_all)); 1781 PetscCall(VecSet(gstash, 0.0)); 1782 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &stasharray)); 1783 PetscCall(VecPlaceArray(lstash, stasharray)); 1784 PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1785 PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1786 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &stasharray)); 1787 PetscCall(VecResetArray(lstash)); 1788 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &stasharray)); 1789 PetscCall(VecPlaceArray(lstash, stasharray)); 1790 PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1791 PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1792 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &stasharray)); 1793 PetscCall(VecResetArray(lstash)); 1794 1795 /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */ 1796 if (compute_Stilda) { 1797 PetscCall(VecSet(gstash, 0.0)); 1798 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray)); 1799 PetscCall(VecPlaceArray(lstash, stasharray)); 1800 PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1801 PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1802 PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1803 PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1804 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray)); 1805 PetscCall(VecResetArray(lstash)); 1806 if (deluxe) { 1807 PetscCall(VecSet(gstash, 0.0)); 1808 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &stasharray)); 1809 PetscCall(VecPlaceArray(lstash, stasharray)); 1810 PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1811 PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1812 PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1813 PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1814 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &stasharray)); 1815 PetscCall(VecResetArray(lstash)); 1816 } else if (!sub_schurs->gdsw) { 1817 PetscScalar *array; 1818 PetscInt cum; 1819 1820 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &array)); 1821 cum = 0; 1822 for (i = 0; i < sub_schurs->n_subs; i++) { 1823 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1824 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 1825 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1826 if (use_potr) { 1827 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, array + cum, &B_N, &B_ierr)); 1828 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 1829 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, array + cum, &B_N, &B_ierr)); 1830 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 1831 } else if (use_sytr) { 1832 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 1833 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 1834 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_ierr)); 1835 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 1836 } else { 1837 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, array + cum, &B_N, pivots, &B_ierr)); 1838 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 1839 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 1840 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 1841 } 1842 PetscCall(PetscLogFlops(1.0 * subset_size * subset_size * subset_size)); 1843 PetscCall(PetscFPTrapPop()); 1844 cum += subset_size * subset_size; 1845 } 1846 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &array)); 1847 PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all)); 1848 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 1849 sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all; 1850 } 1851 } 1852 PetscCall(VecDestroy(&lstash)); 1853 PetscCall(VecDestroy(&gstash)); 1854 PetscCall(VecScatterDestroy(&sstash)); 1855 1856 if (matl_dbg_viewer) { 1857 if (sub_schurs->S_Ej_all) { 1858 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all, "SE")); 1859 PetscCall(MatView(sub_schurs->S_Ej_all, matl_dbg_viewer)); 1860 } 1861 if (sub_schurs->sum_S_Ej_all) { 1862 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all, "SSE")); 1863 PetscCall(MatView(sub_schurs->sum_S_Ej_all, matl_dbg_viewer)); 1864 } 1865 if (sub_schurs->sum_S_Ej_inv_all) { 1866 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all, "SSEm")); 1867 PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all, matl_dbg_viewer)); 1868 } 1869 if (sub_schurs->sum_S_Ej_tilda_all) { 1870 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all, "SSEt")); 1871 PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all, matl_dbg_viewer)); 1872 } 1873 } 1874 1875 /* free workspace */ 1876 if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer)); 1877 if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n)); 1878 PetscCall(PetscViewerDestroy(&matl_dbg_viewer)); 1879 PetscCall(PetscFree2(Bwork, pivots)); 1880 PetscCall(PetscCommDestroy(&comm_n)); 1881 PetscFunctionReturn(PETSC_SUCCESS); 1882 } 1883 1884 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char *prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw) 1885 { 1886 IS *faces, *edges, *all_cc, vertices; 1887 PetscInt s, i, n_faces, n_edges, n_all_cc; 1888 PetscBool is_sorted, ispardiso, ismumps; 1889 1890 PetscFunctionBegin; 1891 PetscCall(ISSorted(is_I, &is_sorted)); 1892 PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_I), PETSC_ERR_PLIB, "IS for I dofs should be shorted"); 1893 PetscCall(ISSorted(is_B, &is_sorted)); 1894 PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_B), PETSC_ERR_PLIB, "IS for B dofs should be shorted"); 1895 1896 /* reset any previous data */ 1897 PetscCall(PCBDDCSubSchursReset(sub_schurs)); 1898 1899 sub_schurs->gdsw = gdsw; 1900 1901 /* get index sets for faces and edges (already sorted by global ordering) */ 1902 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices)); 1903 n_all_cc = n_faces + n_edges; 1904 PetscCall(PetscBTCreate(n_all_cc, &sub_schurs->is_edge)); 1905 PetscCall(PetscMalloc1(n_all_cc, &all_cc)); 1906 n_all_cc = 0; 1907 for (i = 0; i < n_faces; i++) { 1908 PetscCall(ISGetSize(faces[i], &s)); 1909 if (!s) continue; 1910 if (copycc) { 1911 PetscCall(ISDuplicate(faces[i], &all_cc[n_all_cc])); 1912 } else { 1913 PetscCall(PetscObjectReference((PetscObject)faces[i])); 1914 all_cc[n_all_cc] = faces[i]; 1915 } 1916 n_all_cc++; 1917 } 1918 for (i = 0; i < n_edges; i++) { 1919 PetscCall(ISGetSize(edges[i], &s)); 1920 if (!s) continue; 1921 if (copycc) { 1922 PetscCall(ISDuplicate(edges[i], &all_cc[n_all_cc])); 1923 } else { 1924 PetscCall(PetscObjectReference((PetscObject)edges[i])); 1925 all_cc[n_all_cc] = edges[i]; 1926 } 1927 PetscCall(PetscBTSet(sub_schurs->is_edge, n_all_cc)); 1928 n_all_cc++; 1929 } 1930 PetscCall(PetscObjectReference((PetscObject)vertices)); 1931 sub_schurs->is_vertices = vertices; 1932 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices)); 1933 sub_schurs->is_dir = NULL; 1934 PetscCall(PCBDDCGraphGetDirichletDofsB(graph, &sub_schurs->is_dir)); 1935 1936 /* Determine if MatFactor can be used */ 1937 PetscCall(PetscStrallocpy(prefix, &sub_schurs->prefix)); 1938 #if defined(PETSC_HAVE_MUMPS) 1939 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMUMPS, sizeof(sub_schurs->mat_solver_type))); 1940 #elif defined(PETSC_HAVE_MKL_PARDISO) 1941 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, sizeof(sub_schurs->mat_solver_type))); 1942 #else 1943 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERPETSC, sizeof(sub_schurs->mat_solver_type))); 1944 #endif 1945 #if defined(PETSC_USE_COMPLEX) 1946 sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */ 1947 #else 1948 sub_schurs->is_hermitian = PETSC_TRUE; 1949 #endif 1950 sub_schurs->is_posdef = PETSC_TRUE; 1951 sub_schurs->is_symmetric = PETSC_TRUE; 1952 sub_schurs->debug = PETSC_FALSE; 1953 sub_schurs->restrict_comm = PETSC_FALSE; 1954 PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 1955 PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type", "Specific direct solver to use", NULL, sub_schurs->mat_solver_type, sub_schurs->mat_solver_type, sizeof(sub_schurs->mat_solver_type), NULL)); 1956 PetscCall(PetscOptionsBool("-sub_schurs_symmetric", "Symmetric problem", NULL, sub_schurs->is_symmetric, &sub_schurs->is_symmetric, NULL)); 1957 PetscCall(PetscOptionsBool("-sub_schurs_hermitian", "Hermitian problem", NULL, sub_schurs->is_hermitian, &sub_schurs->is_hermitian, NULL)); 1958 PetscCall(PetscOptionsBool("-sub_schurs_posdef", "Positive definite problem", NULL, sub_schurs->is_posdef, &sub_schurs->is_posdef, NULL)); 1959 PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm", "Restrict communicator on active processes only", NULL, sub_schurs->restrict_comm, &sub_schurs->restrict_comm, NULL)); 1960 PetscCall(PetscOptionsBool("-sub_schurs_debug", "Debug output", NULL, sub_schurs->debug, &sub_schurs->debug, NULL)); 1961 PetscOptionsEnd(); 1962 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMUMPS, &ismumps)); 1963 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &ispardiso)); 1964 sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps); 1965 1966 /* for reals, symmetric and Hermitian are synonyms */ 1967 #if !defined(PETSC_USE_COMPLEX) 1968 sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian); 1969 sub_schurs->is_hermitian = sub_schurs->is_symmetric; 1970 #endif 1971 1972 PetscCall(PetscObjectReference((PetscObject)is_I)); 1973 sub_schurs->is_I = is_I; 1974 PetscCall(PetscObjectReference((PetscObject)is_B)); 1975 sub_schurs->is_B = is_B; 1976 PetscCall(PetscObjectReference((PetscObject)graph->l2gmap)); 1977 sub_schurs->l2gmap = graph->l2gmap; 1978 PetscCall(PetscObjectReference((PetscObject)BtoNmap)); 1979 sub_schurs->BtoNmap = BtoNmap; 1980 sub_schurs->n_subs = n_all_cc; 1981 sub_schurs->is_subs = all_cc; 1982 sub_schurs->S_Ej_all = NULL; 1983 sub_schurs->sum_S_Ej_all = NULL; 1984 sub_schurs->sum_S_Ej_inv_all = NULL; 1985 sub_schurs->sum_S_Ej_tilda_all = NULL; 1986 sub_schurs->is_Ej_all = NULL; 1987 PetscFunctionReturn(PETSC_SUCCESS); 1988 } 1989 1990 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs) 1991 { 1992 PCBDDCSubSchurs schurs_ctx; 1993 1994 PetscFunctionBegin; 1995 PetscCall(PetscNew(&schurs_ctx)); 1996 schurs_ctx->n_subs = 0; 1997 *sub_schurs = schurs_ctx; 1998 PetscFunctionReturn(PETSC_SUCCESS); 1999 } 2000 2001 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs) 2002 { 2003 PetscInt i; 2004 2005 PetscFunctionBegin; 2006 if (!sub_schurs) PetscFunctionReturn(PETSC_SUCCESS); 2007 PetscCall(PetscFree(sub_schurs->prefix)); 2008 PetscCall(MatDestroy(&sub_schurs->A)); 2009 PetscCall(MatDestroy(&sub_schurs->S)); 2010 PetscCall(ISDestroy(&sub_schurs->is_I)); 2011 PetscCall(ISDestroy(&sub_schurs->is_B)); 2012 PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap)); 2013 PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap)); 2014 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 2015 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 2016 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 2017 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 2018 PetscCall(ISDestroy(&sub_schurs->is_Ej_all)); 2019 PetscCall(ISDestroy(&sub_schurs->is_vertices)); 2020 PetscCall(ISDestroy(&sub_schurs->is_dir)); 2021 PetscCall(PetscBTDestroy(&sub_schurs->is_edge)); 2022 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISDestroy(&sub_schurs->is_subs[i])); 2023 if (sub_schurs->n_subs) PetscCall(PetscFree(sub_schurs->is_subs)); 2024 if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 2025 PetscCall(PetscFree(sub_schurs->reuse_solver)); 2026 if (sub_schurs->change) { 2027 for (i = 0; i < sub_schurs->n_subs; i++) { 2028 PetscCall(KSPDestroy(&sub_schurs->change[i])); 2029 PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i])); 2030 } 2031 } 2032 PetscCall(PetscFree(sub_schurs->change)); 2033 PetscCall(PetscFree(sub_schurs->change_primal_sub)); 2034 sub_schurs->n_subs = 0; 2035 PetscFunctionReturn(PETSC_SUCCESS); 2036 } 2037 2038 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs) 2039 { 2040 PetscFunctionBegin; 2041 PetscCall(PCBDDCSubSchursReset(*sub_schurs)); 2042 PetscCall(PetscFree(*sub_schurs)); 2043 PetscFunctionReturn(PETSC_SUCCESS); 2044 } 2045 2046 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *queue_tip, PetscInt n_prev, PetscBT touched, PetscInt *xadj, PetscInt *adjncy, PetscInt *n_added) 2047 { 2048 PetscInt i, j, n; 2049 2050 PetscFunctionBegin; 2051 n = 0; 2052 for (i = -n_prev; i < 0; i++) { 2053 PetscInt start_dof = queue_tip[i]; 2054 for (j = xadj[start_dof]; j < xadj[start_dof + 1]; j++) { 2055 PetscInt dof = adjncy[j]; 2056 if (!PetscBTLookup(touched, dof)) { 2057 PetscCall(PetscBTSet(touched, dof)); 2058 queue_tip[n] = dof; 2059 n++; 2060 } 2061 } 2062 } 2063 *n_added = n; 2064 PetscFunctionReturn(PETSC_SUCCESS); 2065 } 2066