1 #include <petsc/private/pcbddcimpl.h> 2 #include <petsc/private/pcbddcprivateimpl.h> 3 #include <../src/mat/impls/dense/seq/dense.h> 4 #include <petscblaslapack.h> 5 6 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *, PetscInt, PetscBT, PetscInt *, PetscInt *, PetscInt *); 7 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat, PetscBool, MatReuse, Mat *); 8 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC, Vec, Vec); 9 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC, Vec, Vec); 10 11 /* if v2 is not present, correction is done in-place */ 12 PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full) 13 { 14 PetscScalar *array; 15 PetscScalar *array2; 16 17 PetscFunctionBegin; 18 if (!ctx->benign_n) PetscFunctionReturn(PETSC_SUCCESS); 19 if (sol && full) { 20 PetscInt n_I, size_schur; 21 22 /* get sizes */ 23 PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL)); 24 PetscCall(VecGetSize(v, &n_I)); 25 n_I = n_I - size_schur; 26 /* get schur sol from array */ 27 PetscCall(VecGetArray(v, &array)); 28 PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I)); 29 PetscCall(VecRestoreArray(v, &array)); 30 /* apply interior sol correction */ 31 PetscCall(MatMultTranspose(ctx->benign_csAIB, ctx->benign_dummy_schur_vec, ctx->benign_corr_work)); 32 PetscCall(VecResetArray(ctx->benign_dummy_schur_vec)); 33 PetscCall(MatMultAdd(ctx->benign_AIIm1ones, ctx->benign_corr_work, v, v)); 34 } 35 if (v2) { 36 PetscInt nl; 37 38 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 39 PetscCall(VecGetLocalSize(v2, &nl)); 40 PetscCall(VecGetArray(v2, &array2)); 41 PetscCall(PetscArraycpy(array2, array, nl)); 42 } else { 43 PetscCall(VecGetArray(v, &array)); 44 array2 = array; 45 } 46 if (!sol) { /* change rhs */ 47 PetscInt n; 48 for (n = 0; n < ctx->benign_n; n++) { 49 PetscScalar sum = 0.; 50 const PetscInt *cols; 51 PetscInt nz, i; 52 53 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz)); 54 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols)); 55 for (i = 0; i < nz - 1; i++) sum += array[cols[i]]; 56 #if defined(PETSC_USE_COMPLEX) 57 sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz)); 58 #else 59 sum = -sum / nz; 60 #endif 61 for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum; 62 ctx->benign_save_vals[n] = array2[cols[nz - 1]]; 63 array2[cols[nz - 1]] = sum; 64 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols)); 65 } 66 } else { 67 PetscInt n; 68 for (n = 0; n < ctx->benign_n; n++) { 69 PetscScalar sum = 0.; 70 const PetscInt *cols; 71 PetscInt nz, i; 72 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz)); 73 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols)); 74 for (i = 0; i < nz - 1; i++) sum += array[cols[i]]; 75 #if defined(PETSC_USE_COMPLEX) 76 sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz)); 77 #else 78 sum = -sum / nz; 79 #endif 80 for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum; 81 array2[cols[nz - 1]] = ctx->benign_save_vals[n]; 82 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols)); 83 } 84 } 85 if (v2) { 86 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 87 PetscCall(VecRestoreArray(v2, &array2)); 88 } else { 89 PetscCall(VecRestoreArray(v, &array)); 90 } 91 if (!sol && full) { 92 Vec usedv; 93 PetscInt n_I, size_schur; 94 95 /* get sizes */ 96 PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL)); 97 PetscCall(VecGetSize(v, &n_I)); 98 n_I = n_I - size_schur; 99 /* compute schur rhs correction */ 100 if (v2) { 101 usedv = v2; 102 } else { 103 usedv = v; 104 } 105 /* apply schur rhs correction */ 106 PetscCall(MatMultTranspose(ctx->benign_AIIm1ones, usedv, ctx->benign_corr_work)); 107 PetscCall(VecGetArrayRead(usedv, (const PetscScalar **)&array)); 108 PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I)); 109 PetscCall(VecRestoreArrayRead(usedv, (const PetscScalar **)&array)); 110 PetscCall(MatMultAdd(ctx->benign_csAIB, ctx->benign_corr_work, ctx->benign_dummy_schur_vec, ctx->benign_dummy_schur_vec)); 111 PetscCall(VecResetArray(ctx->benign_dummy_schur_vec)); 112 } 113 PetscFunctionReturn(PETSC_SUCCESS); 114 } 115 116 static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full) 117 { 118 PCBDDCReuseSolvers ctx; 119 PetscBool copy = PETSC_FALSE; 120 121 PetscFunctionBegin; 122 PetscCall(PCShellGetContext(pc, &ctx)); 123 if (full) { 124 PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1)); 125 #if defined(PETSC_HAVE_MKL_PARDISO) 126 PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0)); 127 #endif 128 copy = ctx->has_vertices; 129 } else { /* interior solver */ 130 PetscCall(MatMumpsSetIcntl(ctx->F, 26, 0)); 131 #if defined(PETSC_HAVE_MKL_PARDISO) 132 PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 1)); 133 #endif 134 copy = PETSC_TRUE; 135 } 136 /* copy rhs into factored matrix workspace */ 137 if (copy) { 138 PetscInt n; 139 PetscScalar *array, *array_solver; 140 141 PetscCall(VecGetLocalSize(rhs, &n)); 142 PetscCall(VecGetArrayRead(rhs, (const PetscScalar **)&array)); 143 PetscCall(VecGetArray(ctx->rhs, &array_solver)); 144 PetscCall(PetscArraycpy(array_solver, array, n)); 145 PetscCall(VecRestoreArray(ctx->rhs, &array_solver)); 146 PetscCall(VecRestoreArrayRead(rhs, (const PetscScalar **)&array)); 147 148 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->rhs, NULL, PETSC_FALSE, full)); 149 if (transpose) { 150 PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, ctx->sol)); 151 } else { 152 PetscCall(MatSolve(ctx->F, ctx->rhs, ctx->sol)); 153 } 154 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->sol, NULL, PETSC_TRUE, full)); 155 156 /* get back data to caller worskpace */ 157 PetscCall(VecGetArrayRead(ctx->sol, (const PetscScalar **)&array_solver)); 158 PetscCall(VecGetArray(sol, &array)); 159 PetscCall(PetscArraycpy(array, array_solver, n)); 160 PetscCall(VecRestoreArray(sol, &array)); 161 PetscCall(VecRestoreArrayRead(ctx->sol, (const PetscScalar **)&array_solver)); 162 } else { 163 if (ctx->benign_n) { 164 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, rhs, ctx->rhs, PETSC_FALSE, full)); 165 if (transpose) { 166 PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, sol)); 167 } else { 168 PetscCall(MatSolve(ctx->F, ctx->rhs, sol)); 169 } 170 PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, sol, NULL, PETSC_TRUE, full)); 171 } else { 172 if (transpose) { 173 PetscCall(MatSolveTranspose(ctx->F, rhs, sol)); 174 } else { 175 PetscCall(MatSolve(ctx->F, rhs, sol)); 176 } 177 } 178 } 179 /* restore defaults */ 180 PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1)); 181 #if defined(PETSC_HAVE_MKL_PARDISO) 182 PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0)); 183 #endif 184 PetscFunctionReturn(PETSC_SUCCESS); 185 } 186 187 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol) 188 { 189 PetscFunctionBegin; 190 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_TRUE)); 191 PetscFunctionReturn(PETSC_SUCCESS); 192 } 193 194 static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol) 195 { 196 PetscFunctionBegin; 197 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_TRUE)); 198 PetscFunctionReturn(PETSC_SUCCESS); 199 } 200 201 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol) 202 { 203 PetscFunctionBegin; 204 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_FALSE)); 205 PetscFunctionReturn(PETSC_SUCCESS); 206 } 207 208 static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol) 209 { 210 PetscFunctionBegin; 211 PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_FALSE)); 212 PetscFunctionReturn(PETSC_SUCCESS); 213 } 214 215 static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer) 216 { 217 PCBDDCReuseSolvers ctx; 218 PetscBool isascii; 219 220 PetscFunctionBegin; 221 PetscCall(PCShellGetContext(pc, &ctx)); 222 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 223 if (isascii) PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO)); 224 PetscCall(MatView(ctx->F, viewer)); 225 if (isascii) PetscCall(PetscViewerPopFormat(viewer)); 226 PetscFunctionReturn(PETSC_SUCCESS); 227 } 228 229 static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse) 230 { 231 PetscInt i; 232 233 PetscFunctionBegin; 234 PetscCall(MatDestroy(&reuse->F)); 235 PetscCall(VecDestroy(&reuse->sol)); 236 PetscCall(VecDestroy(&reuse->rhs)); 237 PetscCall(PCDestroy(&reuse->interior_solver)); 238 PetscCall(PCDestroy(&reuse->correction_solver)); 239 PetscCall(ISDestroy(&reuse->is_R)); 240 PetscCall(ISDestroy(&reuse->is_B)); 241 PetscCall(VecScatterDestroy(&reuse->correction_scatter_B)); 242 PetscCall(VecDestroy(&reuse->sol_B)); 243 PetscCall(VecDestroy(&reuse->rhs_B)); 244 for (i = 0; i < reuse->benign_n; i++) PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i])); 245 PetscCall(PetscFree(reuse->benign_zerodiag_subs)); 246 PetscCall(PetscFree(reuse->benign_save_vals)); 247 PetscCall(MatDestroy(&reuse->benign_csAIB)); 248 PetscCall(MatDestroy(&reuse->benign_AIIm1ones)); 249 PetscCall(VecDestroy(&reuse->benign_corr_work)); 250 PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec)); 251 PetscFunctionReturn(PETSC_SUCCESS); 252 } 253 254 static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc) 255 { 256 PCBDDCReuseSolvers ctx; 257 258 PetscFunctionBegin; 259 PetscCall(PCShellGetContext(pc, &ctx)); 260 PetscCall(PCBDDCReuseSolversReset(ctx)); 261 PetscCall(PetscFree(ctx)); 262 PetscCall(PCShellSetContext(pc, NULL)); 263 PetscFunctionReturn(PETSC_SUCCESS); 264 } 265 266 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S) 267 { 268 Mat B, C, D, Bd, Cd, AinvBd; 269 KSP ksp; 270 PC pc; 271 PetscBool isLU, isILU, isCHOL, Bdense, Cdense; 272 PetscReal fill = 2.0; 273 PetscInt n_I; 274 PetscMPIInt size; 275 276 PetscFunctionBegin; 277 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M), &size)); 278 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for parallel matrices"); 279 if (reuse == MAT_REUSE_MATRIX) { 280 PetscBool Sdense; 281 282 PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense)); 283 PetscCheck(Sdense, PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "S should dense"); 284 } 285 PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D)); 286 PetscCall(MatSchurComplementGetKSP(M, &ksp)); 287 PetscCall(KSPGetPC(ksp, &pc)); 288 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCLU, &isLU)); 289 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCILU, &isILU)); 290 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCCHOLESKY, &isCHOL)); 291 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &Bdense)); 292 PetscCall(PetscObjectTypeCompare((PetscObject)C, MATSEQDENSE, &Cdense)); 293 PetscCall(MatGetSize(B, &n_I, NULL)); 294 if (n_I) { 295 if (!Bdense) { 296 PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd)); 297 } else { 298 Bd = B; 299 } 300 301 if (isLU || isILU || isCHOL) { 302 Mat fact; 303 PetscCall(KSPSetUp(ksp)); 304 PetscCall(PCFactorGetMatrix(pc, &fact)); 305 PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 306 PetscCall(MatMatSolve(fact, Bd, AinvBd)); 307 } else { 308 PetscBool ex = PETSC_TRUE; 309 310 if (ex) { 311 Mat Ainvd; 312 313 PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd)); 314 PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd)); 315 PetscCall(MatDestroy(&Ainvd)); 316 } else { 317 Vec sol, rhs; 318 PetscScalar *arrayrhs, *arraysol; 319 PetscInt i, nrhs, n; 320 321 PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd)); 322 PetscCall(MatGetSize(Bd, &n, &nrhs)); 323 PetscCall(MatDenseGetArray(Bd, &arrayrhs)); 324 PetscCall(MatDenseGetArray(AinvBd, &arraysol)); 325 PetscCall(KSPGetSolution(ksp, &sol)); 326 PetscCall(KSPGetRhs(ksp, &rhs)); 327 for (i = 0; i < nrhs; i++) { 328 PetscCall(VecPlaceArray(rhs, arrayrhs + i * n)); 329 PetscCall(VecPlaceArray(sol, arraysol + i * n)); 330 PetscCall(KSPSolve(ksp, rhs, sol)); 331 PetscCall(VecResetArray(rhs)); 332 PetscCall(VecResetArray(sol)); 333 } 334 PetscCall(MatDenseRestoreArray(Bd, &arrayrhs)); 335 PetscCall(MatDenseRestoreArray(AinvBd, &arrayrhs)); 336 } 337 } 338 if (!Bdense & !issym) PetscCall(MatDestroy(&Bd)); 339 340 if (!issym) { 341 if (!Cdense) { 342 PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd)); 343 } else { 344 Cd = C; 345 } 346 PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S)); 347 if (!Cdense) PetscCall(MatDestroy(&Cd)); 348 } else { 349 PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S)); 350 if (!Bdense) PetscCall(MatDestroy(&Bd)); 351 } 352 PetscCall(MatDestroy(&AinvBd)); 353 } 354 355 if (D) { 356 Mat Dd; 357 PetscBool Ddense; 358 359 PetscCall(PetscObjectTypeCompare((PetscObject)D, MATSEQDENSE, &Ddense)); 360 if (!Ddense) { 361 PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd)); 362 } else { 363 Dd = D; 364 } 365 if (n_I) { 366 PetscCall(MatAYPX(*S, -1.0, Dd, SAME_NONZERO_PATTERN)); 367 } else { 368 if (reuse == MAT_INITIAL_MATRIX) { 369 PetscCall(MatDuplicate(Dd, MAT_COPY_VALUES, S)); 370 } else { 371 PetscCall(MatCopy(Dd, *S, SAME_NONZERO_PATTERN)); 372 } 373 } 374 if (!Ddense) PetscCall(MatDestroy(&Dd)); 375 } else { 376 PetscCall(MatScale(*S, -1.0)); 377 } 378 PetscFunctionReturn(PETSC_SUCCESS); 379 } 380 381 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal) 382 { 383 Mat F, A_II, A_IB, A_BI, A_BB, AE_II; 384 Mat S_all; 385 Vec gstash, lstash; 386 VecScatter sstash; 387 IS is_I, is_I_layer; 388 IS all_subsets, all_subsets_mult, all_subsets_n; 389 PetscScalar *stasharray, *Bwork; 390 PetscInt *all_local_idx_N, *all_local_subid_N = NULL; 391 PetscInt *auxnum1, *auxnum2; 392 PetscInt *local_subs = sub_schurs->graph->local_subs; 393 PetscInt i, subset_size, max_subset_size, n_local_subs = sub_schurs->graph->n_local_subs; 394 PetscInt n_B, extra, local_size, global_size; 395 PetscInt local_stash_size; 396 PetscBLASInt B_N, B_ierr, B_lwork, *pivots; 397 MPI_Comm comm_n; 398 PetscBool deluxe = PETSC_TRUE; 399 PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE; 400 PetscViewer matl_dbg_viewer = NULL; 401 PetscBool flg, multi_element = sub_schurs->graph->multi_element; 402 403 PetscFunctionBegin; 404 PetscCall(MatDestroy(&sub_schurs->A)); 405 PetscCall(MatDestroy(&sub_schurs->S)); 406 if (Ain) { 407 PetscCall(PetscObjectReference((PetscObject)Ain)); 408 sub_schurs->A = Ain; 409 } 410 411 PetscCall(PetscObjectReference((PetscObject)Sin)); 412 sub_schurs->S = Sin; 413 if (sub_schurs->schur_explicit) sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A); 414 415 /* preliminary checks */ 416 PetscCheck(sub_schurs->schur_explicit || !compute_Stilda, PetscObjectComm((PetscObject)sub_schurs->l2gmap), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO"); 417 418 if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE; 419 420 /* debug (MATLAB) */ 421 if (sub_schurs->debug) { 422 PetscMPIInt size, rank; 423 PetscInt nr, *print_schurs_ranks, print_schurs = PETSC_FALSE; 424 425 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &size)); 426 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank)); 427 nr = size; 428 PetscCall(PetscMalloc1(nr, &print_schurs_ranks)); 429 PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 430 PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks", "Ranks to debug (all if the option is not used)", NULL, print_schurs_ranks, &nr, &flg)); 431 if (!flg) print_schurs = PETSC_TRUE; 432 else { 433 print_schurs = PETSC_FALSE; 434 for (i = 0; i < nr; i++) 435 if (print_schurs_ranks[i] == rank) { 436 print_schurs = PETSC_TRUE; 437 break; 438 } 439 } 440 PetscOptionsEnd(); 441 PetscCall(PetscFree(print_schurs_ranks)); 442 if (print_schurs) { 443 char filename[256]; 444 445 PetscCall(PetscSNPrintf(filename, sizeof(filename), "sub_schurs_Schur_r%d.m", PetscGlobalRank)); 446 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &matl_dbg_viewer)); 447 PetscCall(PetscViewerPushFormat(matl_dbg_viewer, PETSC_VIEWER_ASCII_MATLAB)); 448 } 449 } 450 451 /* DEBUG: turn on/off multi-element code path */ 452 PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_multielement_code", &multi_element, NULL)); 453 if (n_local_subs == 0) multi_element = PETSC_FALSE; 454 455 /* restrict work on active processes */ 456 if (sub_schurs->restrict_comm) { 457 PetscSubcomm subcomm; 458 PetscMPIInt color, rank; 459 460 color = 0; 461 if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */ 462 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank)); 463 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &subcomm)); 464 PetscCall(PetscSubcommSetNumber(subcomm, 2)); 465 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank)); 466 PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm), &comm_n, NULL)); 467 PetscCall(PetscSubcommDestroy(&subcomm)); 468 if (!sub_schurs->n_subs) { 469 PetscCall(PetscCommDestroy(&comm_n)); 470 PetscFunctionReturn(PETSC_SUCCESS); 471 } 472 } else { 473 PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &comm_n, NULL)); 474 } 475 476 /* get Schur complement matrices */ 477 if (!sub_schurs->schur_explicit) { 478 Mat tA_IB, tA_BI, tA_BB; 479 PetscBool isseqsbaij; 480 PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, NULL, &tA_IB, &tA_BI, &tA_BB)); 481 PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB, MATSEQSBAIJ, &isseqsbaij)); 482 if (isseqsbaij) { 483 PetscCall(MatConvert(tA_BB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BB)); 484 PetscCall(MatConvert(tA_IB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_IB)); 485 PetscCall(MatConvert(tA_BI, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BI)); 486 } else { 487 PetscCall(PetscObjectReference((PetscObject)tA_BB)); 488 A_BB = tA_BB; 489 PetscCall(PetscObjectReference((PetscObject)tA_IB)); 490 A_IB = tA_IB; 491 PetscCall(PetscObjectReference((PetscObject)tA_BI)); 492 A_BI = tA_BI; 493 } 494 } else { 495 A_II = NULL; 496 A_IB = NULL; 497 A_BI = NULL; 498 A_BB = NULL; 499 } 500 S_all = NULL; 501 502 /* determine interior problems */ 503 PetscCall(ISGetLocalSize(sub_schurs->is_I, &i)); 504 if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */ 505 PetscBT touched; 506 const PetscInt *idx_B; 507 PetscInt n_I, n_B, n_local_dofs, n_prev_added, j, layer, *local_numbering; 508 509 PetscCheck(xadj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot request layering without adjacency"); 510 /* get sizes */ 511 PetscCall(ISGetLocalSize(sub_schurs->is_I, &n_I)); 512 PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B)); 513 514 PetscCall(PetscMalloc1(n_I + n_B, &local_numbering)); 515 PetscCall(PetscBTCreate(n_I + n_B, &touched)); 516 PetscCall(PetscBTMemzero(n_I + n_B, touched)); 517 518 /* all boundary dofs must be skipped when adding layers */ 519 PetscCall(ISGetIndices(sub_schurs->is_B, &idx_B)); 520 for (j = 0; j < n_B; j++) PetscCall(PetscBTSet(touched, idx_B[j])); 521 PetscCall(PetscArraycpy(local_numbering, idx_B, n_B)); 522 PetscCall(ISRestoreIndices(sub_schurs->is_B, &idx_B)); 523 524 /* add prescribed number of layers of dofs */ 525 n_local_dofs = n_B; 526 n_prev_added = n_B; 527 for (layer = 0; layer < nlayers; layer++) { 528 PetscInt n_added = 0; 529 if (n_local_dofs == n_I + n_B) break; 530 PetscCheck(n_local_dofs <= n_I + n_B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")", layer, n_local_dofs, n_I + n_B); 531 PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering + n_local_dofs, n_prev_added, touched, xadj, adjncy, &n_added)); 532 n_prev_added = n_added; 533 n_local_dofs += n_added; 534 if (!n_added) break; 535 } 536 PetscCall(PetscBTDestroy(&touched)); 537 538 /* IS for I layer dofs in original numbering */ 539 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I), n_local_dofs - n_B, local_numbering + n_B, PETSC_COPY_VALUES, &is_I_layer)); 540 PetscCall(PetscFree(local_numbering)); 541 PetscCall(ISSort(is_I_layer)); 542 /* IS for I layer dofs in I numbering */ 543 if (!sub_schurs->schur_explicit) { 544 ISLocalToGlobalMapping ItoNmap; 545 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I, &ItoNmap)); 546 PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap, IS_GTOLM_DROP, is_I_layer, &is_I)); 547 PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap)); 548 549 /* II block */ 550 PetscCall(MatCreateSubMatrix(A_II, is_I, is_I, MAT_INITIAL_MATRIX, &AE_II)); 551 } 552 } else { 553 PetscInt n_I; 554 555 /* IS for I dofs in original numbering */ 556 PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I)); 557 is_I_layer = sub_schurs->is_I; 558 559 /* IS for I dofs in I numbering (strided 1) */ 560 if (!sub_schurs->schur_explicit) { 561 PetscCall(ISGetSize(sub_schurs->is_I, &n_I)); 562 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I), n_I, 0, 1, &is_I)); 563 564 /* II block is the same */ 565 PetscCall(PetscObjectReference((PetscObject)A_II)); 566 AE_II = A_II; 567 } 568 } 569 570 /* Get info on subset sizes and sum of all subsets sizes */ 571 max_subset_size = 0; 572 local_size = 0; 573 for (i = 0; i < sub_schurs->n_subs; i++) { 574 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 575 max_subset_size = PetscMax(subset_size, max_subset_size); 576 local_size += subset_size; 577 } 578 579 /* Work arrays for local indices */ 580 extra = 0; 581 PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B)); 582 if (sub_schurs->schur_explicit && is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &extra)); 583 PetscCall(PetscMalloc1(n_B + extra, &all_local_idx_N)); 584 if (multi_element) PetscCall(PetscMalloc1(n_B + extra, &all_local_subid_N)); 585 if (extra) { 586 const PetscInt *idxs; 587 PetscCall(ISGetIndices(is_I_layer, &idxs)); 588 PetscCall(PetscArraycpy(all_local_idx_N, idxs, extra)); 589 if (multi_element) 590 for (PetscInt j = 0; j < extra; j++) all_local_subid_N[j] = local_subs[idxs[j]]; 591 PetscCall(ISRestoreIndices(is_I_layer, &idxs)); 592 } 593 PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum1)); 594 PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum2)); 595 596 /* Get local indices in local numbering */ 597 local_size = 0; 598 local_stash_size = 0; 599 for (i = 0; i < sub_schurs->n_subs; i++) { 600 const PetscInt *idxs; 601 602 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 603 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs)); 604 /* start (smallest in global ordering) and multiplicity */ 605 auxnum1[i] = idxs[0]; 606 auxnum2[i] = subset_size * subset_size; 607 /* subset indices in local numbering */ 608 PetscCall(PetscArraycpy(all_local_idx_N + local_size + extra, idxs, subset_size)); 609 if (multi_element) 610 for (PetscInt j = 0; j < subset_size; j++) all_local_subid_N[j + local_size + extra] = local_subs[idxs[j]]; 611 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs)); 612 local_size += subset_size; 613 local_stash_size += subset_size * subset_size; 614 } 615 616 /* allocate extra workspace needed only for GETRI or SYTRF when inverting the blocks or the entire Schur complement */ 617 use_potr = use_sytr = PETSC_FALSE; 618 if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) { 619 use_potr = PETSC_TRUE; 620 } else if (sub_schurs->is_symmetric) { 621 use_sytr = PETSC_TRUE; 622 } 623 if (local_size && !use_potr && compute_Stilda) { 624 PetscScalar lwork, dummyscalar = 0.; 625 PetscBLASInt dummyint = 0; 626 627 B_lwork = -1; 628 PetscCall(PetscBLASIntCast(local_size, &B_N)); 629 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 630 if (use_sytr) { 631 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr)); 632 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 633 } else { 634 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr)); 635 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 636 } 637 PetscCall(PetscFPTrapPop()); 638 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork)); 639 PetscCall(PetscMalloc2(B_lwork, &Bwork, B_N, &pivots)); 640 } else { 641 Bwork = NULL; 642 pivots = NULL; 643 } 644 645 /* prepare data for summing up properly schurs on subsets */ 646 PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum1, PETSC_OWN_POINTER, &all_subsets_n)); 647 PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap, all_subsets_n, &all_subsets)); 648 PetscCall(ISDestroy(&all_subsets_n)); 649 PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum2, PETSC_OWN_POINTER, &all_subsets_mult)); 650 PetscCall(ISRenumber(all_subsets, all_subsets_mult, &global_size, &all_subsets_n)); 651 PetscCall(ISDestroy(&all_subsets)); 652 PetscCall(ISDestroy(&all_subsets_mult)); 653 PetscCall(ISGetLocalSize(all_subsets_n, &i)); 654 PetscCheck(i == local_stash_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT, i, local_stash_size); 655 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, local_stash_size, NULL, &lstash)); 656 PetscCall(VecCreateMPI(comm_n, PETSC_DECIDE, global_size, &gstash)); 657 PetscCall(VecScatterCreate(lstash, NULL, gstash, all_subsets_n, &sstash)); 658 PetscCall(ISDestroy(&all_subsets_n)); 659 660 /* subset indices in local boundary numbering */ 661 if (!sub_schurs->is_Ej_all) { 662 PetscInt *all_local_idx_B; 663 664 PetscCall(PetscMalloc1(local_size, &all_local_idx_B)); 665 PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap, IS_GTOLM_DROP, local_size, all_local_idx_N + extra, &subset_size, all_local_idx_B)); 666 PetscCheck(subset_size == local_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT, subset_size, local_size); 667 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, local_size, all_local_idx_B, PETSC_OWN_POINTER, &sub_schurs->is_Ej_all)); 668 } 669 670 if (change) { 671 ISLocalToGlobalMapping BtoS; 672 IS change_primal_B; 673 IS change_primal_all; 674 675 PetscCheck(!sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 676 PetscCheck(!sub_schurs->change, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 677 PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change_primal_sub)); 678 for (i = 0; i < sub_schurs->n_subs; i++) { 679 ISLocalToGlobalMapping NtoS; 680 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i], &NtoS)); 681 PetscCall(ISGlobalToLocalMappingApplyIS(NtoS, IS_GTOLM_DROP, change_primal, &sub_schurs->change_primal_sub[i])); 682 PetscCall(ISLocalToGlobalMappingDestroy(&NtoS)); 683 } 684 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, change_primal, &change_primal_B)); 685 PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all, &BtoS)); 686 PetscCall(ISGlobalToLocalMappingApplyIS(BtoS, IS_GTOLM_DROP, change_primal_B, &change_primal_all)); 687 PetscCall(ISLocalToGlobalMappingDestroy(&BtoS)); 688 PetscCall(ISDestroy(&change_primal_B)); 689 PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change)); 690 for (i = 0; i < sub_schurs->n_subs; i++) { 691 Mat change_sub; 692 693 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 694 PetscCall(KSPCreate(PETSC_COMM_SELF, &sub_schurs->change[i])); 695 PetscCall(KSPSetNestLevel(sub_schurs->change[i], 1)); /* do not seem to have direct access to a PC from which to get the level of nests */ 696 PetscCall(KSPSetType(sub_schurs->change[i], KSPPREONLY)); 697 if (!sub_schurs->change_with_qr) { 698 PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_sub)); 699 } else { 700 Mat change_subt; 701 PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_subt)); 702 PetscCall(MatConvert(change_subt, MATSEQDENSE, MAT_INITIAL_MATRIX, &change_sub)); 703 PetscCall(MatDestroy(&change_subt)); 704 } 705 PetscCall(KSPSetOperators(sub_schurs->change[i], change_sub, change_sub)); 706 PetscCall(MatDestroy(&change_sub)); 707 PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i], sub_schurs->prefix)); 708 PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i], "sub_schurs_change_")); 709 } 710 PetscCall(ISDestroy(&change_primal_all)); 711 } 712 713 /* Local matrix of all local Schur on subsets (transposed) */ 714 if (!sub_schurs->S_Ej_all) { 715 Mat T; 716 PetscScalar *v; 717 PetscInt *ii, *jj; 718 PetscInt cum, i, j, k; 719 720 /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks) 721 Allocate properly a representative matrix and duplicate */ 722 PetscCall(PetscMalloc3(local_size + 1, &ii, local_stash_size, &jj, local_stash_size, &v)); 723 ii[0] = 0; 724 cum = 0; 725 for (i = 0; i < sub_schurs->n_subs; i++) { 726 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 727 for (j = 0; j < subset_size; j++) { 728 const PetscInt row = cum + j; 729 PetscInt col = cum; 730 731 ii[row + 1] = ii[row] + subset_size; 732 for (k = ii[row]; k < ii[row + 1]; k++) { 733 jj[k] = col; 734 col++; 735 } 736 } 737 cum += subset_size; 738 } 739 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, local_size, local_size, ii, jj, v, &T)); 740 PetscCall(MatDuplicate(T, MAT_DO_NOT_COPY_VALUES, &sub_schurs->S_Ej_all)); 741 PetscCall(MatDestroy(&T)); 742 PetscCall(PetscFree3(ii, jj, v)); 743 } 744 /* matrices for deluxe scaling and adaptive selection */ 745 if (compute_Stilda) { 746 if (!sub_schurs->sum_S_Ej_tilda_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_tilda_all)); 747 if (!sub_schurs->sum_S_Ej_inv_all && deluxe) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_inv_all)); 748 } 749 750 /* Compute Schur complements explicitly */ 751 F = NULL; 752 if (!sub_schurs->schur_explicit) { 753 /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested; 754 it is not efficient, unless the economic version of the scaling is used */ 755 Mat S_Ej_expl; 756 PetscScalar *work; 757 PetscInt j, *dummy_idx; 758 PetscBool Sdense; 759 760 PetscCall(PetscMalloc2(max_subset_size, &dummy_idx, max_subset_size * max_subset_size, &work)); 761 local_size = 0; 762 for (i = 0; i < sub_schurs->n_subs; i++) { 763 IS is_subset_B; 764 Mat AE_EE, AE_IE, AE_EI, S_Ej; 765 766 /* subsets in original and boundary numbering */ 767 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, sub_schurs->is_subs[i], &is_subset_B)); 768 /* EE block */ 769 PetscCall(MatCreateSubMatrix(A_BB, is_subset_B, is_subset_B, MAT_INITIAL_MATRIX, &AE_EE)); 770 /* IE block */ 771 PetscCall(MatCreateSubMatrix(A_IB, is_I, is_subset_B, MAT_INITIAL_MATRIX, &AE_IE)); 772 /* EI block */ 773 if (sub_schurs->is_symmetric) { 774 PetscCall(MatCreateTranspose(AE_IE, &AE_EI)); 775 } else if (sub_schurs->is_hermitian) { 776 PetscCall(MatCreateHermitianTranspose(AE_IE, &AE_EI)); 777 } else { 778 PetscCall(MatCreateSubMatrix(A_BI, is_subset_B, is_I, MAT_INITIAL_MATRIX, &AE_EI)); 779 } 780 PetscCall(ISDestroy(&is_subset_B)); 781 PetscCall(MatCreateSchurComplement(AE_II, AE_II, AE_IE, AE_EI, AE_EE, &S_Ej)); 782 PetscCall(MatDestroy(&AE_EE)); 783 PetscCall(MatDestroy(&AE_IE)); 784 PetscCall(MatDestroy(&AE_EI)); 785 if (AE_II == A_II) { /* we can reuse the same ksp */ 786 KSP ksp; 787 PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &ksp)); 788 PetscCall(MatSchurComplementSetKSP(S_Ej, ksp)); 789 } else { /* build new ksp object which inherits ksp and pc types from the original one */ 790 KSP origksp, schurksp; 791 PC origpc, schurpc; 792 KSPType ksp_type; 793 PetscInt n_internal; 794 PetscBool ispcnone; 795 796 PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &origksp)); 797 PetscCall(MatSchurComplementGetKSP(S_Ej, &schurksp)); 798 PetscCall(KSPGetType(origksp, &ksp_type)); 799 PetscCall(KSPSetType(schurksp, ksp_type)); 800 PetscCall(KSPGetPC(schurksp, &schurpc)); 801 PetscCall(KSPGetPC(origksp, &origpc)); 802 PetscCall(PetscObjectTypeCompare((PetscObject)origpc, PCNONE, &ispcnone)); 803 if (!ispcnone) { 804 PCType pc_type; 805 PetscCall(PCGetType(origpc, &pc_type)); 806 PetscCall(PCSetType(schurpc, pc_type)); 807 } else { 808 PetscCall(PCSetType(schurpc, PCLU)); 809 } 810 PetscCall(ISGetSize(is_I, &n_internal)); 811 if (!n_internal) { /* UMFPACK gives error with 0 sized problems */ 812 MatSolverType solver = NULL; 813 PetscCall(PCFactorGetMatSolverType(origpc, &solver)); 814 if (solver) PetscCall(PCFactorSetMatSolverType(schurpc, solver)); 815 } 816 PetscCall(KSPSetUp(schurksp)); 817 } 818 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 819 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &S_Ej_expl)); 820 PetscCall(PCBDDCComputeExplicitSchur(S_Ej, sub_schurs->is_symmetric, MAT_REUSE_MATRIX, &S_Ej_expl)); 821 PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl, MATSEQDENSE, &Sdense)); 822 PetscCheck(Sdense, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented for sparse matrices"); 823 for (j = 0; j < subset_size; j++) dummy_idx[j] = local_size + j; 824 PetscCall(MatSetValues(sub_schurs->S_Ej_all, subset_size, dummy_idx, subset_size, dummy_idx, work, INSERT_VALUES)); 825 PetscCall(MatDestroy(&S_Ej)); 826 PetscCall(MatDestroy(&S_Ej_expl)); 827 local_size += subset_size; 828 } 829 PetscCall(PetscFree2(dummy_idx, work)); 830 /* free */ 831 PetscCall(ISDestroy(&is_I)); 832 PetscCall(MatDestroy(&AE_II)); 833 PetscCall(PetscFree(all_local_idx_N)); 834 } else { 835 Mat A, cs_AIB_mat = NULL, benign_AIIm1_ones_mat = NULL; 836 Mat *gdswA; 837 Vec Dall = NULL; 838 IS is_A_all, *is_p_r = NULL, is_schur; 839 MatType Stype; 840 PetscScalar *work, *S_data, *schur_factor, infty = PETSC_MAX_REAL; 841 PetscScalar *SEj_arr = NULL, *SEjinv_arr = NULL; 842 const PetscScalar *rS_data; 843 PetscInt n, n_I, size_schur, size_active_schur, cum, cum2; 844 PetscBool economic, solver_S, S_lower_triangular = PETSC_FALSE; 845 PetscBool schur_has_vertices, factor_workaround; 846 PetscBool use_cholesky; 847 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 848 PetscBool oldpin; 849 #endif 850 /* multi-element */ 851 IS *is_sub_all = NULL, *is_sub_schur_all = NULL, *is_sub_schur = NULL; 852 853 /* get sizes */ 854 n_I = 0; 855 if (is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &n_I)); 856 economic = PETSC_FALSE; 857 PetscCall(ISGetLocalSize(sub_schurs->is_I, &cum)); 858 if (cum != n_I) economic = PETSC_TRUE; 859 PetscCall(MatGetLocalSize(sub_schurs->A, &n, NULL)); 860 size_active_schur = local_size; 861 862 /* import scaling vector (wrong formulation if we have 3D edges) */ 863 if (scaling && compute_Stilda) { 864 const PetscScalar *array; 865 PetscScalar *array2; 866 const PetscInt *idxs; 867 PetscInt i; 868 869 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs)); 870 PetscCall(VecCreateSeq(PETSC_COMM_SELF, size_active_schur, &Dall)); 871 PetscCall(VecGetArrayRead(scaling, &array)); 872 PetscCall(VecGetArray(Dall, &array2)); 873 for (i = 0; i < size_active_schur; i++) array2[i] = array[idxs[i]]; 874 PetscCall(VecRestoreArray(Dall, &array2)); 875 PetscCall(VecRestoreArrayRead(scaling, &array)); 876 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs)); 877 deluxe = PETSC_FALSE; 878 } 879 880 /* size active schurs does not count any dirichlet or vertex dof on the interface */ 881 factor_workaround = PETSC_FALSE; 882 schur_has_vertices = PETSC_FALSE; 883 cum = n_I + size_active_schur; 884 if (sub_schurs->is_dir) { 885 const PetscInt *idxs; 886 PetscInt n_dir; 887 888 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir)); 889 PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs)); 890 PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_dir)); 891 if (multi_element) 892 for (PetscInt j = 0; j < n_dir; j++) all_local_subid_N[j + cum] = local_subs[idxs[j]]; 893 PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs)); 894 cum += n_dir; 895 if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE; 896 } 897 /* include the primal vertices in the Schur complement */ 898 if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) { 899 PetscInt n_v; 900 901 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v)); 902 if (n_v) { 903 const PetscInt *idxs; 904 905 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 906 PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_v)); 907 if (multi_element) 908 for (PetscInt j = 0; j < n_v; j++) all_local_subid_N[j + cum] = local_subs[idxs[j]]; 909 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 910 cum += n_v; 911 if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE; 912 schur_has_vertices = PETSC_TRUE; 913 } 914 } 915 size_schur = cum - n_I; 916 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, all_local_idx_N, PETSC_OWN_POINTER, &is_A_all)); 917 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 918 oldpin = sub_schurs->A->boundtocpu; 919 PetscCall(MatBindToCPU(sub_schurs->A, PETSC_TRUE)); 920 #endif 921 if (cum == n) { 922 PetscCall(ISSetPermutation(is_A_all)); 923 PetscCall(MatPermute(sub_schurs->A, is_A_all, is_A_all, &A)); 924 } else { 925 PetscCall(MatCreateSubMatrix(sub_schurs->A, is_A_all, is_A_all, MAT_INITIAL_MATRIX, &A)); 926 } 927 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 928 PetscCall(MatBindToCPU(sub_schurs->A, oldpin)); 929 #endif 930 PetscCall(MatSetOptionsPrefixFactor(A, sub_schurs->prefix)); 931 PetscCall(MatAppendOptionsPrefixFactor(A, "sub_schurs_")); 932 /* subsets ordered last */ 933 PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is_schur)); 934 935 if (multi_element) { 936 PetscInt *idx_sub; 937 938 PetscCall(PetscMalloc3(n_local_subs, &is_sub_all, n_local_subs, &is_sub_schur_all, n_local_subs, &is_sub_schur)); 939 PetscCall(PetscMalloc1(n + size_schur, &idx_sub)); 940 for (PetscInt sub = 0; sub < n_local_subs; sub++) { 941 PetscInt size_sub = 0, size_schur_sub = 0, size_I_sub; 942 943 for (PetscInt j = 0; j < n_I; j++) 944 if (all_local_subid_N[j] == sub) idx_sub[size_sub++] = j; 945 size_I_sub = size_sub; 946 for (PetscInt j = n_I; j < n_I + size_schur; j++) 947 if (all_local_subid_N[j] == sub) { 948 idx_sub[size_sub++] = j; 949 idx_sub[n + size_schur_sub++] = j - n_I; 950 } 951 952 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size_sub, idx_sub, PETSC_COPY_VALUES, &is_sub_all[sub])); 953 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size_schur_sub, idx_sub + n, PETSC_COPY_VALUES, &is_sub_schur[sub])); 954 PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur_sub, size_I_sub, 1, &is_sub_schur_all[sub])); 955 } 956 PetscCall(PetscFree(idx_sub)); 957 } 958 959 /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory 960 this is a workaround */ 961 if (benign_n) { 962 Vec v, benign_AIIm1_ones; 963 ISLocalToGlobalMapping N_to_reor; 964 IS is_p0, is_p0_p; 965 PetscScalar *cs_AIB, *AIIm1_data; 966 PetscInt sizeA; 967 968 PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all, &N_to_reor)); 969 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, benign_n, benign_p0_lidx, PETSC_COPY_VALUES, &is_p0)); 970 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, is_p0, &is_p0_p)); 971 PetscCall(ISDestroy(&is_p0)); 972 PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones)); 973 PetscCall(VecGetSize(v, &sizeA)); 974 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, sizeA, benign_n, NULL, &benign_AIIm1_ones_mat)); 975 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, benign_n, NULL, &cs_AIB_mat)); 976 PetscCall(MatDenseGetArray(cs_AIB_mat, &cs_AIB)); 977 PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data)); 978 PetscCall(PetscMalloc1(benign_n, &is_p_r)); 979 /* compute colsum of A_IB restricted to pressures */ 980 for (i = 0; i < benign_n; i++) { 981 const PetscScalar *array; 982 const PetscInt *idxs; 983 PetscInt j, nz; 984 985 PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, benign_zerodiag_subs[i], &is_p_r[i])); 986 PetscCall(ISGetLocalSize(is_p_r[i], &nz)); 987 PetscCall(ISGetIndices(is_p_r[i], &idxs)); 988 for (j = 0; j < nz; j++) AIIm1_data[idxs[j] + sizeA * i] = 1.; 989 PetscCall(ISRestoreIndices(is_p_r[i], &idxs)); 990 PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i)); 991 PetscCall(MatMult(A, benign_AIIm1_ones, v)); 992 PetscCall(VecResetArray(benign_AIIm1_ones)); 993 PetscCall(VecGetArrayRead(v, &array)); 994 for (j = 0; j < size_schur; j++) { 995 #if defined(PETSC_USE_COMPLEX) 996 cs_AIB[i * size_schur + j] = (PetscRealPart(array[j + n_I]) / nz + PETSC_i * (PetscImaginaryPart(array[j + n_I]) / nz)); 997 #else 998 cs_AIB[i * size_schur + j] = array[j + n_I] / nz; 999 #endif 1000 } 1001 PetscCall(VecRestoreArrayRead(v, &array)); 1002 } 1003 PetscCall(MatDenseRestoreArray(cs_AIB_mat, &cs_AIB)); 1004 PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data)); 1005 PetscCall(VecDestroy(&v)); 1006 PetscCall(VecDestroy(&benign_AIIm1_ones)); 1007 PetscCall(MatSetOption(A, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE)); 1008 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1009 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 1010 PetscCall(MatZeroRowsColumnsIS(A, is_p0_p, 1.0, NULL, NULL)); 1011 PetscCall(ISDestroy(&is_p0_p)); 1012 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor)); 1013 } 1014 PetscCall(MatSetOption(A, MAT_SYMMETRIC, sub_schurs->is_symmetric)); 1015 PetscCall(MatSetOption(A, MAT_HERMITIAN, sub_schurs->is_hermitian)); 1016 PetscCall(MatSetOption(A, MAT_SPD, sub_schurs->is_posdef)); 1017 1018 /* for complexes, symmetric and hermitian at the same time implies null imaginary part */ 1019 use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric); 1020 1021 /* when using the benign subspace trick, the local Schur complements are SPD */ 1022 /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization 1023 Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */ 1024 if (benign_trick) { 1025 sub_schurs->is_posdef = PETSC_TRUE; 1026 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &flg)); 1027 if (flg) use_cholesky = PETSC_FALSE; 1028 } 1029 if (sub_schurs->mat_factor_type == MAT_FACTOR_NONE) sub_schurs->mat_factor_type = use_cholesky ? MAT_FACTOR_CHOLESKY : MAT_FACTOR_LU; 1030 1031 if (n_I && !multi_element) { 1032 char stype[64]; 1033 PetscBool gpu = PETSC_FALSE; 1034 1035 PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, sub_schurs->mat_factor_type, &F)); 1036 PetscCheck(F, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatGetFactor not supported by matrix instance of type %s. Rerun with \"-info :mat | grep MatGetFactor_\" for additional information", ((PetscObject)A)->type_name); 1037 PetscCall(MatSetErrorIfFailure(A, PETSC_TRUE)); 1038 #if defined(PETSC_HAVE_MKL_PARDISO) 1039 if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10)); 1040 #endif 1041 PetscCall(MatFactorSetSchurIS(F, is_schur)); 1042 1043 /* factorization step */ 1044 switch (sub_schurs->mat_factor_type) { 1045 case MAT_FACTOR_CHOLESKY: 1046 PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL)); 1047 /* be sure that icntl 19 is not set by command line */ 1048 PetscCall(MatMumpsSetIcntl(F, 19, 2)); 1049 PetscCall(MatCholeskyFactorNumeric(F, A, NULL)); 1050 S_lower_triangular = PETSC_TRUE; 1051 break; 1052 case MAT_FACTOR_LU: 1053 PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL)); 1054 /* be sure that icntl 19 is not set by command line */ 1055 PetscCall(MatMumpsSetIcntl(F, 19, 3)); 1056 PetscCall(MatLUFactorNumeric(F, A, NULL)); 1057 break; 1058 default: 1059 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]); 1060 } 1061 PetscCall(MatViewFromOptions(F, (PetscObject)A, "-mat_factor_view")); 1062 1063 if (matl_dbg_viewer) { 1064 Mat S; 1065 IS is; 1066 1067 PetscCall(PetscObjectSetName((PetscObject)A, "A")); 1068 PetscCall(MatView(A, matl_dbg_viewer)); 1069 PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); 1070 PetscCall(PetscObjectSetName((PetscObject)S, "S")); 1071 PetscCall(MatView(S, matl_dbg_viewer)); 1072 PetscCall(MatDestroy(&S)); 1073 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_I, 0, 1, &is)); 1074 PetscCall(PetscObjectSetName((PetscObject)is, "I")); 1075 PetscCall(ISView(is, matl_dbg_viewer)); 1076 PetscCall(ISDestroy(&is)); 1077 PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is)); 1078 PetscCall(PetscObjectSetName((PetscObject)is, "B")); 1079 PetscCall(ISView(is, matl_dbg_viewer)); 1080 PetscCall(ISDestroy(&is)); 1081 PetscCall(PetscObjectSetName((PetscObject)is_A_all, "IA")); 1082 PetscCall(ISView(is_A_all, matl_dbg_viewer)); 1083 for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { 1084 IS is; 1085 char name[16]; 1086 1087 PetscCall(PetscSNPrintf(name, sizeof(name), "IE%" PetscInt_FMT, i)); 1088 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1089 PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &is)); 1090 PetscCall(PetscObjectSetName((PetscObject)is, name)); 1091 PetscCall(ISView(is, matl_dbg_viewer)); 1092 PetscCall(ISDestroy(&is)); 1093 if (sub_schurs->change) { 1094 Mat T; 1095 1096 PetscCall(PetscSNPrintf(name, sizeof(name), "TE%" PetscInt_FMT, i)); 1097 PetscCall(KSPGetOperators(sub_schurs->change[i], &T, NULL)); 1098 PetscCall(PetscObjectSetName((PetscObject)T, name)); 1099 PetscCall(MatView(T, matl_dbg_viewer)); 1100 PetscCall(PetscSNPrintf(name, sizeof(name), "ITE%" PetscInt_FMT, i)); 1101 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i], name)); 1102 PetscCall(ISView(sub_schurs->change_primal_sub[i], matl_dbg_viewer)); 1103 } 1104 cum += subset_size; 1105 } 1106 PetscCall(PetscViewerFlush(matl_dbg_viewer)); 1107 } 1108 1109 /* get explicit Schur Complement computed during numeric factorization */ 1110 PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 1111 PetscCall(PetscStrncpy(stype, MATSEQDENSE, sizeof(stype))); 1112 #if defined(PETSC_HAVE_CUDA) 1113 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &gpu, MATSEQAIJVIENNACL, MATSEQAIJCUSPARSE, "")); 1114 #endif 1115 if (gpu) PetscCall(PetscStrncpy(stype, MATSEQDENSECUDA, sizeof(stype))); 1116 PetscCall(PetscOptionsGetString(NULL, sub_schurs->prefix, "-sub_schurs_schur_mat_type", stype, sizeof(stype), NULL)); 1117 PetscCall(MatConvert(S_all, stype, MAT_INPLACE_MATRIX, &S_all)); 1118 PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef)); 1119 PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian)); 1120 PetscCall(MatGetType(S_all, &Stype)); 1121 1122 /* we can reuse the solvers if we are not using the economic version */ 1123 reuse_solvers = (PetscBool)(reuse_solvers && !economic && !sub_schurs->graph->multi_element); 1124 if (!sub_schurs->gdsw) { 1125 factor_workaround = (PetscBool)(reuse_solvers && factor_workaround); 1126 if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) reuse_solvers = factor_workaround = PETSC_FALSE; 1127 } 1128 solver_S = PETSC_TRUE; 1129 1130 /* update the Schur complement with the change of basis on the pressures */ 1131 if (benign_n) { 1132 const PetscScalar *cs_AIB; 1133 PetscScalar *S_data, *AIIm1_data; 1134 Mat S2 = NULL, S3 = NULL; /* dbg */ 1135 PetscScalar *S2_data, *S3_data; /* dbg */ 1136 Vec v, benign_AIIm1_ones; 1137 PetscInt sizeA; 1138 1139 PetscCall(MatDenseGetArray(S_all, &S_data)); 1140 PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones)); 1141 PetscCall(VecGetSize(v, &sizeA)); 1142 PetscCall(MatMumpsSetIcntl(F, 26, 0)); 1143 #if defined(PETSC_HAVE_MKL_PARDISO) 1144 PetscCall(MatMkl_PardisoSetCntl(F, 70, 1)); 1145 #endif 1146 PetscCall(MatDenseGetArrayRead(cs_AIB_mat, &cs_AIB)); 1147 PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data)); 1148 if (matl_dbg_viewer) { 1149 PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S2)); 1150 PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S3)); 1151 PetscCall(MatDenseGetArray(S2, &S2_data)); 1152 PetscCall(MatDenseGetArray(S3, &S3_data)); 1153 } 1154 for (i = 0; i < benign_n; i++) { 1155 PetscScalar *array, sum = 0., one = 1., *sums; 1156 const PetscInt *idxs; 1157 PetscInt k, j, nz; 1158 PetscBLASInt B_k, B_n; 1159 1160 PetscCall(PetscCalloc1(benign_n, &sums)); 1161 PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i)); 1162 PetscCall(VecCopy(benign_AIIm1_ones, v)); 1163 PetscCall(MatSolve(F, v, benign_AIIm1_ones)); 1164 PetscCall(MatMult(A, benign_AIIm1_ones, v)); 1165 PetscCall(VecResetArray(benign_AIIm1_ones)); 1166 /* p0 dofs (eliminated) are excluded from the sums */ 1167 for (k = 0; k < benign_n; k++) { 1168 PetscCall(ISGetLocalSize(is_p_r[k], &nz)); 1169 PetscCall(ISGetIndices(is_p_r[k], &idxs)); 1170 for (j = 0; j < nz - 1; j++) sums[k] -= AIIm1_data[idxs[j] + sizeA * i]; 1171 PetscCall(ISRestoreIndices(is_p_r[k], &idxs)); 1172 } 1173 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array)); 1174 if (matl_dbg_viewer) { 1175 Vec vv; 1176 char name[16]; 1177 1178 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array + n_I, &vv)); 1179 PetscCall(PetscSNPrintf(name, sizeof(name), "Pvs%" PetscInt_FMT, i)); 1180 PetscCall(PetscObjectSetName((PetscObject)vv, name)); 1181 PetscCall(VecView(vv, matl_dbg_viewer)); 1182 } 1183 /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */ 1184 /* cs_AIB already scaled by 1./nz */ 1185 B_k = 1; 1186 PetscCall(PetscBLASIntCast(size_schur, &B_n)); 1187 for (k = 0; k < benign_n; k++) { 1188 sum = sums[k]; 1189 1190 if (PetscAbsScalar(sum) == 0.0) continue; 1191 if (k == i) { 1192 if (B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1193 if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n)); 1194 } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */ 1195 sum /= 2.0; 1196 if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1197 if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n)); 1198 } 1199 } 1200 sum = 1.; 1201 if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n)); 1202 if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S2_data, &B_n)); 1203 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array)); 1204 /* set p0 entry of AIIm1_ones to zero */ 1205 PetscCall(ISGetLocalSize(is_p_r[i], &nz)); 1206 PetscCall(ISGetIndices(is_p_r[i], &idxs)); 1207 for (j = 0; j < benign_n; j++) AIIm1_data[idxs[nz - 1] + sizeA * j] = 0.; 1208 PetscCall(ISRestoreIndices(is_p_r[i], &idxs)); 1209 PetscCall(PetscFree(sums)); 1210 } 1211 PetscCall(VecDestroy(&benign_AIIm1_ones)); 1212 if (matl_dbg_viewer) { 1213 PetscCall(MatDenseRestoreArray(S2, &S2_data)); 1214 PetscCall(MatDenseRestoreArray(S3, &S3_data)); 1215 } 1216 if (!S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */ 1217 PetscInt k, j; 1218 for (k = 0; k < size_schur; k++) { 1219 for (j = k; j < size_schur; j++) S_data[j * size_schur + k] = PetscConj(S_data[k * size_schur + j]); 1220 } 1221 } 1222 1223 /* restore defaults */ 1224 PetscCall(MatMumpsSetIcntl(F, 26, -1)); 1225 #if defined(PETSC_HAVE_MKL_PARDISO) 1226 PetscCall(MatMkl_PardisoSetCntl(F, 70, 0)); 1227 #endif 1228 PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat, &cs_AIB)); 1229 PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data)); 1230 PetscCall(VecDestroy(&v)); 1231 PetscCall(MatDenseRestoreArray(S_all, &S_data)); 1232 if (matl_dbg_viewer) { 1233 Mat S; 1234 1235 PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); 1236 PetscCall(MatFactorCreateSchurComplement(F, &S, NULL)); 1237 PetscCall(PetscObjectSetName((PetscObject)S, "Sb")); 1238 PetscCall(MatView(S, matl_dbg_viewer)); 1239 PetscCall(MatDestroy(&S)); 1240 PetscCall(PetscObjectSetName((PetscObject)S2, "S2P")); 1241 PetscCall(MatView(S2, matl_dbg_viewer)); 1242 PetscCall(PetscObjectSetName((PetscObject)S3, "S3P")); 1243 PetscCall(MatView(S3, matl_dbg_viewer)); 1244 PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat, "cs")); 1245 PetscCall(MatView(cs_AIB_mat, matl_dbg_viewer)); 1246 PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 1247 } 1248 PetscCall(MatDestroy(&S2)); 1249 PetscCall(MatDestroy(&S3)); 1250 } 1251 if (!reuse_solvers) { 1252 for (i = 0; i < benign_n; i++) PetscCall(ISDestroy(&is_p_r[i])); 1253 PetscCall(PetscFree(is_p_r)); 1254 PetscCall(MatDestroy(&cs_AIB_mat)); 1255 PetscCall(MatDestroy(&benign_AIIm1_ones_mat)); 1256 } 1257 } else if (multi_element) { /* MUMPS does not support sparse Schur complements. Loop over local subs */ 1258 PetscInt *nnz; 1259 const PetscInt *idxs; 1260 PetscInt size_schur_sub; 1261 1262 PetscCall(PetscCalloc1(size_schur, &nnz)); 1263 for (PetscInt sub = 0; sub < n_local_subs; sub++) { 1264 PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub)); 1265 PetscCall(ISGetIndices(is_sub_schur[sub], &idxs)); 1266 for (PetscInt j = 0; j < size_schur_sub; j++) nnz[idxs[j]] = size_schur_sub; 1267 PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs)); 1268 } 1269 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, size_schur, size_schur, 0, nnz, &S_all)); 1270 PetscCall(MatSetOption(S_all, MAT_ROW_ORIENTED, sub_schurs->is_hermitian)); 1271 PetscCall(PetscFree(nnz)); 1272 1273 for (PetscInt sub = 0; sub < n_local_subs; sub++) { 1274 Mat Asub, Ssub; 1275 const PetscScalar *vals; 1276 PetscInt size_all_sub; 1277 1278 F = NULL; 1279 PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub)); 1280 PetscCall(ISGetLocalSize(is_sub_all[sub], &size_all_sub)); 1281 PetscCall(MatCreateSubMatrix(A, is_sub_all[sub], is_sub_all[sub], MAT_INITIAL_MATRIX, &Asub)); 1282 if (size_schur_sub == size_all_sub) { 1283 /* we can't use MatFactor when size_schur == size_of_the_problem */ 1284 PetscCall(MatConvert(Asub, MATDENSE, MAT_INITIAL_MATRIX, &Ssub)); 1285 } else { 1286 PetscCall(MatGetFactor(Asub, sub_schurs->mat_solver_type, sub_schurs->mat_factor_type, &F)); 1287 PetscCheck(F, PetscObjectComm((PetscObject)Asub), PETSC_ERR_SUP, "MatGetFactor not supported by matrix instance of type %s. Rerun with \"-info :mat | grep MatGetFactor_\" for additional information", ((PetscObject)Asub)->type_name); 1288 PetscCall(MatSetErrorIfFailure(Asub, PETSC_TRUE)); 1289 #if defined(PETSC_HAVE_MKL_PARDISO) 1290 if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10)); 1291 #endif 1292 /* subsets ordered last */ 1293 PetscCall(MatFactorSetSchurIS(F, is_sub_schur_all[sub])); 1294 1295 /* factorization step */ 1296 switch (sub_schurs->mat_factor_type) { 1297 case MAT_FACTOR_CHOLESKY: 1298 PetscCall(MatCholeskyFactorSymbolic(F, Asub, NULL, NULL)); 1299 /* be sure that icntl 19 is not set by command line */ 1300 PetscCall(MatMumpsSetIcntl(F, 19, 2)); 1301 PetscCall(MatCholeskyFactorNumeric(F, Asub, NULL)); 1302 S_lower_triangular = PETSC_TRUE; 1303 break; 1304 case MAT_FACTOR_LU: 1305 PetscCall(MatLUFactorSymbolic(F, Asub, NULL, NULL, NULL)); 1306 /* be sure that icntl 19 is not set by command line */ 1307 PetscCall(MatMumpsSetIcntl(F, 19, 3)); 1308 PetscCall(MatLUFactorNumeric(F, Asub, NULL)); 1309 break; 1310 default: 1311 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]); 1312 } 1313 PetscCall(MatFactorCreateSchurComplement(F, &Ssub, NULL)); 1314 } 1315 PetscCall(MatDestroy(&Asub)); 1316 PetscCall(MatDenseGetArrayRead(Ssub, &vals)); 1317 PetscCall(ISGetIndices(is_sub_schur[sub], &idxs)); 1318 PetscCall(MatSetValues(S_all, size_schur_sub, idxs, size_schur_sub, idxs, vals, INSERT_VALUES)); 1319 PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs)); 1320 PetscCall(MatDenseRestoreArrayRead(Ssub, &vals)); 1321 PetscCall(MatDestroy(&Ssub)); 1322 PetscCall(MatDestroy(&F)); 1323 } 1324 PetscCall(MatAssemblyBegin(S_all, MAT_FINAL_ASSEMBLY)); 1325 PetscCall(MatAssemblyEnd(S_all, MAT_FINAL_ASSEMBLY)); 1326 PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef)); 1327 PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian)); 1328 Stype = MATDENSE; 1329 reuse_solvers = PETSC_FALSE; 1330 factor_workaround = PETSC_FALSE; 1331 solver_S = PETSC_FALSE; 1332 } else { /* we can't use MatFactor when size_schur == size_of_the_problem */ 1333 PetscCall(MatConvert(A, MATSEQDENSE, MAT_INITIAL_MATRIX, &S_all)); 1334 PetscCall(MatGetType(S_all, &Stype)); 1335 reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */ 1336 factor_workaround = PETSC_FALSE; 1337 solver_S = PETSC_FALSE; 1338 } 1339 1340 if (reuse_solvers) { 1341 Mat A_II, pA_II, Afake; 1342 Vec vec1_B; 1343 PCBDDCReuseSolvers msolv_ctx; 1344 PetscInt n_R; 1345 1346 if (sub_schurs->reuse_solver) { 1347 PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1348 } else { 1349 PetscCall(PetscNew(&sub_schurs->reuse_solver)); 1350 } 1351 msolv_ctx = sub_schurs->reuse_solver; 1352 PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, &pA_II, NULL, NULL, NULL)); 1353 PetscCall(PetscObjectReference((PetscObject)F)); 1354 msolv_ctx->F = F; 1355 PetscCall(MatCreateVecs(F, &msolv_ctx->sol, NULL)); 1356 /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */ 1357 { 1358 PetscScalar *array; 1359 PetscInt n; 1360 1361 PetscCall(VecGetLocalSize(msolv_ctx->sol, &n)); 1362 PetscCall(VecGetArray(msolv_ctx->sol, &array)); 1363 PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol), 1, n, array, &msolv_ctx->rhs)); 1364 PetscCall(VecRestoreArray(msolv_ctx->sol, &array)); 1365 } 1366 msolv_ctx->has_vertices = schur_has_vertices; 1367 1368 /* interior solver */ 1369 PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->interior_solver)); 1370 PetscCall(PCSetOperators(msolv_ctx->interior_solver, A_II, pA_II)); 1371 PetscCall(PCSetType(msolv_ctx->interior_solver, PCSHELL)); 1372 PetscCall(PCShellSetName(msolv_ctx->interior_solver, "Interior solver (w/o Schur factorization)")); 1373 PetscCall(PCShellSetContext(msolv_ctx->interior_solver, msolv_ctx)); 1374 PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View)); 1375 PetscCall(PCShellSetApply(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Interior)); 1376 PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver, PCBDDCReuseSolvers_InteriorTranspose)); 1377 if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Destroy)); 1378 1379 /* correction solver */ 1380 if (!sub_schurs->gdsw) { 1381 PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->correction_solver)); 1382 PetscCall(PCSetType(msolv_ctx->correction_solver, PCSHELL)); 1383 PetscCall(PCShellSetName(msolv_ctx->correction_solver, "Correction solver (with Schur factorization)")); 1384 PetscCall(PCShellSetContext(msolv_ctx->correction_solver, msolv_ctx)); 1385 PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View)); 1386 PetscCall(PCShellSetApply(msolv_ctx->correction_solver, PCBDDCReuseSolvers_Correction)); 1387 PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver, PCBDDCReuseSolvers_CorrectionTranspose)); 1388 1389 /* scatter and vecs for Schur complement solver */ 1390 PetscCall(MatCreateVecs(S_all, &msolv_ctx->sol_B, &msolv_ctx->rhs_B)); 1391 PetscCall(MatCreateVecs(sub_schurs->S, &vec1_B, NULL)); 1392 if (!schur_has_vertices) { 1393 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_A_all, &msolv_ctx->is_B)); 1394 PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, NULL, &msolv_ctx->correction_scatter_B)); 1395 PetscCall(PetscObjectReference((PetscObject)is_A_all)); 1396 msolv_ctx->is_R = is_A_all; 1397 } else { 1398 IS is_B_all; 1399 const PetscInt *idxs; 1400 PetscInt dual, n_v, n; 1401 1402 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v)); 1403 dual = size_schur - n_v; 1404 PetscCall(ISGetLocalSize(is_A_all, &n)); 1405 PetscCall(ISGetIndices(is_A_all, &idxs)); 1406 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), dual, idxs + n_I, PETSC_COPY_VALUES, &is_B_all)); 1407 PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_B_all, &msolv_ctx->is_B)); 1408 PetscCall(ISDestroy(&is_B_all)); 1409 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all), dual, 0, 1, &is_B_all)); 1410 PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, is_B_all, &msolv_ctx->correction_scatter_B)); 1411 PetscCall(ISDestroy(&is_B_all)); 1412 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), n - n_v, idxs, PETSC_COPY_VALUES, &msolv_ctx->is_R)); 1413 PetscCall(ISRestoreIndices(is_A_all, &idxs)); 1414 } 1415 PetscCall(ISGetLocalSize(msolv_ctx->is_R, &n_R)); 1416 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_R, n_R, 0, NULL, &Afake)); 1417 PetscCall(MatAssemblyBegin(Afake, MAT_FINAL_ASSEMBLY)); 1418 PetscCall(MatAssemblyEnd(Afake, MAT_FINAL_ASSEMBLY)); 1419 PetscCall(PCSetOperators(msolv_ctx->correction_solver, Afake, Afake)); 1420 PetscCall(MatDestroy(&Afake)); 1421 PetscCall(VecDestroy(&vec1_B)); 1422 } 1423 /* communicate benign info to solver context */ 1424 if (benign_n) { 1425 PetscScalar *array; 1426 1427 msolv_ctx->benign_n = benign_n; 1428 msolv_ctx->benign_zerodiag_subs = is_p_r; 1429 PetscCall(PetscMalloc1(benign_n, &msolv_ctx->benign_save_vals)); 1430 msolv_ctx->benign_csAIB = cs_AIB_mat; 1431 PetscCall(MatCreateVecs(cs_AIB_mat, &msolv_ctx->benign_corr_work, NULL)); 1432 PetscCall(VecGetArray(msolv_ctx->benign_corr_work, &array)); 1433 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array, &msolv_ctx->benign_dummy_schur_vec)); 1434 PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work, &array)); 1435 msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat; 1436 } 1437 } else { 1438 if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 1439 PetscCall(PetscFree(sub_schurs->reuse_solver)); 1440 } 1441 PetscCall(MatDestroy(&A)); 1442 PetscCall(ISDestroy(&is_A_all)); 1443 1444 /* Work arrays */ 1445 PetscCall(PetscMalloc1(max_subset_size * max_subset_size, &work)); 1446 1447 /* S_Ej_all */ 1448 PetscInt *idx_work = NULL; 1449 cum = cum2 = 0; 1450 if (!multi_element) PetscCall(MatDenseGetArrayRead(S_all, &rS_data)); 1451 else PetscCall(PetscMalloc1(max_subset_size, &idx_work)); 1452 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &SEj_arr)); 1453 if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr)); 1454 if (sub_schurs->gdsw) PetscCall(MatCreateSubMatrices(sub_schurs->A, sub_schurs->n_subs, sub_schurs->is_subs, sub_schurs->is_subs, MAT_INITIAL_MATRIX, &gdswA)); 1455 for (i = 0; i < sub_schurs->n_subs; i++) { 1456 /* get S_E (or K^i_EE for GDSW) */ 1457 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1458 if (sub_schurs->gdsw) { 1459 Mat T; 1460 1461 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &T)); 1462 PetscCall(MatConvert(gdswA[i], MATDENSE, MAT_REUSE_MATRIX, &T)); 1463 PetscCall(MatDestroy(&T)); 1464 } else { 1465 if (multi_element) { /* transpose copy to workspace */ 1466 // XXX CSR directly? 1467 for (PetscInt j = 0; j < subset_size; j++) idx_work[j] = cum + j; 1468 PetscCall(MatGetValues(S_all, subset_size, idx_work, subset_size, idx_work, work)); 1469 if (!sub_schurs->is_hermitian) { 1470 for (PetscInt k = 0; k < subset_size; k++) { 1471 for (PetscInt j = k; j < subset_size; j++) { 1472 PetscScalar t = work[k * subset_size + j]; 1473 work[k * subset_size + j] = work[j * subset_size + k]; 1474 work[j * subset_size + k] = t; 1475 } 1476 } 1477 } 1478 } else if (S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */ 1479 for (PetscInt k = 0; k < subset_size; k++) { 1480 for (PetscInt j = k; j < subset_size; j++) { 1481 work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 1482 work[j * subset_size + k] = PetscConj(rS_data[cum2 + k * size_schur + j]); 1483 } 1484 } 1485 } else { /* just copy to workspace */ 1486 for (PetscInt k = 0; k < subset_size; k++) { 1487 for (PetscInt j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 1488 } 1489 } 1490 } 1491 /* insert S_E values */ 1492 if (sub_schurs->change) { 1493 Mat change_sub, SEj, T; 1494 1495 /* change basis */ 1496 PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL)); 1497 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj)); 1498 if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 1499 Mat T2; 1500 PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2)); 1501 PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 1502 PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T)); 1503 PetscCall(MatDestroy(&T2)); 1504 } else { 1505 PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 1506 } 1507 PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN)); 1508 PetscCall(MatDestroy(&T)); 1509 PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], 1.0, NULL, NULL)); 1510 PetscCall(MatDestroy(&SEj)); 1511 } 1512 PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size)); 1513 if (compute_Stilda) { 1514 if (deluxe) { /* if adaptivity is requested, invert S_E blocks */ 1515 Mat M; 1516 const PetscScalar *vals; 1517 PetscBool isdense, isdensecuda; 1518 1519 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &M)); 1520 PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef)); 1521 PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian)); 1522 if (!PetscBTLookup(sub_schurs->is_edge, i)) PetscCall(MatSetType(M, Stype)); 1523 PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSE, &isdense)); 1524 PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSECUDA, &isdensecuda)); 1525 switch (sub_schurs->mat_factor_type) { 1526 case MAT_FACTOR_CHOLESKY: 1527 PetscCall(MatCholeskyFactor(M, NULL, NULL)); 1528 break; 1529 case MAT_FACTOR_LU: 1530 PetscCall(MatLUFactor(M, NULL, NULL, NULL)); 1531 break; 1532 default: 1533 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]); 1534 } 1535 if (isdense) { 1536 PetscCall(MatSeqDenseInvertFactors_Private(M)); 1537 #if defined(PETSC_HAVE_CUDA) 1538 } else if (isdensecuda) { 1539 PetscCall(MatSeqDenseCUDAInvertFactors_Internal(M)); 1540 #endif 1541 } else SETERRQ(PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "Not implemented for type %s", Stype); 1542 PetscCall(MatDenseGetArrayRead(M, &vals)); 1543 PetscCall(PetscArraycpy(SEjinv_arr, vals, subset_size * subset_size)); 1544 PetscCall(MatDenseRestoreArrayRead(M, &vals)); 1545 PetscCall(MatDestroy(&M)); 1546 } else if (scaling) { /* not using deluxe */ 1547 Mat SEj; 1548 Vec D; 1549 PetscScalar *array; 1550 1551 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj)); 1552 PetscCall(VecGetArray(Dall, &array)); 1553 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, array + cum, &D)); 1554 PetscCall(VecRestoreArray(Dall, &array)); 1555 PetscCall(VecShift(D, -1.)); 1556 PetscCall(MatDiagonalScale(SEj, D, D)); 1557 PetscCall(MatDestroy(&SEj)); 1558 PetscCall(VecDestroy(&D)); 1559 PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size)); 1560 } 1561 } 1562 cum += subset_size; 1563 cum2 += subset_size * (size_schur + 1); 1564 SEj_arr += subset_size * subset_size; 1565 if (SEjinv_arr) SEjinv_arr += subset_size * subset_size; 1566 } 1567 if (sub_schurs->gdsw) PetscCall(MatDestroySubMatrices(sub_schurs->n_subs, &gdswA)); 1568 if (!multi_element) PetscCall(MatDenseRestoreArrayRead(S_all, &rS_data)); 1569 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &SEj_arr)); 1570 if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr)); 1571 if (solver_S) PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); 1572 1573 /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory 1574 however, preliminary tests indicate using GPUs is still faster in the solve phase */ 1575 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 1576 if (reuse_solvers) { 1577 Mat St; 1578 MatFactorSchurStatus st; 1579 1580 flg = PETSC_FALSE; 1581 PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_schur_pin_to_cpu", &flg, NULL)); 1582 PetscCall(MatFactorGetSchurComplement(F, &St, &st)); 1583 PetscCall(MatBindToCPU(St, flg)); 1584 PetscCall(MatFactorRestoreSchurComplement(F, &St, st)); 1585 } 1586 #endif 1587 1588 schur_factor = NULL; 1589 if (compute_Stilda && size_active_schur) { 1590 if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */ 1591 PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1592 PetscCall(PetscArraycpy(SEjinv_arr, work, size_schur * size_schur)); 1593 PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1594 } else { 1595 Mat S_all_inv = NULL; 1596 1597 if (solver_S && !sub_schurs->gdsw) { 1598 /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1. 1599 The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */ 1600 if (factor_workaround) { /* invert without calling MatFactorInvertSchurComplement, since we are hacking */ 1601 PetscScalar *data; 1602 PetscInt nd = 0; 1603 1604 PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices"); 1605 PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL)); 1606 PetscCall(MatDenseGetArray(S_all_inv, &data)); 1607 if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 1608 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 1609 } 1610 1611 /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */ 1612 if (schur_has_vertices) { 1613 Mat M; 1614 PetscScalar *tdata; 1615 PetscInt nv = 0, news; 1616 1617 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv)); 1618 news = size_active_schur + nv; 1619 PetscCall(PetscCalloc1(news * news, &tdata)); 1620 for (i = 0; i < size_active_schur; i++) { 1621 PetscCall(PetscArraycpy(tdata + i * (news + 1), data + i * (size_schur + 1), size_active_schur - i)); 1622 PetscCall(PetscArraycpy(tdata + i * (news + 1) + size_active_schur - i, data + i * size_schur + size_active_schur + nd, nv)); 1623 } 1624 for (i = 0; i < nv; i++) { 1625 PetscInt k = i + size_active_schur; 1626 PetscCall(PetscArraycpy(tdata + k * (news + 1), data + (k + nd) * (size_schur + 1), nv - i)); 1627 } 1628 1629 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, news, news, tdata, &M)); 1630 PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE)); 1631 PetscCall(MatCholeskyFactor(M, NULL, NULL)); 1632 /* save the factors */ 1633 cum = 0; 1634 PetscCall(PetscMalloc1((size_active_schur * (size_active_schur + 1)) / 2 + nd, &schur_factor)); 1635 for (i = 0; i < size_active_schur; i++) { 1636 PetscCall(PetscArraycpy(schur_factor + cum, tdata + i * (news + 1), size_active_schur - i)); 1637 cum += size_active_schur - i; 1638 } 1639 for (i = 0; i < nd; i++) schur_factor[cum + i] = PetscSqrtReal(PetscRealPart(data[(i + size_active_schur) * (size_schur + 1)])); 1640 S_all_inv->ops->solve = M->ops->solve; 1641 S_all_inv->ops->matsolve = M->ops->matsolve; 1642 S_all_inv->ops->solvetranspose = M->ops->solvetranspose; 1643 S_all_inv->ops->matsolvetranspose = M->ops->matsolvetranspose; 1644 S_all_inv->factortype = MAT_FACTOR_CHOLESKY; 1645 PetscCall(MatSeqDenseInvertFactors_Private(M)); 1646 /* move back just the active dofs to the Schur complement */ 1647 for (i = 0; i < size_active_schur; i++) PetscCall(PetscArraycpy(data + i * size_schur, tdata + i * news, size_active_schur)); 1648 PetscCall(PetscFree(tdata)); 1649 PetscCall(MatDestroy(&M)); 1650 } else { /* we can factorize and invert just the activedofs */ 1651 Mat M; 1652 PetscScalar *aux; 1653 1654 PetscCall(PetscMalloc1(nd, &aux)); 1655 for (i = 0; i < nd; i++) aux[i] = 1.0 / data[(i + size_active_schur) * (size_schur + 1)]; 1656 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_active_schur, size_active_schur, data, &M)); 1657 PetscCall(MatDenseSetLDA(M, size_schur)); 1658 PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE)); 1659 PetscCall(MatCholeskyFactor(M, NULL, NULL)); 1660 PetscCall(MatSeqDenseInvertFactors_Private(M)); 1661 PetscCall(MatDestroy(&M)); 1662 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, nd, data + size_active_schur * size_schur, &M)); 1663 PetscCall(MatZeroEntries(M)); 1664 PetscCall(MatDestroy(&M)); 1665 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nd, size_schur, data + size_active_schur, &M)); 1666 PetscCall(MatDenseSetLDA(M, size_schur)); 1667 PetscCall(MatZeroEntries(M)); 1668 PetscCall(MatDestroy(&M)); 1669 for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = aux[i]; 1670 PetscCall(PetscFree(aux)); 1671 } 1672 PetscCall(MatDenseRestoreArray(S_all_inv, &data)); 1673 } else { /* use MatFactor calls to invert S */ 1674 PetscCall(MatFactorInvertSchurComplement(F)); 1675 PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL)); 1676 } 1677 } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */ 1678 if (multi_element) { 1679 PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S_all_inv)); 1680 PetscCall(MatSetOption(S_all_inv, MAT_ROW_ORIENTED, sub_schurs->is_hermitian)); 1681 for (PetscInt sub = 0; sub < n_local_subs; sub++) { 1682 const PetscScalar *vals; 1683 const PetscInt *idxs; 1684 PetscInt size_schur_sub; 1685 Mat M; 1686 1687 PetscCall(MatCreateSubMatrix(S_all, is_sub_schur[sub], is_sub_schur[sub], MAT_INITIAL_MATRIX, &M)); 1688 PetscCall(MatConvert(M, MATDENSE, MAT_INPLACE_MATRIX, &M)); 1689 PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef)); 1690 PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian)); 1691 switch (sub_schurs->mat_factor_type) { 1692 case MAT_FACTOR_CHOLESKY: 1693 PetscCall(MatCholeskyFactor(M, NULL, NULL)); 1694 break; 1695 case MAT_FACTOR_LU: 1696 PetscCall(MatLUFactor(M, NULL, NULL, NULL)); 1697 break; 1698 default: 1699 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]); 1700 } 1701 PetscCall(MatSeqDenseInvertFactors_Private(M)); 1702 PetscCall(MatDenseGetArrayRead(M, &vals)); 1703 PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub)); 1704 PetscCall(ISGetIndices(is_sub_schur[sub], &idxs)); 1705 PetscCall(MatSetValues(S_all_inv, size_schur_sub, idxs, size_schur_sub, idxs, vals, INSERT_VALUES)); 1706 PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs)); 1707 PetscCall(MatDenseRestoreArrayRead(M, &vals)); 1708 PetscCall(MatDestroy(&M)); 1709 } 1710 PetscCall(MatAssemblyBegin(S_all_inv, MAT_FINAL_ASSEMBLY)); 1711 PetscCall(MatAssemblyEnd(S_all_inv, MAT_FINAL_ASSEMBLY)); 1712 } else { 1713 PetscCall(PetscObjectReference((PetscObject)S_all)); 1714 S_all_inv = S_all; 1715 PetscCall(MatDenseGetArray(S_all_inv, &S_data)); 1716 PetscCall(PetscBLASIntCast(size_schur, &B_N)); 1717 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1718 if (use_potr) { 1719 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, S_data, &B_N, &B_ierr)); 1720 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 1721 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, S_data, &B_N, &B_ierr)); 1722 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 1723 } else if (use_sytr) { 1724 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 1725 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 1726 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_ierr)); 1727 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 1728 } else { 1729 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, S_data, &B_N, pivots, &B_ierr)); 1730 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 1731 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 1732 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 1733 } 1734 PetscCall(PetscLogFlops(1.0 * size_schur * size_schur * size_schur)); 1735 PetscCall(PetscFPTrapPop()); 1736 PetscCall(MatDenseRestoreArray(S_all_inv, &S_data)); 1737 } 1738 } else if (sub_schurs->gdsw) { 1739 Mat tS, tX, SEj, S_II, S_IE, S_EE; 1740 KSP pS_II; 1741 PC pS_II_pc; 1742 IS EE, II; 1743 PetscInt nS; 1744 1745 PetscCall(MatFactorCreateSchurComplement(F, &tS, NULL)); 1746 PetscCall(MatGetSize(tS, &nS, NULL)); 1747 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1748 for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { /* naive implementation */ 1749 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1750 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, SEjinv_arr, &SEj)); 1751 1752 PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &EE)); 1753 PetscCall(ISComplement(EE, 0, nS, &II)); 1754 PetscCall(MatCreateSubMatrix(tS, II, II, MAT_INITIAL_MATRIX, &S_II)); 1755 PetscCall(MatCreateSubMatrix(tS, II, EE, MAT_INITIAL_MATRIX, &S_IE)); 1756 PetscCall(MatCreateSubMatrix(tS, EE, EE, MAT_INITIAL_MATRIX, &S_EE)); 1757 PetscCall(ISDestroy(&II)); 1758 PetscCall(ISDestroy(&EE)); 1759 1760 PetscCall(KSPCreate(PETSC_COMM_SELF, &pS_II)); 1761 PetscCall(KSPSetNestLevel(pS_II, 1)); /* do not have direct access to a PC to provide the level of nesting of the KSP */ 1762 PetscCall(KSPSetType(pS_II, KSPPREONLY)); 1763 PetscCall(KSPGetPC(pS_II, &pS_II_pc)); 1764 PetscCall(PCSetType(pS_II_pc, PCSVD)); 1765 PetscCall(KSPSetOptionsPrefix(pS_II, sub_schurs->prefix)); 1766 PetscCall(KSPAppendOptionsPrefix(pS_II, "pseudo_")); 1767 PetscCall(KSPSetOperators(pS_II, S_II, S_II)); 1768 PetscCall(MatDestroy(&S_II)); 1769 PetscCall(KSPSetFromOptions(pS_II)); 1770 PetscCall(KSPSetUp(pS_II)); 1771 PetscCall(MatDuplicate(S_IE, MAT_DO_NOT_COPY_VALUES, &tX)); 1772 PetscCall(KSPMatSolve(pS_II, S_IE, tX)); 1773 PetscCall(KSPDestroy(&pS_II)); 1774 1775 PetscCall(MatTransposeMatMult(S_IE, tX, MAT_REUSE_MATRIX, PETSC_DETERMINE, &SEj)); 1776 PetscCall(MatDestroy(&S_IE)); 1777 PetscCall(MatDestroy(&tX)); 1778 PetscCall(MatAYPX(SEj, -1, S_EE, SAME_NONZERO_PATTERN)); 1779 PetscCall(MatDestroy(&S_EE)); 1780 1781 PetscCall(MatDestroy(&SEj)); 1782 cum += subset_size; 1783 SEjinv_arr += subset_size * subset_size; 1784 } 1785 PetscCall(MatDestroy(&tS)); 1786 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1787 } 1788 /* S_Ej_tilda_all */ 1789 cum = cum2 = 0; 1790 rS_data = NULL; 1791 if (S_all_inv && !multi_element) PetscCall(MatDenseGetArrayRead(S_all_inv, &rS_data)); 1792 PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1793 for (i = 0; i < sub_schurs->n_subs; i++) { 1794 PetscInt j; 1795 1796 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 1797 /* get (St^-1)_E */ 1798 /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1 1799 will be properly accessed later during adaptive selection */ 1800 if (multi_element) { /* transpose copy to workspace */ 1801 // XXX CSR directly? 1802 for (PetscInt j = 0; j < subset_size; j++) idx_work[j] = cum + j; 1803 PetscCall(MatGetValues(S_all_inv, subset_size, idx_work, subset_size, idx_work, work)); 1804 if (!sub_schurs->is_hermitian) { 1805 for (PetscInt k = 0; k < subset_size; k++) { 1806 for (PetscInt j = k; j < subset_size; j++) { 1807 PetscScalar t = work[k * subset_size + j]; 1808 work[k * subset_size + j] = work[j * subset_size + k]; 1809 work[j * subset_size + k] = t; 1810 } 1811 } 1812 } 1813 } else if (rS_data) { 1814 if (S_lower_triangular) { 1815 PetscInt k; 1816 if (sub_schurs->change) { 1817 for (k = 0; k < subset_size; k++) { 1818 for (j = k; j < subset_size; j++) { 1819 work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 1820 work[j * subset_size + k] = work[k * subset_size + j]; 1821 } 1822 } 1823 } else { 1824 for (k = 0; k < subset_size; k++) { 1825 for (j = k; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 1826 } 1827 } 1828 } else { 1829 PetscInt k; 1830 for (k = 0; k < subset_size; k++) { 1831 for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; 1832 } 1833 } 1834 } 1835 if (sub_schurs->change) { 1836 Mat change_sub, SEj, T; 1837 PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1. / PETSC_SMALL; 1838 1839 /* change basis */ 1840 PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL)); 1841 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, (rS_data || multi_element) ? work : SEjinv_arr, &SEj)); 1842 if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */ 1843 Mat T2; 1844 PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2)); 1845 PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 1846 PetscCall(MatDestroy(&T2)); 1847 PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T)); 1848 } else { 1849 PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T)); 1850 } 1851 PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN)); 1852 PetscCall(MatDestroy(&T)); 1853 PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], val, NULL, NULL)); 1854 PetscCall(MatDestroy(&SEj)); 1855 } 1856 if (rS_data || multi_element) PetscCall(PetscArraycpy(SEjinv_arr, work, subset_size * subset_size)); 1857 cum += subset_size; 1858 cum2 += subset_size * (size_schur + 1); 1859 SEjinv_arr += subset_size * subset_size; 1860 } 1861 PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr)); 1862 if (S_all_inv) { 1863 if (!multi_element) PetscCall(MatDenseRestoreArrayRead(S_all_inv, &rS_data)); 1864 if (solver_S) { 1865 if (schur_has_vertices) { 1866 PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_FACTORED)); 1867 } else { 1868 PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_INVERTED)); 1869 } 1870 } 1871 } 1872 PetscCall(MatDestroy(&S_all_inv)); 1873 } 1874 1875 /* move back factors if needed */ 1876 if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) { 1877 Mat S_tmp; 1878 PetscInt nd = 0; 1879 PetscScalar *data; 1880 1881 PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices"); 1882 PetscCheck(solver_S, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen"); 1883 PetscCall(MatFactorGetSchurComplement(F, &S_tmp, NULL)); 1884 PetscCall(MatDenseGetArray(S_tmp, &data)); 1885 PetscCall(PetscArrayzero(data, size_schur * size_schur)); 1886 1887 if (S_lower_triangular) { 1888 cum = 0; 1889 for (i = 0; i < size_active_schur; i++) { 1890 PetscCall(PetscArraycpy(data + i * (size_schur + 1), schur_factor + cum, size_active_schur - i)); 1891 cum += size_active_schur - i; 1892 } 1893 } else { 1894 PetscCall(PetscArraycpy(data, schur_factor, size_schur * size_schur)); 1895 } 1896 if (sub_schurs->is_dir) { 1897 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 1898 for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = schur_factor[cum + i]; 1899 } 1900 /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions, 1901 set the diagonal entry of the Schur factor to a very large value */ 1902 for (i = size_active_schur + nd; i < size_schur; i++) data[i * (size_schur + 1)] = infty; 1903 PetscCall(MatDenseRestoreArray(S_tmp, &data)); 1904 PetscCall(MatFactorRestoreSchurComplement(F, &S_tmp, MAT_FACTOR_SCHUR_FACTORED)); 1905 } 1906 } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */ 1907 PetscScalar *data; 1908 PetscInt nd = 0; 1909 1910 if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */ 1911 PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd)); 1912 } 1913 PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL)); 1914 PetscCall(MatDenseGetArray(S_all, &data)); 1915 for (i = 0; i < size_active_schur; i++) PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur)); 1916 for (i = size_active_schur + nd; i < size_schur; i++) { 1917 PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur)); 1918 data[i * (size_schur + 1)] = infty; 1919 } 1920 PetscCall(MatDenseRestoreArray(S_all, &data)); 1921 PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); 1922 } 1923 PetscCall(PetscFree(idx_work)); 1924 PetscCall(PetscFree(work)); 1925 PetscCall(PetscFree(schur_factor)); 1926 PetscCall(VecDestroy(&Dall)); 1927 PetscCall(ISDestroy(&is_schur)); 1928 if (multi_element) { 1929 for (PetscInt sub = 0; sub < n_local_subs; sub++) { 1930 PetscCall(ISDestroy(&is_sub_all[sub])); 1931 PetscCall(ISDestroy(&is_sub_schur_all[sub])); 1932 PetscCall(ISDestroy(&is_sub_schur[sub])); 1933 } 1934 PetscCall(PetscFree3(is_sub_all, is_sub_schur_all, is_sub_schur)); 1935 } 1936 } 1937 PetscCall(ISDestroy(&is_I_layer)); 1938 PetscCall(MatDestroy(&S_all)); 1939 PetscCall(MatDestroy(&A_BB)); 1940 PetscCall(MatDestroy(&A_IB)); 1941 PetscCall(MatDestroy(&A_BI)); 1942 PetscCall(MatDestroy(&F)); 1943 1944 PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY)); 1945 PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY)); 1946 if (compute_Stilda) { 1947 PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY)); 1948 PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY)); 1949 if (deluxe) { 1950 PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY)); 1951 PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY)); 1952 } 1953 } 1954 1955 /* Get local part of (\sum_j S_Ej) */ 1956 if (!sub_schurs->sum_S_Ej_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_all)); 1957 PetscCall(VecSet(gstash, 0.0)); 1958 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &stasharray)); 1959 PetscCall(VecPlaceArray(lstash, stasharray)); 1960 PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1961 PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1962 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &stasharray)); 1963 PetscCall(VecResetArray(lstash)); 1964 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &stasharray)); 1965 PetscCall(VecPlaceArray(lstash, stasharray)); 1966 PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1967 PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1968 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &stasharray)); 1969 PetscCall(VecResetArray(lstash)); 1970 1971 /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */ 1972 if (compute_Stilda) { 1973 PetscCall(VecSet(gstash, 0.0)); 1974 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray)); 1975 PetscCall(VecPlaceArray(lstash, stasharray)); 1976 PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1977 PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1978 PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1979 PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1980 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray)); 1981 PetscCall(VecResetArray(lstash)); 1982 if (deluxe) { 1983 PetscCall(VecSet(gstash, 0.0)); 1984 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &stasharray)); 1985 PetscCall(VecPlaceArray(lstash, stasharray)); 1986 PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1987 PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD)); 1988 PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1989 PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE)); 1990 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &stasharray)); 1991 PetscCall(VecResetArray(lstash)); 1992 } else if (!sub_schurs->gdsw) { 1993 PetscScalar *array; 1994 PetscInt cum; 1995 1996 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &array)); 1997 cum = 0; 1998 for (i = 0; i < sub_schurs->n_subs; i++) { 1999 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 2000 PetscCall(PetscBLASIntCast(subset_size, &B_N)); 2001 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 2002 if (use_potr) { 2003 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, array + cum, &B_N, &B_ierr)); 2004 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 2005 PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, array + cum, &B_N, &B_ierr)); 2006 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 2007 } else if (use_sytr) { 2008 PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 2009 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 2010 PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_ierr)); 2011 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 2012 } else { 2013 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, array + cum, &B_N, pivots, &B_ierr)); 2014 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %" PetscBLASInt_FMT, B_ierr); 2015 PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr)); 2016 PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr); 2017 } 2018 PetscCall(PetscLogFlops(1.0 * subset_size * subset_size * subset_size)); 2019 PetscCall(PetscFPTrapPop()); 2020 cum += subset_size * subset_size; 2021 } 2022 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &array)); 2023 PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all)); 2024 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 2025 sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all; 2026 } 2027 } 2028 PetscCall(VecDestroy(&lstash)); 2029 PetscCall(VecDestroy(&gstash)); 2030 PetscCall(VecScatterDestroy(&sstash)); 2031 2032 if (matl_dbg_viewer) { 2033 if (sub_schurs->S_Ej_all) { 2034 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all, "SE")); 2035 PetscCall(MatView(sub_schurs->S_Ej_all, matl_dbg_viewer)); 2036 } 2037 if (sub_schurs->sum_S_Ej_all) { 2038 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all, "SSE")); 2039 PetscCall(MatView(sub_schurs->sum_S_Ej_all, matl_dbg_viewer)); 2040 } 2041 if (sub_schurs->sum_S_Ej_inv_all) { 2042 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all, "SSEm")); 2043 PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all, matl_dbg_viewer)); 2044 } 2045 if (sub_schurs->sum_S_Ej_tilda_all) { 2046 PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all, "SSEt")); 2047 PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all, matl_dbg_viewer)); 2048 } 2049 } 2050 2051 /* when not explicit, we need to set the factor type */ 2052 if (sub_schurs->mat_factor_type == MAT_FACTOR_NONE) sub_schurs->mat_factor_type = sub_schurs->is_hermitian ? MAT_FACTOR_CHOLESKY : MAT_FACTOR_LU; 2053 2054 /* free workspace */ 2055 if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer)); 2056 if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n)); 2057 PetscCall(PetscViewerDestroy(&matl_dbg_viewer)); 2058 PetscCall(PetscFree2(Bwork, pivots)); 2059 PetscCall(PetscCommDestroy(&comm_n)); 2060 PetscCall(PetscFree(all_local_subid_N)); 2061 PetscFunctionReturn(PETSC_SUCCESS); 2062 } 2063 2064 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char *prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw) 2065 { 2066 IS *faces, *edges, *all_cc, vertices; 2067 PetscInt s, i, n_faces, n_edges, n_all_cc; 2068 PetscBool is_sorted, ispardiso, ismumps; 2069 2070 PetscFunctionBegin; 2071 PetscCall(ISSorted(is_I, &is_sorted)); 2072 PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_I), PETSC_ERR_PLIB, "IS for I dofs should be shorted"); 2073 PetscCall(ISSorted(is_B, &is_sorted)); 2074 PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_B), PETSC_ERR_PLIB, "IS for B dofs should be shorted"); 2075 2076 /* reset any previous data */ 2077 PetscCall(PCBDDCSubSchursReset(sub_schurs)); 2078 2079 sub_schurs->gdsw = gdsw; 2080 sub_schurs->graph = graph; 2081 2082 /* get index sets for faces and edges (already sorted by global ordering) */ 2083 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices)); 2084 n_all_cc = n_faces + n_edges; 2085 PetscCall(PetscBTCreate(n_all_cc, &sub_schurs->is_edge)); 2086 PetscCall(PetscMalloc1(n_all_cc, &all_cc)); 2087 n_all_cc = 0; 2088 for (i = 0; i < n_faces; i++) { 2089 PetscCall(ISGetSize(faces[i], &s)); 2090 if (!s) continue; 2091 if (copycc) { 2092 PetscCall(ISDuplicate(faces[i], &all_cc[n_all_cc])); 2093 } else { 2094 PetscCall(PetscObjectReference((PetscObject)faces[i])); 2095 all_cc[n_all_cc] = faces[i]; 2096 } 2097 n_all_cc++; 2098 } 2099 for (i = 0; i < n_edges; i++) { 2100 PetscCall(ISGetSize(edges[i], &s)); 2101 if (!s) continue; 2102 if (copycc) { 2103 PetscCall(ISDuplicate(edges[i], &all_cc[n_all_cc])); 2104 } else { 2105 PetscCall(PetscObjectReference((PetscObject)edges[i])); 2106 all_cc[n_all_cc] = edges[i]; 2107 } 2108 PetscCall(PetscBTSet(sub_schurs->is_edge, n_all_cc)); 2109 n_all_cc++; 2110 } 2111 PetscCall(PetscObjectReference((PetscObject)vertices)); 2112 sub_schurs->is_vertices = vertices; 2113 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices)); 2114 sub_schurs->is_dir = NULL; 2115 PetscCall(PCBDDCGraphGetDirichletDofsB(graph, &sub_schurs->is_dir)); 2116 2117 /* Determine if MatFactor can be used */ 2118 PetscCall(PetscStrallocpy(prefix, &sub_schurs->prefix)); 2119 #if defined(PETSC_HAVE_MUMPS) 2120 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMUMPS, sizeof(sub_schurs->mat_solver_type))); 2121 #elif defined(PETSC_HAVE_MKL_PARDISO) 2122 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, sizeof(sub_schurs->mat_solver_type))); 2123 #else 2124 PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERPETSC, sizeof(sub_schurs->mat_solver_type))); 2125 #endif 2126 sub_schurs->mat_factor_type = MAT_FACTOR_NONE; 2127 sub_schurs->is_hermitian = PetscDefined(USE_COMPLEX) ? PETSC_FALSE : PETSC_TRUE; /* Hermitian Cholesky is not supported by PETSc and external packages */ 2128 sub_schurs->is_posdef = PETSC_TRUE; 2129 sub_schurs->is_symmetric = PETSC_TRUE; 2130 sub_schurs->debug = PETSC_FALSE; 2131 sub_schurs->restrict_comm = PETSC_FALSE; 2132 PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC"); 2133 PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type", "Specific direct solver to use", NULL, sub_schurs->mat_solver_type, sub_schurs->mat_solver_type, sizeof(sub_schurs->mat_solver_type), NULL)); 2134 PetscCall(PetscOptionsEnum("-sub_schurs_mat_factor_type", "Factor type to use. Use MAT_FACTOR_NONE for automatic selection", NULL, MatFactorTypes, (PetscEnum)sub_schurs->mat_factor_type, (PetscEnum *)&sub_schurs->mat_factor_type, NULL)); 2135 PetscCall(PetscOptionsBool("-sub_schurs_symmetric", "Symmetric problem", NULL, sub_schurs->is_symmetric, &sub_schurs->is_symmetric, NULL)); 2136 PetscCall(PetscOptionsBool("-sub_schurs_hermitian", "Hermitian problem", NULL, sub_schurs->is_hermitian, &sub_schurs->is_hermitian, NULL)); 2137 PetscCall(PetscOptionsBool("-sub_schurs_posdef", "Positive definite problem", NULL, sub_schurs->is_posdef, &sub_schurs->is_posdef, NULL)); 2138 PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm", "Restrict communicator on active processes only", NULL, sub_schurs->restrict_comm, &sub_schurs->restrict_comm, NULL)); 2139 PetscCall(PetscOptionsBool("-sub_schurs_debug", "Debug output", NULL, sub_schurs->debug, &sub_schurs->debug, NULL)); 2140 PetscOptionsEnd(); 2141 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMUMPS, &ismumps)); 2142 PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &ispardiso)); 2143 sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps); 2144 2145 /* for reals, symmetric and Hermitian are synonyms */ 2146 #if !defined(PETSC_USE_COMPLEX) 2147 sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian); 2148 sub_schurs->is_hermitian = sub_schurs->is_symmetric; 2149 #endif 2150 2151 PetscCall(PetscObjectReference((PetscObject)is_I)); 2152 sub_schurs->is_I = is_I; 2153 PetscCall(PetscObjectReference((PetscObject)is_B)); 2154 sub_schurs->is_B = is_B; 2155 PetscCall(PetscObjectReference((PetscObject)graph->l2gmap)); 2156 sub_schurs->l2gmap = graph->l2gmap; 2157 PetscCall(PetscObjectReference((PetscObject)BtoNmap)); 2158 sub_schurs->BtoNmap = BtoNmap; 2159 sub_schurs->n_subs = n_all_cc; 2160 sub_schurs->is_subs = all_cc; 2161 sub_schurs->S_Ej_all = NULL; 2162 sub_schurs->sum_S_Ej_all = NULL; 2163 sub_schurs->sum_S_Ej_inv_all = NULL; 2164 sub_schurs->sum_S_Ej_tilda_all = NULL; 2165 sub_schurs->is_Ej_all = NULL; 2166 PetscFunctionReturn(PETSC_SUCCESS); 2167 } 2168 2169 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs) 2170 { 2171 PCBDDCSubSchurs schurs_ctx; 2172 2173 PetscFunctionBegin; 2174 PetscCall(PetscNew(&schurs_ctx)); 2175 schurs_ctx->n_subs = 0; 2176 *sub_schurs = schurs_ctx; 2177 PetscFunctionReturn(PETSC_SUCCESS); 2178 } 2179 2180 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs) 2181 { 2182 PetscFunctionBegin; 2183 if (!sub_schurs) PetscFunctionReturn(PETSC_SUCCESS); 2184 sub_schurs->graph = NULL; 2185 PetscCall(PetscFree(sub_schurs->prefix)); 2186 PetscCall(MatDestroy(&sub_schurs->A)); 2187 PetscCall(MatDestroy(&sub_schurs->S)); 2188 PetscCall(ISDestroy(&sub_schurs->is_I)); 2189 PetscCall(ISDestroy(&sub_schurs->is_B)); 2190 PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap)); 2191 PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap)); 2192 PetscCall(MatDestroy(&sub_schurs->S_Ej_all)); 2193 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all)); 2194 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all)); 2195 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all)); 2196 PetscCall(ISDestroy(&sub_schurs->is_Ej_all)); 2197 PetscCall(ISDestroy(&sub_schurs->is_vertices)); 2198 PetscCall(ISDestroy(&sub_schurs->is_dir)); 2199 PetscCall(PetscBTDestroy(&sub_schurs->is_edge)); 2200 for (PetscInt i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISDestroy(&sub_schurs->is_subs[i])); 2201 if (sub_schurs->n_subs) PetscCall(PetscFree(sub_schurs->is_subs)); 2202 if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver)); 2203 PetscCall(PetscFree(sub_schurs->reuse_solver)); 2204 if (sub_schurs->change) { 2205 for (PetscInt i = 0; i < sub_schurs->n_subs; i++) { 2206 PetscCall(KSPDestroy(&sub_schurs->change[i])); 2207 PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i])); 2208 } 2209 } 2210 PetscCall(PetscFree(sub_schurs->change)); 2211 PetscCall(PetscFree(sub_schurs->change_primal_sub)); 2212 sub_schurs->n_subs = 0; 2213 PetscFunctionReturn(PETSC_SUCCESS); 2214 } 2215 2216 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs) 2217 { 2218 PetscFunctionBegin; 2219 PetscCall(PCBDDCSubSchursReset(*sub_schurs)); 2220 PetscCall(PetscFree(*sub_schurs)); 2221 PetscFunctionReturn(PETSC_SUCCESS); 2222 } 2223 2224 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *queue_tip, PetscInt n_prev, PetscBT touched, PetscInt *xadj, PetscInt *adjncy, PetscInt *n_added) 2225 { 2226 PetscInt i, j, n; 2227 2228 PetscFunctionBegin; 2229 n = 0; 2230 for (i = -n_prev; i < 0; i++) { 2231 PetscInt start_dof = queue_tip[i]; 2232 for (j = xadj[start_dof]; j < xadj[start_dof + 1]; j++) { 2233 PetscInt dof = adjncy[j]; 2234 if (!PetscBTLookup(touched, dof)) { 2235 PetscCall(PetscBTSet(touched, dof)); 2236 queue_tip[n] = dof; 2237 n++; 2238 } 2239 } 2240 } 2241 *n_added = n; 2242 PetscFunctionReturn(PETSC_SUCCESS); 2243 } 2244