1 #include <petsc/private/pcbddcimpl.h> 2 #include <petsc/private/pcbddcprivateimpl.h> 3 #include <petscblaslapack.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 6 /* prototypes for deluxe functions */ 7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC); 8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC); 9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC); 10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC); 11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling); 12 13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A, Mat B, Mat X) { 14 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 15 const PetscScalar *b; 16 PetscScalar *x; 17 PetscInt n; 18 PetscBLASInt nrhs, info, m; 19 PetscBool flg; 20 21 PetscFunctionBegin; 22 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 23 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 24 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix"); 25 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 26 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 27 28 PetscCall(MatGetSize(B, NULL, &n)); 29 PetscCall(PetscBLASIntCast(n, &nrhs)); 30 PetscCall(MatDenseGetArrayRead(B, &b)); 31 PetscCall(MatDenseGetArray(X, &x)); 32 PetscCall(PetscArraycpy(x, b, m * nrhs)); 33 PetscCall(MatDenseRestoreArrayRead(B, &b)); 34 35 if (A->factortype == MAT_FACTOR_LU) { 36 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 37 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve"); 38 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only LU factor supported"); 39 40 PetscCall(MatDenseRestoreArray(X, &x)); 41 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 42 PetscFunctionReturn(0); 43 } 44 45 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector) { 46 PC_IS *pcis = (PC_IS *)pc->data; 47 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 48 49 PetscFunctionBegin; 50 /* Apply partition of unity */ 51 PetscCall(VecPointwiseMult(pcbddc->work_scaling, pcis->D, local_interface_vector)); 52 PetscCall(VecSet(global_vector, 0.0)); 53 PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE)); 54 PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE)); 55 PetscFunctionReturn(0); 56 } 57 58 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y) { 59 PC_IS *pcis = (PC_IS *)pc->data; 60 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 61 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 62 63 PetscFunctionBegin; 64 PetscCall(VecSet(pcbddc->work_scaling, 0.0)); 65 PetscCall(VecSet(y, 0.0)); 66 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 67 PetscInt i; 68 const PetscScalar *array_x, *array_D; 69 PetscScalar *array; 70 PetscCall(VecGetArrayRead(x, &array_x)); 71 PetscCall(VecGetArrayRead(pcis->D, &array_D)); 72 PetscCall(VecGetArray(pcbddc->work_scaling, &array)); 73 for (i = 0; i < deluxe_ctx->n_simple; i++) array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]] * array_D[deluxe_ctx->idx_simple_B[i]]; 74 PetscCall(VecRestoreArray(pcbddc->work_scaling, &array)); 75 PetscCall(VecRestoreArrayRead(pcis->D, &array_D)); 76 PetscCall(VecRestoreArrayRead(x, &array_x)); 77 } 78 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 79 if (deluxe_ctx->seq_mat) { 80 PetscInt i; 81 for (i = 0; i < deluxe_ctx->seq_n; i++) { 82 if (deluxe_ctx->change) { 83 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 84 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 85 if (deluxe_ctx->change_with_qr) { 86 Mat change; 87 88 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 89 PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 90 } else { 91 PetscCall(KSPSolve(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 92 } 93 } else { 94 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 95 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 96 } 97 PetscCall(MatMultTranspose(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 98 if (deluxe_ctx->seq_mat_inv_sum[i]) { 99 PetscScalar *x; 100 101 PetscCall(VecGetArray(deluxe_ctx->seq_work2[i], &x)); 102 PetscCall(VecPlaceArray(deluxe_ctx->seq_work1[i], x)); 103 PetscCall(VecRestoreArray(deluxe_ctx->seq_work2[i], &x)); 104 PetscCall(MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 105 PetscCall(VecResetArray(deluxe_ctx->seq_work1[i])); 106 } 107 if (deluxe_ctx->change) { 108 Mat change; 109 110 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 111 PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 112 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 113 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 114 } else { 115 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 116 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 117 } 118 } 119 } 120 /* put local boundary part in global vector */ 121 PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE)); 122 PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE)); 123 PetscFunctionReturn(0); 124 } 125 126 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector) { 127 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 128 129 PetscFunctionBegin; 130 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 131 PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 2); 132 PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 3); 133 PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!"); 134 PetscUseMethod(pc, "PCBDDCScalingExtension_C", (PC, Vec, Vec), (pc, local_interface_vector, global_vector)); 135 PetscFunctionReturn(0); 136 } 137 138 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector) { 139 PC_IS *pcis = (PC_IS *)pc->data; 140 141 PetscFunctionBegin; 142 PetscCall(VecScatterBegin(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD)); 143 PetscCall(VecScatterEnd(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD)); 144 /* Apply partition of unity */ 145 PetscCall(VecPointwiseMult(local_interface_vector, pcis->D, local_interface_vector)); 146 PetscFunctionReturn(0); 147 } 148 149 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y) { 150 PC_IS *pcis = (PC_IS *)pc->data; 151 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 152 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 153 154 PetscFunctionBegin; 155 /* get local boundary part of global vector */ 156 PetscCall(VecScatterBegin(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD)); 157 PetscCall(VecScatterEnd(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD)); 158 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 159 PetscInt i; 160 PetscScalar *array_y; 161 const PetscScalar *array_D; 162 PetscCall(VecGetArray(y, &array_y)); 163 PetscCall(VecGetArrayRead(pcis->D, &array_D)); 164 for (i = 0; i < deluxe_ctx->n_simple; i++) array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]]; 165 PetscCall(VecRestoreArrayRead(pcis->D, &array_D)); 166 PetscCall(VecRestoreArray(y, &array_y)); 167 } 168 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 169 if (deluxe_ctx->seq_mat) { 170 PetscInt i; 171 for (i = 0; i < deluxe_ctx->seq_n; i++) { 172 if (deluxe_ctx->change) { 173 Mat change; 174 175 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 176 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 177 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 178 PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 179 } else { 180 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 181 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 182 } 183 if (deluxe_ctx->seq_mat_inv_sum[i]) { 184 PetscScalar *x; 185 186 PetscCall(VecGetArray(deluxe_ctx->seq_work1[i], &x)); 187 PetscCall(VecPlaceArray(deluxe_ctx->seq_work2[i], x)); 188 PetscCall(VecRestoreArray(deluxe_ctx->seq_work1[i], &x)); 189 PetscCall(MatSolve(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 190 PetscCall(VecResetArray(deluxe_ctx->seq_work2[i])); 191 } 192 PetscCall(MatMult(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 193 if (deluxe_ctx->change) { 194 if (deluxe_ctx->change_with_qr) { 195 Mat change; 196 197 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 198 PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 199 } else { 200 PetscCall(KSPSolveTranspose(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 201 } 202 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE)); 203 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE)); 204 } else { 205 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE)); 206 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE)); 207 } 208 } 209 } 210 PetscFunctionReturn(0); 211 } 212 213 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector) { 214 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 215 216 PetscFunctionBegin; 217 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 218 PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 2); 219 PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 3); 220 PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!"); 221 PetscUseMethod(pc, "PCBDDCScalingRestriction_C", (PC, Vec, Vec), (pc, global_vector, local_interface_vector)); 222 PetscFunctionReturn(0); 223 } 224 225 PetscErrorCode PCBDDCScalingSetUp(PC pc) { 226 PC_IS *pcis = (PC_IS *)pc->data; 227 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 228 229 PetscFunctionBegin; 230 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 231 PetscCall(PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0)); 232 /* create work vector for the operator */ 233 PetscCall(VecDestroy(&pcbddc->work_scaling)); 234 PetscCall(VecDuplicate(pcis->vec1_B, &pcbddc->work_scaling)); 235 /* always rebuild pcis->D */ 236 if (pcis->use_stiffness_scaling) { 237 PetscScalar *a; 238 PetscInt i, n; 239 240 PetscCall(MatGetDiagonal(pcbddc->local_mat, pcis->vec1_N)); 241 PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD)); 242 PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD)); 243 PetscCall(VecAbs(pcis->D)); 244 PetscCall(VecGetLocalSize(pcis->D, &n)); 245 PetscCall(VecGetArray(pcis->D, &a)); 246 for (i = 0; i < n; i++) 247 if (PetscAbsScalar(a[i]) < PETSC_SMALL) a[i] = 1.0; 248 PetscCall(VecRestoreArray(pcis->D, &a)); 249 } 250 PetscCall(VecSet(pcis->vec1_global, 0.0)); 251 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 252 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 253 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 254 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 255 PetscCall(VecPointwiseDivide(pcis->D, pcis->D, pcis->vec1_B)); 256 /* now setup */ 257 if (pcbddc->use_deluxe_scaling) { 258 if (!pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingCreate_Deluxe(pc)); 259 PetscCall(PCBDDCScalingSetUp_Deluxe(pc)); 260 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Deluxe)); 261 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Deluxe)); 262 } else { 263 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Basic)); 264 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Basic)); 265 } 266 PetscCall(PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0)); 267 268 /* test */ 269 if (pcbddc->dbg_flag) { 270 Mat B0_B = NULL; 271 Vec B0_Bv = NULL, B0_Bv2 = NULL; 272 Vec vec2_global; 273 PetscViewer viewer = pcbddc->dbg_viewer; 274 PetscReal error; 275 276 /* extension -> from local to parallel */ 277 PetscCall(VecSet(pcis->vec1_global, 0.0)); 278 PetscCall(VecSetRandom(pcis->vec1_B, NULL)); 279 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 280 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 281 PetscCall(VecDuplicate(pcis->vec1_global, &vec2_global)); 282 PetscCall(VecCopy(pcis->vec1_global, vec2_global)); 283 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 284 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 285 if (pcbddc->benign_n) { 286 IS is_dummy; 287 288 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 289 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 290 PetscCall(ISDestroy(&is_dummy)); 291 PetscCall(MatCreateVecs(B0_B, NULL, &B0_Bv)); 292 PetscCall(VecDuplicate(B0_Bv, &B0_Bv2)); 293 PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv)); 294 } 295 PetscCall(PCBDDCScalingExtension(pc, pcis->vec1_B, pcis->vec1_global)); 296 if (pcbddc->benign_saddle_point) { 297 PetscReal errorl = 0.; 298 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 299 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 300 if (pcbddc->benign_n) { 301 PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv2)); 302 PetscCall(VecAXPY(B0_Bv, -1.0, B0_Bv2)); 303 PetscCall(VecNorm(B0_Bv, NORM_INFINITY, &errorl)); 304 } 305 PetscCallMPI(MPI_Allreduce(&errorl, &error, 1, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)pc))); 306 PetscCall(PetscViewerASCIIPrintf(viewer, "Error benign extension %1.14e\n", (double)error)); 307 } 308 PetscCall(VecAXPY(pcis->vec1_global, -1.0, vec2_global)); 309 PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error)); 310 PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling extension %1.14e\n", (double)error)); 311 PetscCall(VecDestroy(&vec2_global)); 312 313 /* restriction -> from parallel to local */ 314 PetscCall(VecSet(pcis->vec1_global, 0.0)); 315 PetscCall(VecSetRandom(pcis->vec1_B, NULL)); 316 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 317 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 318 PetscCall(PCBDDCScalingRestriction(pc, pcis->vec1_global, pcis->vec1_B)); 319 PetscCall(VecScale(pcis->vec1_B, -1.0)); 320 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 321 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 322 PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error)); 323 PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling restriction %1.14e\n", (double)error)); 324 PetscCall(MatDestroy(&B0_B)); 325 PetscCall(VecDestroy(&B0_Bv)); 326 PetscCall(VecDestroy(&B0_Bv2)); 327 } 328 PetscFunctionReturn(0); 329 } 330 331 PetscErrorCode PCBDDCScalingDestroy(PC pc) { 332 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 333 334 PetscFunctionBegin; 335 if (pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingDestroy_Deluxe(pc)); 336 PetscCall(VecDestroy(&pcbddc->work_scaling)); 337 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", NULL)); 338 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", NULL)); 339 PetscFunctionReturn(0); 340 } 341 342 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc) { 343 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 344 PCBDDCDeluxeScaling deluxe_ctx; 345 346 PetscFunctionBegin; 347 PetscCall(PetscNew(&deluxe_ctx)); 348 pcbddc->deluxe_ctx = deluxe_ctx; 349 PetscFunctionReturn(0); 350 } 351 352 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc) { 353 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 354 355 PetscFunctionBegin; 356 PetscCall(PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx)); 357 PetscCall(PetscFree(pcbddc->deluxe_ctx)); 358 PetscFunctionReturn(0); 359 } 360 361 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx) { 362 PetscInt i; 363 364 PetscFunctionBegin; 365 PetscCall(PetscFree(deluxe_ctx->idx_simple_B)); 366 deluxe_ctx->n_simple = 0; 367 for (i = 0; i < deluxe_ctx->seq_n; i++) { 368 PetscCall(VecScatterDestroy(&deluxe_ctx->seq_scctx[i])); 369 PetscCall(VecDestroy(&deluxe_ctx->seq_work1[i])); 370 PetscCall(VecDestroy(&deluxe_ctx->seq_work2[i])); 371 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 372 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 373 } 374 PetscCall(PetscFree5(deluxe_ctx->seq_scctx, deluxe_ctx->seq_work1, deluxe_ctx->seq_work2, deluxe_ctx->seq_mat, deluxe_ctx->seq_mat_inv_sum)); 375 PetscCall(PetscFree(deluxe_ctx->workspace)); 376 deluxe_ctx->seq_n = 0; 377 PetscFunctionReturn(0); 378 } 379 380 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc) { 381 PC_IS *pcis = (PC_IS *)pc->data; 382 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 383 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 384 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 385 386 PetscFunctionBegin; 387 /* reset data structures if the topology has changed */ 388 if (pcbddc->recompute_topography) PetscCall(PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx)); 389 390 /* Compute data structures to solve sequential problems */ 391 PetscCall(PCBDDCScalingSetUp_Deluxe_Private(pc)); 392 393 /* diagonal scaling on interface dofs not contained in cc */ 394 if (sub_schurs->is_vertices || sub_schurs->is_dir) { 395 PetscInt n_com, n_dir; 396 n_com = 0; 397 if (sub_schurs->is_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_com)); 398 n_dir = 0; 399 if (sub_schurs->is_dir) PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir)); 400 if (!deluxe_ctx->n_simple) { 401 deluxe_ctx->n_simple = n_dir + n_com; 402 PetscCall(PetscMalloc1(deluxe_ctx->n_simple, &deluxe_ctx->idx_simple_B)); 403 if (sub_schurs->is_vertices) { 404 PetscInt nmap; 405 const PetscInt *idxs; 406 407 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 408 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_com, idxs, &nmap, deluxe_ctx->idx_simple_B)); 409 PetscCheck(nmap == n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (is_vertices)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_com); 410 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 411 } 412 if (sub_schurs->is_dir) { 413 PetscInt nmap; 414 const PetscInt *idxs; 415 416 PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs)); 417 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_dir, idxs, &nmap, deluxe_ctx->idx_simple_B + n_com)); 418 PetscCheck(nmap == n_dir, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (sub_schurs->is_dir)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_dir); 419 PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs)); 420 } 421 PetscCall(PetscSortInt(deluxe_ctx->n_simple, deluxe_ctx->idx_simple_B)); 422 } else { 423 PetscCheck(deluxe_ctx->n_simple == n_dir + n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of simply scaled dofs %" PetscInt_FMT " is different from the previous one computed %" PetscInt_FMT, n_dir + n_com, deluxe_ctx->n_simple); 424 } 425 } else { 426 deluxe_ctx->n_simple = 0; 427 deluxe_ctx->idx_simple_B = NULL; 428 } 429 PetscFunctionReturn(0); 430 } 431 432 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc) { 433 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 434 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 435 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 436 PetscScalar *matdata, *matdata2; 437 PetscInt i, max_subset_size, cum, cum2; 438 const PetscInt *idxs; 439 PetscBool newsetup = PETSC_FALSE; 440 441 PetscFunctionBegin; 442 PetscCheck(sub_schurs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Missing PCBDDCSubSchurs"); 443 if (!sub_schurs->n_subs) PetscFunctionReturn(0); 444 445 /* Allocate arrays for subproblems */ 446 if (!deluxe_ctx->seq_n) { 447 deluxe_ctx->seq_n = sub_schurs->n_subs; 448 PetscCall(PetscCalloc5(deluxe_ctx->seq_n, &deluxe_ctx->seq_scctx, deluxe_ctx->seq_n, &deluxe_ctx->seq_work1, deluxe_ctx->seq_n, &deluxe_ctx->seq_work2, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat_inv_sum)); 449 newsetup = PETSC_TRUE; 450 } else PetscCheck(deluxe_ctx->seq_n == sub_schurs->n_subs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of deluxe subproblems %" PetscInt_FMT " is different from the sub_schurs %" PetscInt_FMT, deluxe_ctx->seq_n, sub_schurs->n_subs); 451 452 /* the change of basis is just a reference to sub_schurs->change (if any) */ 453 deluxe_ctx->change = sub_schurs->change; 454 deluxe_ctx->change_with_qr = sub_schurs->change_with_qr; 455 456 /* Create objects for deluxe */ 457 max_subset_size = 0; 458 for (i = 0; i < sub_schurs->n_subs; i++) { 459 PetscInt subset_size; 460 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 461 max_subset_size = PetscMax(subset_size, max_subset_size); 462 } 463 if (newsetup) PetscCall(PetscMalloc1(2 * max_subset_size, &deluxe_ctx->workspace)); 464 cum = cum2 = 0; 465 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs)); 466 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &matdata)); 467 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &matdata2)); 468 for (i = 0; i < deluxe_ctx->seq_n; i++) { 469 PetscInt subset_size; 470 471 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 472 if (newsetup) { 473 IS sub; 474 /* work vectors */ 475 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace, &deluxe_ctx->seq_work1[i])); 476 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace + subset_size, &deluxe_ctx->seq_work2[i])); 477 478 /* scatters */ 479 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subset_size, idxs + cum, PETSC_COPY_VALUES, &sub)); 480 PetscCall(VecScatterCreate(pcbddc->work_scaling, sub, deluxe_ctx->seq_work1[i], NULL, &deluxe_ctx->seq_scctx[i])); 481 PetscCall(ISDestroy(&sub)); 482 } 483 484 /* S_E_j */ 485 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 486 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata + cum2, &deluxe_ctx->seq_mat[i])); 487 488 /* \sum_k S^k_E_j */ 489 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 490 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata2 + cum2, &deluxe_ctx->seq_mat_inv_sum[i])); 491 PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_SPD, sub_schurs->is_posdef)); 492 PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_HERMITIAN, sub_schurs->is_hermitian)); 493 if (sub_schurs->is_hermitian) { 494 PetscCall(MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL)); 495 } else { 496 PetscCall(MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL, NULL)); 497 } 498 if (pcbddc->deluxe_singlemat) { 499 Mat X, Y; 500 if (!sub_schurs->is_hermitian) { 501 PetscCall(MatTranspose(deluxe_ctx->seq_mat[i], MAT_INITIAL_MATRIX, &X)); 502 } else { 503 PetscCall(PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i])); 504 X = deluxe_ctx->seq_mat[i]; 505 } 506 PetscCall(MatDuplicate(X, MAT_DO_NOT_COPY_VALUES, &Y)); 507 if (!sub_schurs->is_hermitian) { 508 PetscCall(PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i], X, Y)); 509 } else { 510 PetscCall(MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i], X, Y)); 511 } 512 513 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 514 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 515 PetscCall(MatDestroy(&X)); 516 if (deluxe_ctx->change) { 517 Mat C, CY; 518 PetscCheck(deluxe_ctx->change_with_qr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only QR based change of basis"); 519 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &C, NULL)); 520 PetscCall(MatMatMult(C, Y, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CY)); 521 PetscCall(MatMatTransposeMult(CY, C, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 522 PetscCall(MatDestroy(&CY)); 523 PetscCall(MatProductClear(Y)); /* clear internal matproduct structure of Y since CY is destroyed */ 524 } 525 PetscCall(MatTranspose(Y, MAT_INPLACE_MATRIX, &Y)); 526 deluxe_ctx->seq_mat[i] = Y; 527 } 528 cum += subset_size; 529 cum2 += subset_size * subset_size; 530 } 531 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs)); 532 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &matdata)); 533 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &matdata2)); 534 if (pcbddc->deluxe_singlemat) { 535 deluxe_ctx->change = NULL; 536 deluxe_ctx->change_with_qr = PETSC_FALSE; 537 } 538 539 if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) { 540 for (i = 0; i < deluxe_ctx->seq_n; i++) { 541 if (newsetup) { 542 PC pc; 543 544 PetscCall(KSPGetPC(deluxe_ctx->change[i], &pc)); 545 PetscCall(PCSetType(pc, PCLU)); 546 PetscCall(KSPSetFromOptions(deluxe_ctx->change[i])); 547 } 548 PetscCall(KSPSetUp(deluxe_ctx->change[i])); 549 } 550 } 551 PetscFunctionReturn(0); 552 } 553