1 #include <petsc/private/pcbddcimpl.h> 2 #include <petsc/private/pcbddcprivateimpl.h> 3 #include <petscblaslapack.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 6 /* prototypes for deluxe functions */ 7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC); 8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC); 9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC); 10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC); 11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling); 12 13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A, Mat B, Mat X) 14 { 15 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 16 const PetscScalar *b; 17 PetscScalar *x; 18 PetscInt n; 19 PetscBLASInt nrhs, info, m; 20 PetscBool flg; 21 22 PetscFunctionBegin; 23 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 24 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 25 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix"); 26 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 27 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 28 29 PetscCall(MatGetSize(B, NULL, &n)); 30 PetscCall(PetscBLASIntCast(n, &nrhs)); 31 PetscCall(MatDenseGetArrayRead(B, &b)); 32 PetscCall(MatDenseGetArray(X, &x)); 33 PetscCall(PetscArraycpy(x, b, m * nrhs)); 34 PetscCall(MatDenseRestoreArrayRead(B, &b)); 35 36 PetscCheck(A->factortype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only LU factor supported"); 37 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 38 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve"); 39 40 PetscCall(MatDenseRestoreArray(X, &x)); 41 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 42 PetscFunctionReturn(PETSC_SUCCESS); 43 } 44 45 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector) 46 { 47 PC_IS *pcis = (PC_IS *)pc->data; 48 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 49 50 PetscFunctionBegin; 51 /* Apply partition of unity */ 52 PetscCall(VecPointwiseMult(pcbddc->work_scaling, pcis->D, local_interface_vector)); 53 PetscCall(VecSet(global_vector, 0.0)); 54 PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE)); 55 PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE)); 56 PetscFunctionReturn(PETSC_SUCCESS); 57 } 58 59 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y) 60 { 61 PC_IS *pcis = (PC_IS *)pc->data; 62 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 63 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 64 65 PetscFunctionBegin; 66 PetscCall(VecSet(pcbddc->work_scaling, 0.0)); 67 PetscCall(VecSet(y, 0.0)); 68 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 69 PetscInt i; 70 const PetscScalar *array_x, *array_D; 71 PetscScalar *array; 72 PetscCall(VecGetArrayRead(x, &array_x)); 73 PetscCall(VecGetArrayRead(pcis->D, &array_D)); 74 PetscCall(VecGetArray(pcbddc->work_scaling, &array)); 75 for (i = 0; i < deluxe_ctx->n_simple; i++) array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]] * array_D[deluxe_ctx->idx_simple_B[i]]; 76 PetscCall(VecRestoreArray(pcbddc->work_scaling, &array)); 77 PetscCall(VecRestoreArrayRead(pcis->D, &array_D)); 78 PetscCall(VecRestoreArrayRead(x, &array_x)); 79 } 80 /* sequential part : all problems and Schur applications collapsed into a single matrix-vector multiplication or a matvec and a solve */ 81 if (deluxe_ctx->seq_mat) { 82 PetscInt i; 83 for (i = 0; i < deluxe_ctx->seq_n; i++) { 84 if (deluxe_ctx->change) { 85 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 86 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 87 if (deluxe_ctx->change_with_qr) { 88 Mat change; 89 90 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 91 PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 92 } else { 93 PetscCall(KSPSolve(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 94 } 95 } else { 96 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 97 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 98 } 99 PetscCall(MatMultTranspose(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 100 if (deluxe_ctx->seq_mat_inv_sum[i]) { 101 PetscScalar *x; 102 103 PetscCall(VecGetArray(deluxe_ctx->seq_work2[i], &x)); 104 PetscCall(VecPlaceArray(deluxe_ctx->seq_work1[i], x)); 105 PetscCall(VecRestoreArray(deluxe_ctx->seq_work2[i], &x)); 106 PetscCall(MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 107 PetscCall(VecResetArray(deluxe_ctx->seq_work1[i])); 108 } 109 if (deluxe_ctx->change) { 110 Mat change; 111 112 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 113 PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 114 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 115 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 116 } else { 117 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 118 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 119 } 120 } 121 } 122 /* put local boundary part in global vector */ 123 PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE)); 124 PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE)); 125 PetscFunctionReturn(PETSC_SUCCESS); 126 } 127 128 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector) 129 { 130 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 131 132 PetscFunctionBegin; 133 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 134 PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 2); 135 PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 3); 136 PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!"); 137 PetscUseMethod(pc, "PCBDDCScalingExtension_C", (PC, Vec, Vec), (pc, local_interface_vector, global_vector)); 138 PetscFunctionReturn(PETSC_SUCCESS); 139 } 140 141 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector) 142 { 143 PC_IS *pcis = (PC_IS *)pc->data; 144 145 PetscFunctionBegin; 146 PetscCall(VecScatterBegin(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD)); 147 PetscCall(VecScatterEnd(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD)); 148 /* Apply partition of unity */ 149 PetscCall(VecPointwiseMult(local_interface_vector, pcis->D, local_interface_vector)); 150 PetscFunctionReturn(PETSC_SUCCESS); 151 } 152 153 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y) 154 { 155 PC_IS *pcis = (PC_IS *)pc->data; 156 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 157 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 158 159 PetscFunctionBegin; 160 /* get local boundary part of global vector */ 161 PetscCall(VecScatterBegin(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD)); 162 PetscCall(VecScatterEnd(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD)); 163 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 164 PetscInt i; 165 PetscScalar *array_y; 166 const PetscScalar *array_D; 167 PetscCall(VecGetArray(y, &array_y)); 168 PetscCall(VecGetArrayRead(pcis->D, &array_D)); 169 for (i = 0; i < deluxe_ctx->n_simple; i++) array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]]; 170 PetscCall(VecRestoreArrayRead(pcis->D, &array_D)); 171 PetscCall(VecRestoreArray(y, &array_y)); 172 } 173 /* sequential part : all problems and Schur applications collapsed into a single matrix-vector multiplication or a matvec and a solve */ 174 if (deluxe_ctx->seq_mat) { 175 PetscInt i; 176 for (i = 0; i < deluxe_ctx->seq_n; i++) { 177 if (deluxe_ctx->change) { 178 Mat change; 179 180 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 181 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 182 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 183 PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 184 } else { 185 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 186 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 187 } 188 if (deluxe_ctx->seq_mat_inv_sum[i]) { 189 PetscScalar *x; 190 191 PetscCall(VecGetArray(deluxe_ctx->seq_work1[i], &x)); 192 PetscCall(VecPlaceArray(deluxe_ctx->seq_work2[i], x)); 193 PetscCall(VecRestoreArray(deluxe_ctx->seq_work1[i], &x)); 194 PetscCall(MatSolve(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 195 PetscCall(VecResetArray(deluxe_ctx->seq_work2[i])); 196 } 197 PetscCall(MatMult(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 198 if (deluxe_ctx->change) { 199 if (deluxe_ctx->change_with_qr) { 200 Mat change; 201 202 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 203 PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 204 } else { 205 PetscCall(KSPSolveTranspose(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 206 } 207 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE)); 208 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE)); 209 } else { 210 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE)); 211 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE)); 212 } 213 } 214 } 215 PetscFunctionReturn(PETSC_SUCCESS); 216 } 217 218 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector) 219 { 220 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 221 222 PetscFunctionBegin; 223 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 224 PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 2); 225 PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 3); 226 PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!"); 227 PetscUseMethod(pc, "PCBDDCScalingRestriction_C", (PC, Vec, Vec), (pc, global_vector, local_interface_vector)); 228 PetscFunctionReturn(PETSC_SUCCESS); 229 } 230 231 PetscErrorCode PCBDDCScalingSetUp(PC pc) 232 { 233 PC_IS *pcis = (PC_IS *)pc->data; 234 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 235 236 PetscFunctionBegin; 237 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 238 PetscCall(PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0)); 239 /* create work vector for the operator */ 240 PetscCall(VecDestroy(&pcbddc->work_scaling)); 241 PetscCall(VecDuplicate(pcis->vec1_B, &pcbddc->work_scaling)); 242 /* always rebuild pcis->D */ 243 if (pcis->use_stiffness_scaling) { 244 PetscScalar *a; 245 PetscInt i, n; 246 247 PetscCall(MatGetDiagonal(pcbddc->local_mat, pcis->vec1_N)); 248 PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD)); 249 PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD)); 250 PetscCall(VecAbs(pcis->D)); 251 PetscCall(VecGetLocalSize(pcis->D, &n)); 252 PetscCall(VecGetArray(pcis->D, &a)); 253 for (i = 0; i < n; i++) 254 if (PetscAbsScalar(a[i]) < PETSC_SMALL) a[i] = 1.0; 255 PetscCall(VecRestoreArray(pcis->D, &a)); 256 } 257 PetscCall(VecSet(pcis->vec1_global, 0.0)); 258 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 259 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 260 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 261 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 262 PetscCall(VecPointwiseDivide(pcis->D, pcis->D, pcis->vec1_B)); 263 /* now setup */ 264 if (pcbddc->use_deluxe_scaling) { 265 if (!pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingCreate_Deluxe(pc)); 266 PetscCall(PCBDDCScalingSetUp_Deluxe(pc)); 267 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Deluxe)); 268 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Deluxe)); 269 } else { 270 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Basic)); 271 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Basic)); 272 } 273 PetscCall(PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0)); 274 275 /* test */ 276 if (pcbddc->dbg_flag) { 277 Mat B0_B = NULL; 278 Vec B0_Bv = NULL, B0_Bv2 = NULL; 279 Vec vec2_global; 280 PetscViewer viewer = pcbddc->dbg_viewer; 281 PetscReal error; 282 283 /* extension -> from local to parallel */ 284 PetscCall(VecSet(pcis->vec1_global, 0.0)); 285 PetscCall(VecSetRandom(pcis->vec1_B, NULL)); 286 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 287 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 288 PetscCall(VecDuplicate(pcis->vec1_global, &vec2_global)); 289 PetscCall(VecCopy(pcis->vec1_global, vec2_global)); 290 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 291 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 292 if (pcbddc->benign_n) { 293 IS is_dummy; 294 295 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 296 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 297 PetscCall(ISDestroy(&is_dummy)); 298 PetscCall(MatCreateVecs(B0_B, NULL, &B0_Bv)); 299 PetscCall(VecDuplicate(B0_Bv, &B0_Bv2)); 300 PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv)); 301 } 302 PetscCall(PCBDDCScalingExtension(pc, pcis->vec1_B, pcis->vec1_global)); 303 if (pcbddc->benign_saddle_point) { 304 PetscReal errorl = 0.; 305 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 306 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 307 if (pcbddc->benign_n) { 308 PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv2)); 309 PetscCall(VecAXPY(B0_Bv, -1.0, B0_Bv2)); 310 PetscCall(VecNorm(B0_Bv, NORM_INFINITY, &errorl)); 311 } 312 PetscCall(MPIU_Allreduce(&errorl, &error, 1, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)pc))); 313 PetscCall(PetscViewerASCIIPrintf(viewer, "Error benign extension %1.14e\n", (double)error)); 314 } 315 PetscCall(VecAXPY(pcis->vec1_global, -1.0, vec2_global)); 316 PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error)); 317 PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling extension %1.14e\n", (double)error)); 318 PetscCall(VecDestroy(&vec2_global)); 319 320 /* restriction -> from parallel to local */ 321 PetscCall(VecSet(pcis->vec1_global, 0.0)); 322 PetscCall(VecSetRandom(pcis->vec1_B, NULL)); 323 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 324 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 325 PetscCall(PCBDDCScalingRestriction(pc, pcis->vec1_global, pcis->vec1_B)); 326 PetscCall(VecScale(pcis->vec1_B, -1.0)); 327 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 328 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 329 PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error)); 330 PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling restriction %1.14e\n", (double)error)); 331 PetscCall(MatDestroy(&B0_B)); 332 PetscCall(VecDestroy(&B0_Bv)); 333 PetscCall(VecDestroy(&B0_Bv2)); 334 } 335 PetscFunctionReturn(PETSC_SUCCESS); 336 } 337 338 PetscErrorCode PCBDDCScalingDestroy(PC pc) 339 { 340 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 341 342 PetscFunctionBegin; 343 if (pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingDestroy_Deluxe(pc)); 344 PetscCall(VecDestroy(&pcbddc->work_scaling)); 345 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", NULL)); 346 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", NULL)); 347 PetscFunctionReturn(PETSC_SUCCESS); 348 } 349 350 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc) 351 { 352 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 353 PCBDDCDeluxeScaling deluxe_ctx; 354 355 PetscFunctionBegin; 356 PetscCall(PetscNew(&deluxe_ctx)); 357 pcbddc->deluxe_ctx = deluxe_ctx; 358 PetscFunctionReturn(PETSC_SUCCESS); 359 } 360 361 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc) 362 { 363 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 364 365 PetscFunctionBegin; 366 PetscCall(PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx)); 367 PetscCall(PetscFree(pcbddc->deluxe_ctx)); 368 PetscFunctionReturn(PETSC_SUCCESS); 369 } 370 371 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx) 372 { 373 PetscInt i; 374 375 PetscFunctionBegin; 376 PetscCall(PetscFree(deluxe_ctx->idx_simple_B)); 377 deluxe_ctx->n_simple = 0; 378 for (i = 0; i < deluxe_ctx->seq_n; i++) { 379 PetscCall(VecScatterDestroy(&deluxe_ctx->seq_scctx[i])); 380 PetscCall(VecDestroy(&deluxe_ctx->seq_work1[i])); 381 PetscCall(VecDestroy(&deluxe_ctx->seq_work2[i])); 382 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 383 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 384 } 385 PetscCall(PetscFree5(deluxe_ctx->seq_scctx, deluxe_ctx->seq_work1, deluxe_ctx->seq_work2, deluxe_ctx->seq_mat, deluxe_ctx->seq_mat_inv_sum)); 386 PetscCall(PetscFree(deluxe_ctx->workspace)); 387 deluxe_ctx->seq_n = 0; 388 PetscFunctionReturn(PETSC_SUCCESS); 389 } 390 391 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc) 392 { 393 PC_IS *pcis = (PC_IS *)pc->data; 394 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 395 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 396 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 397 398 PetscFunctionBegin; 399 /* reset data structures if the topology has changed */ 400 if (pcbddc->recompute_topography) PetscCall(PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx)); 401 402 /* Compute data structures to solve sequential problems */ 403 PetscCall(PCBDDCScalingSetUp_Deluxe_Private(pc)); 404 405 /* diagonal scaling on interface dofs not contained in cc */ 406 if (sub_schurs->is_vertices || sub_schurs->is_dir) { 407 PetscInt n_com, n_dir; 408 n_com = 0; 409 if (sub_schurs->is_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_com)); 410 n_dir = 0; 411 if (sub_schurs->is_dir) PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir)); 412 if (!deluxe_ctx->n_simple) { 413 deluxe_ctx->n_simple = n_dir + n_com; 414 PetscCall(PetscMalloc1(deluxe_ctx->n_simple, &deluxe_ctx->idx_simple_B)); 415 if (sub_schurs->is_vertices) { 416 PetscInt nmap; 417 const PetscInt *idxs; 418 419 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 420 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_com, idxs, &nmap, deluxe_ctx->idx_simple_B)); 421 PetscCheck(nmap == n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (is_vertices)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_com); 422 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 423 } 424 if (sub_schurs->is_dir) { 425 PetscInt nmap; 426 const PetscInt *idxs; 427 428 PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs)); 429 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_dir, idxs, &nmap, deluxe_ctx->idx_simple_B + n_com)); 430 PetscCheck(nmap == n_dir, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (sub_schurs->is_dir)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_dir); 431 PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs)); 432 } 433 PetscCall(PetscSortInt(deluxe_ctx->n_simple, deluxe_ctx->idx_simple_B)); 434 } else { 435 PetscCheck(deluxe_ctx->n_simple == n_dir + n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of simply scaled dofs %" PetscInt_FMT " is different from the previous one computed %" PetscInt_FMT, n_dir + n_com, deluxe_ctx->n_simple); 436 } 437 } else { 438 deluxe_ctx->n_simple = 0; 439 deluxe_ctx->idx_simple_B = NULL; 440 } 441 PetscFunctionReturn(PETSC_SUCCESS); 442 } 443 444 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc) 445 { 446 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 447 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 448 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 449 PetscScalar *matdata, *matdata2; 450 PetscInt i, max_subset_size, cum, cum2; 451 const PetscInt *idxs; 452 PetscBool newsetup = PETSC_FALSE; 453 454 PetscFunctionBegin; 455 PetscCheck(sub_schurs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Missing PCBDDCSubSchurs"); 456 if (!sub_schurs->n_subs) PetscFunctionReturn(PETSC_SUCCESS); 457 458 /* Allocate arrays for subproblems */ 459 if (!deluxe_ctx->seq_n) { 460 deluxe_ctx->seq_n = sub_schurs->n_subs; 461 PetscCall(PetscCalloc5(deluxe_ctx->seq_n, &deluxe_ctx->seq_scctx, deluxe_ctx->seq_n, &deluxe_ctx->seq_work1, deluxe_ctx->seq_n, &deluxe_ctx->seq_work2, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat_inv_sum)); 462 newsetup = PETSC_TRUE; 463 } else PetscCheck(deluxe_ctx->seq_n == sub_schurs->n_subs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of deluxe subproblems %" PetscInt_FMT " is different from the sub_schurs %" PetscInt_FMT, deluxe_ctx->seq_n, sub_schurs->n_subs); 464 465 /* the change of basis is just a reference to sub_schurs->change (if any) */ 466 deluxe_ctx->change = sub_schurs->change; 467 deluxe_ctx->change_with_qr = sub_schurs->change_with_qr; 468 469 /* Create objects for deluxe */ 470 max_subset_size = 0; 471 for (i = 0; i < sub_schurs->n_subs; i++) { 472 PetscInt subset_size; 473 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 474 max_subset_size = PetscMax(subset_size, max_subset_size); 475 } 476 if (newsetup) PetscCall(PetscMalloc1(2 * max_subset_size, &deluxe_ctx->workspace)); 477 cum = cum2 = 0; 478 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs)); 479 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &matdata)); 480 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &matdata2)); 481 for (i = 0; i < deluxe_ctx->seq_n; i++) { 482 PetscInt subset_size; 483 484 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 485 if (newsetup) { 486 IS sub; 487 /* work vectors */ 488 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace, &deluxe_ctx->seq_work1[i])); 489 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace + subset_size, &deluxe_ctx->seq_work2[i])); 490 491 /* scatters */ 492 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subset_size, idxs + cum, PETSC_COPY_VALUES, &sub)); 493 PetscCall(VecScatterCreate(pcbddc->work_scaling, sub, deluxe_ctx->seq_work1[i], NULL, &deluxe_ctx->seq_scctx[i])); 494 PetscCall(ISDestroy(&sub)); 495 } 496 497 /* S_E_j */ 498 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 499 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata + cum2, &deluxe_ctx->seq_mat[i])); 500 501 /* \sum_k S^k_E_j */ 502 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 503 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata2 + cum2, &deluxe_ctx->seq_mat_inv_sum[i])); 504 PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_SPD, sub_schurs->is_posdef)); 505 PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_HERMITIAN, sub_schurs->is_hermitian)); 506 if (sub_schurs->is_hermitian) { 507 PetscCall(MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL)); 508 } else { 509 PetscCall(MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL, NULL)); 510 } 511 if (pcbddc->deluxe_singlemat) { 512 Mat X, Y; 513 if (!sub_schurs->is_hermitian) { 514 PetscCall(MatTranspose(deluxe_ctx->seq_mat[i], MAT_INITIAL_MATRIX, &X)); 515 } else { 516 PetscCall(PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i])); 517 X = deluxe_ctx->seq_mat[i]; 518 } 519 PetscCall(MatDuplicate(X, MAT_DO_NOT_COPY_VALUES, &Y)); 520 if (!sub_schurs->is_hermitian) { 521 PetscCall(PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i], X, Y)); 522 } else { 523 PetscCall(MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i], X, Y)); 524 } 525 526 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 527 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 528 PetscCall(MatDestroy(&X)); 529 if (deluxe_ctx->change) { 530 Mat C, CY; 531 PetscCheck(deluxe_ctx->change_with_qr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only QR based change of basis"); 532 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &C, NULL)); 533 PetscCall(MatMatMult(C, Y, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CY)); 534 PetscCall(MatMatTransposeMult(CY, C, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 535 PetscCall(MatDestroy(&CY)); 536 PetscCall(MatProductClear(Y)); /* clear internal matproduct structure of Y since CY is destroyed */ 537 } 538 PetscCall(MatTranspose(Y, MAT_INPLACE_MATRIX, &Y)); 539 deluxe_ctx->seq_mat[i] = Y; 540 } 541 cum += subset_size; 542 cum2 += subset_size * subset_size; 543 } 544 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs)); 545 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &matdata)); 546 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &matdata2)); 547 if (pcbddc->deluxe_singlemat) { 548 deluxe_ctx->change = NULL; 549 deluxe_ctx->change_with_qr = PETSC_FALSE; 550 } 551 552 if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) { 553 for (i = 0; i < deluxe_ctx->seq_n; i++) { 554 if (newsetup) { 555 PC pc; 556 557 PetscCall(KSPGetPC(deluxe_ctx->change[i], &pc)); 558 PetscCall(PCSetType(pc, PCLU)); 559 PetscCall(KSPSetFromOptions(deluxe_ctx->change[i])); 560 } 561 PetscCall(KSPSetUp(deluxe_ctx->change[i])); 562 } 563 } 564 PetscFunctionReturn(PETSC_SUCCESS); 565 } 566