1 #include <petsc/private/pcbddcimpl.h> 2 #include <petsc/private/pcbddcprivateimpl.h> 3 #include <petscblaslapack.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 6 /* prototypes for deluxe functions */ 7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC); 8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC); 9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC); 10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC); 11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling); 12 13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A, Mat B, Mat X) 14 { 15 Mat_SeqDense *mat = (Mat_SeqDense *)A->data; 16 const PetscScalar *b; 17 PetscScalar *x; 18 PetscInt n; 19 PetscBLASInt nrhs, info, m; 20 PetscBool flg; 21 22 PetscFunctionBegin; 23 PetscCall(PetscBLASIntCast(A->rmap->n, &m)); 24 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 25 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix"); 26 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 27 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 28 29 PetscCall(MatGetSize(B, NULL, &n)); 30 PetscCall(PetscBLASIntCast(n, &nrhs)); 31 PetscCall(MatDenseGetArrayRead(B, &b)); 32 PetscCall(MatDenseGetArray(X, &x)); 33 PetscCall(PetscArraycpy(x, b, m * nrhs)); 34 PetscCall(MatDenseRestoreArrayRead(B, &b)); 35 36 if (A->factortype == MAT_FACTOR_LU) { 37 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info)); 38 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve"); 39 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only LU factor supported"); 40 41 PetscCall(MatDenseRestoreArray(X, &x)); 42 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m))); 43 PetscFunctionReturn(0); 44 } 45 46 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector) 47 { 48 PC_IS *pcis = (PC_IS *)pc->data; 49 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 50 51 PetscFunctionBegin; 52 /* Apply partition of unity */ 53 PetscCall(VecPointwiseMult(pcbddc->work_scaling, pcis->D, local_interface_vector)); 54 PetscCall(VecSet(global_vector, 0.0)); 55 PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE)); 56 PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE)); 57 PetscFunctionReturn(0); 58 } 59 60 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y) 61 { 62 PC_IS *pcis = (PC_IS *)pc->data; 63 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 64 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 65 66 PetscFunctionBegin; 67 PetscCall(VecSet(pcbddc->work_scaling, 0.0)); 68 PetscCall(VecSet(y, 0.0)); 69 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 70 PetscInt i; 71 const PetscScalar *array_x, *array_D; 72 PetscScalar *array; 73 PetscCall(VecGetArrayRead(x, &array_x)); 74 PetscCall(VecGetArrayRead(pcis->D, &array_D)); 75 PetscCall(VecGetArray(pcbddc->work_scaling, &array)); 76 for (i = 0; i < deluxe_ctx->n_simple; i++) array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]] * array_D[deluxe_ctx->idx_simple_B[i]]; 77 PetscCall(VecRestoreArray(pcbddc->work_scaling, &array)); 78 PetscCall(VecRestoreArrayRead(pcis->D, &array_D)); 79 PetscCall(VecRestoreArrayRead(x, &array_x)); 80 } 81 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 82 if (deluxe_ctx->seq_mat) { 83 PetscInt i; 84 for (i = 0; i < deluxe_ctx->seq_n; i++) { 85 if (deluxe_ctx->change) { 86 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 87 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 88 if (deluxe_ctx->change_with_qr) { 89 Mat change; 90 91 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 92 PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 93 } else { 94 PetscCall(KSPSolve(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 95 } 96 } else { 97 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 98 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 99 } 100 PetscCall(MatMultTranspose(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 101 if (deluxe_ctx->seq_mat_inv_sum[i]) { 102 PetscScalar *x; 103 104 PetscCall(VecGetArray(deluxe_ctx->seq_work2[i], &x)); 105 PetscCall(VecPlaceArray(deluxe_ctx->seq_work1[i], x)); 106 PetscCall(VecRestoreArray(deluxe_ctx->seq_work2[i], &x)); 107 PetscCall(MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 108 PetscCall(VecResetArray(deluxe_ctx->seq_work1[i])); 109 } 110 if (deluxe_ctx->change) { 111 Mat change; 112 113 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 114 PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 115 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 116 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 117 } else { 118 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 119 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE)); 120 } 121 } 122 } 123 /* put local boundary part in global vector */ 124 PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE)); 125 PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE)); 126 PetscFunctionReturn(0); 127 } 128 129 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector) 130 { 131 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 132 133 PetscFunctionBegin; 134 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 135 PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 2); 136 PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 3); 137 PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!"); 138 PetscUseMethod(pc, "PCBDDCScalingExtension_C", (PC, Vec, Vec), (pc, local_interface_vector, global_vector)); 139 PetscFunctionReturn(0); 140 } 141 142 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector) 143 { 144 PC_IS *pcis = (PC_IS *)pc->data; 145 146 PetscFunctionBegin; 147 PetscCall(VecScatterBegin(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD)); 148 PetscCall(VecScatterEnd(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD)); 149 /* Apply partition of unity */ 150 PetscCall(VecPointwiseMult(local_interface_vector, pcis->D, local_interface_vector)); 151 PetscFunctionReturn(0); 152 } 153 154 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y) 155 { 156 PC_IS *pcis = (PC_IS *)pc->data; 157 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 158 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 159 160 PetscFunctionBegin; 161 /* get local boundary part of global vector */ 162 PetscCall(VecScatterBegin(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD)); 163 PetscCall(VecScatterEnd(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD)); 164 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */ 165 PetscInt i; 166 PetscScalar *array_y; 167 const PetscScalar *array_D; 168 PetscCall(VecGetArray(y, &array_y)); 169 PetscCall(VecGetArrayRead(pcis->D, &array_D)); 170 for (i = 0; i < deluxe_ctx->n_simple; i++) array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]]; 171 PetscCall(VecRestoreArrayRead(pcis->D, &array_D)); 172 PetscCall(VecRestoreArray(y, &array_y)); 173 } 174 /* sequential part : all problems and Schur applications collapsed into a single matrix vector multiplication or a matvec and a solve */ 175 if (deluxe_ctx->seq_mat) { 176 PetscInt i; 177 for (i = 0; i < deluxe_ctx->seq_n; i++) { 178 if (deluxe_ctx->change) { 179 Mat change; 180 181 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 182 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD)); 183 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 184 PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 185 } else { 186 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 187 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD)); 188 } 189 if (deluxe_ctx->seq_mat_inv_sum[i]) { 190 PetscScalar *x; 191 192 PetscCall(VecGetArray(deluxe_ctx->seq_work1[i], &x)); 193 PetscCall(VecPlaceArray(deluxe_ctx->seq_work2[i], x)); 194 PetscCall(VecRestoreArray(deluxe_ctx->seq_work1[i], &x)); 195 PetscCall(MatSolve(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 196 PetscCall(VecResetArray(deluxe_ctx->seq_work2[i])); 197 } 198 PetscCall(MatMult(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i])); 199 if (deluxe_ctx->change) { 200 if (deluxe_ctx->change_with_qr) { 201 Mat change; 202 203 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL)); 204 PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 205 } else { 206 PetscCall(KSPSolveTranspose(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i])); 207 } 208 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE)); 209 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE)); 210 } else { 211 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE)); 212 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE)); 213 } 214 } 215 } 216 PetscFunctionReturn(0); 217 } 218 219 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector) 220 { 221 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 222 223 PetscFunctionBegin; 224 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 225 PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 2); 226 PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 3); 227 PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!"); 228 PetscUseMethod(pc, "PCBDDCScalingRestriction_C", (PC, Vec, Vec), (pc, global_vector, local_interface_vector)); 229 PetscFunctionReturn(0); 230 } 231 232 PetscErrorCode PCBDDCScalingSetUp(PC pc) 233 { 234 PC_IS *pcis = (PC_IS *)pc->data; 235 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 236 237 PetscFunctionBegin; 238 PetscValidHeaderSpecific(pc, PC_CLASSID, 1); 239 PetscCall(PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0)); 240 /* create work vector for the operator */ 241 PetscCall(VecDestroy(&pcbddc->work_scaling)); 242 PetscCall(VecDuplicate(pcis->vec1_B, &pcbddc->work_scaling)); 243 /* always rebuild pcis->D */ 244 if (pcis->use_stiffness_scaling) { 245 PetscScalar *a; 246 PetscInt i, n; 247 248 PetscCall(MatGetDiagonal(pcbddc->local_mat, pcis->vec1_N)); 249 PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD)); 250 PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD)); 251 PetscCall(VecAbs(pcis->D)); 252 PetscCall(VecGetLocalSize(pcis->D, &n)); 253 PetscCall(VecGetArray(pcis->D, &a)); 254 for (i = 0; i < n; i++) 255 if (PetscAbsScalar(a[i]) < PETSC_SMALL) a[i] = 1.0; 256 PetscCall(VecRestoreArray(pcis->D, &a)); 257 } 258 PetscCall(VecSet(pcis->vec1_global, 0.0)); 259 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 260 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 261 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 262 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 263 PetscCall(VecPointwiseDivide(pcis->D, pcis->D, pcis->vec1_B)); 264 /* now setup */ 265 if (pcbddc->use_deluxe_scaling) { 266 if (!pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingCreate_Deluxe(pc)); 267 PetscCall(PCBDDCScalingSetUp_Deluxe(pc)); 268 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Deluxe)); 269 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Deluxe)); 270 } else { 271 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Basic)); 272 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Basic)); 273 } 274 PetscCall(PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0)); 275 276 /* test */ 277 if (pcbddc->dbg_flag) { 278 Mat B0_B = NULL; 279 Vec B0_Bv = NULL, B0_Bv2 = NULL; 280 Vec vec2_global; 281 PetscViewer viewer = pcbddc->dbg_viewer; 282 PetscReal error; 283 284 /* extension -> from local to parallel */ 285 PetscCall(VecSet(pcis->vec1_global, 0.0)); 286 PetscCall(VecSetRandom(pcis->vec1_B, NULL)); 287 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 288 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 289 PetscCall(VecDuplicate(pcis->vec1_global, &vec2_global)); 290 PetscCall(VecCopy(pcis->vec1_global, vec2_global)); 291 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 292 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 293 if (pcbddc->benign_n) { 294 IS is_dummy; 295 296 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy)); 297 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B)); 298 PetscCall(ISDestroy(&is_dummy)); 299 PetscCall(MatCreateVecs(B0_B, NULL, &B0_Bv)); 300 PetscCall(VecDuplicate(B0_Bv, &B0_Bv2)); 301 PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv)); 302 } 303 PetscCall(PCBDDCScalingExtension(pc, pcis->vec1_B, pcis->vec1_global)); 304 if (pcbddc->benign_saddle_point) { 305 PetscReal errorl = 0.; 306 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 307 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD)); 308 if (pcbddc->benign_n) { 309 PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv2)); 310 PetscCall(VecAXPY(B0_Bv, -1.0, B0_Bv2)); 311 PetscCall(VecNorm(B0_Bv, NORM_INFINITY, &errorl)); 312 } 313 PetscCallMPI(MPI_Allreduce(&errorl, &error, 1, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)pc))); 314 PetscCall(PetscViewerASCIIPrintf(viewer, "Error benign extension %1.14e\n", (double)error)); 315 } 316 PetscCall(VecAXPY(pcis->vec1_global, -1.0, vec2_global)); 317 PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error)); 318 PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling extension %1.14e\n", (double)error)); 319 PetscCall(VecDestroy(&vec2_global)); 320 321 /* restriction -> from parallel to local */ 322 PetscCall(VecSet(pcis->vec1_global, 0.0)); 323 PetscCall(VecSetRandom(pcis->vec1_B, NULL)); 324 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 325 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 326 PetscCall(PCBDDCScalingRestriction(pc, pcis->vec1_global, pcis->vec1_B)); 327 PetscCall(VecScale(pcis->vec1_B, -1.0)); 328 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 329 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE)); 330 PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error)); 331 PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling restriction %1.14e\n", (double)error)); 332 PetscCall(MatDestroy(&B0_B)); 333 PetscCall(VecDestroy(&B0_Bv)); 334 PetscCall(VecDestroy(&B0_Bv2)); 335 } 336 PetscFunctionReturn(0); 337 } 338 339 PetscErrorCode PCBDDCScalingDestroy(PC pc) 340 { 341 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 342 343 PetscFunctionBegin; 344 if (pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingDestroy_Deluxe(pc)); 345 PetscCall(VecDestroy(&pcbddc->work_scaling)); 346 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", NULL)); 347 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", NULL)); 348 PetscFunctionReturn(0); 349 } 350 351 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc) 352 { 353 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 354 PCBDDCDeluxeScaling deluxe_ctx; 355 356 PetscFunctionBegin; 357 PetscCall(PetscNew(&deluxe_ctx)); 358 pcbddc->deluxe_ctx = deluxe_ctx; 359 PetscFunctionReturn(0); 360 } 361 362 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc) 363 { 364 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 365 366 PetscFunctionBegin; 367 PetscCall(PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx)); 368 PetscCall(PetscFree(pcbddc->deluxe_ctx)); 369 PetscFunctionReturn(0); 370 } 371 372 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx) 373 { 374 PetscInt i; 375 376 PetscFunctionBegin; 377 PetscCall(PetscFree(deluxe_ctx->idx_simple_B)); 378 deluxe_ctx->n_simple = 0; 379 for (i = 0; i < deluxe_ctx->seq_n; i++) { 380 PetscCall(VecScatterDestroy(&deluxe_ctx->seq_scctx[i])); 381 PetscCall(VecDestroy(&deluxe_ctx->seq_work1[i])); 382 PetscCall(VecDestroy(&deluxe_ctx->seq_work2[i])); 383 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 384 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 385 } 386 PetscCall(PetscFree5(deluxe_ctx->seq_scctx, deluxe_ctx->seq_work1, deluxe_ctx->seq_work2, deluxe_ctx->seq_mat, deluxe_ctx->seq_mat_inv_sum)); 387 PetscCall(PetscFree(deluxe_ctx->workspace)); 388 deluxe_ctx->seq_n = 0; 389 PetscFunctionReturn(0); 390 } 391 392 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc) 393 { 394 PC_IS *pcis = (PC_IS *)pc->data; 395 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 396 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 397 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 398 399 PetscFunctionBegin; 400 /* reset data structures if the topology has changed */ 401 if (pcbddc->recompute_topography) PetscCall(PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx)); 402 403 /* Compute data structures to solve sequential problems */ 404 PetscCall(PCBDDCScalingSetUp_Deluxe_Private(pc)); 405 406 /* diagonal scaling on interface dofs not contained in cc */ 407 if (sub_schurs->is_vertices || sub_schurs->is_dir) { 408 PetscInt n_com, n_dir; 409 n_com = 0; 410 if (sub_schurs->is_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_com)); 411 n_dir = 0; 412 if (sub_schurs->is_dir) PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir)); 413 if (!deluxe_ctx->n_simple) { 414 deluxe_ctx->n_simple = n_dir + n_com; 415 PetscCall(PetscMalloc1(deluxe_ctx->n_simple, &deluxe_ctx->idx_simple_B)); 416 if (sub_schurs->is_vertices) { 417 PetscInt nmap; 418 const PetscInt *idxs; 419 420 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs)); 421 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_com, idxs, &nmap, deluxe_ctx->idx_simple_B)); 422 PetscCheck(nmap == n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (is_vertices)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_com); 423 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs)); 424 } 425 if (sub_schurs->is_dir) { 426 PetscInt nmap; 427 const PetscInt *idxs; 428 429 PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs)); 430 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_dir, idxs, &nmap, deluxe_ctx->idx_simple_B + n_com)); 431 PetscCheck(nmap == n_dir, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (sub_schurs->is_dir)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_dir); 432 PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs)); 433 } 434 PetscCall(PetscSortInt(deluxe_ctx->n_simple, deluxe_ctx->idx_simple_B)); 435 } else { 436 PetscCheck(deluxe_ctx->n_simple == n_dir + n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of simply scaled dofs %" PetscInt_FMT " is different from the previous one computed %" PetscInt_FMT, n_dir + n_com, deluxe_ctx->n_simple); 437 } 438 } else { 439 deluxe_ctx->n_simple = 0; 440 deluxe_ctx->idx_simple_B = NULL; 441 } 442 PetscFunctionReturn(0); 443 } 444 445 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc) 446 { 447 PC_BDDC *pcbddc = (PC_BDDC *)pc->data; 448 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx; 449 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 450 PetscScalar *matdata, *matdata2; 451 PetscInt i, max_subset_size, cum, cum2; 452 const PetscInt *idxs; 453 PetscBool newsetup = PETSC_FALSE; 454 455 PetscFunctionBegin; 456 PetscCheck(sub_schurs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Missing PCBDDCSubSchurs"); 457 if (!sub_schurs->n_subs) PetscFunctionReturn(0); 458 459 /* Allocate arrays for subproblems */ 460 if (!deluxe_ctx->seq_n) { 461 deluxe_ctx->seq_n = sub_schurs->n_subs; 462 PetscCall(PetscCalloc5(deluxe_ctx->seq_n, &deluxe_ctx->seq_scctx, deluxe_ctx->seq_n, &deluxe_ctx->seq_work1, deluxe_ctx->seq_n, &deluxe_ctx->seq_work2, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat_inv_sum)); 463 newsetup = PETSC_TRUE; 464 } else PetscCheck(deluxe_ctx->seq_n == sub_schurs->n_subs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of deluxe subproblems %" PetscInt_FMT " is different from the sub_schurs %" PetscInt_FMT, deluxe_ctx->seq_n, sub_schurs->n_subs); 465 466 /* the change of basis is just a reference to sub_schurs->change (if any) */ 467 deluxe_ctx->change = sub_schurs->change; 468 deluxe_ctx->change_with_qr = sub_schurs->change_with_qr; 469 470 /* Create objects for deluxe */ 471 max_subset_size = 0; 472 for (i = 0; i < sub_schurs->n_subs; i++) { 473 PetscInt subset_size; 474 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 475 max_subset_size = PetscMax(subset_size, max_subset_size); 476 } 477 if (newsetup) PetscCall(PetscMalloc1(2 * max_subset_size, &deluxe_ctx->workspace)); 478 cum = cum2 = 0; 479 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs)); 480 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &matdata)); 481 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &matdata2)); 482 for (i = 0; i < deluxe_ctx->seq_n; i++) { 483 PetscInt subset_size; 484 485 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size)); 486 if (newsetup) { 487 IS sub; 488 /* work vectors */ 489 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace, &deluxe_ctx->seq_work1[i])); 490 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace + subset_size, &deluxe_ctx->seq_work2[i])); 491 492 /* scatters */ 493 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subset_size, idxs + cum, PETSC_COPY_VALUES, &sub)); 494 PetscCall(VecScatterCreate(pcbddc->work_scaling, sub, deluxe_ctx->seq_work1[i], NULL, &deluxe_ctx->seq_scctx[i])); 495 PetscCall(ISDestroy(&sub)); 496 } 497 498 /* S_E_j */ 499 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 500 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata + cum2, &deluxe_ctx->seq_mat[i])); 501 502 /* \sum_k S^k_E_j */ 503 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 504 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata2 + cum2, &deluxe_ctx->seq_mat_inv_sum[i])); 505 PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_SPD, sub_schurs->is_posdef)); 506 PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_HERMITIAN, sub_schurs->is_hermitian)); 507 if (sub_schurs->is_hermitian) { 508 PetscCall(MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL)); 509 } else { 510 PetscCall(MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL, NULL)); 511 } 512 if (pcbddc->deluxe_singlemat) { 513 Mat X, Y; 514 if (!sub_schurs->is_hermitian) { 515 PetscCall(MatTranspose(deluxe_ctx->seq_mat[i], MAT_INITIAL_MATRIX, &X)); 516 } else { 517 PetscCall(PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i])); 518 X = deluxe_ctx->seq_mat[i]; 519 } 520 PetscCall(MatDuplicate(X, MAT_DO_NOT_COPY_VALUES, &Y)); 521 if (!sub_schurs->is_hermitian) { 522 PetscCall(PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i], X, Y)); 523 } else { 524 PetscCall(MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i], X, Y)); 525 } 526 527 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i])); 528 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i])); 529 PetscCall(MatDestroy(&X)); 530 if (deluxe_ctx->change) { 531 Mat C, CY; 532 PetscCheck(deluxe_ctx->change_with_qr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only QR based change of basis"); 533 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &C, NULL)); 534 PetscCall(MatMatMult(C, Y, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &CY)); 535 PetscCall(MatMatTransposeMult(CY, C, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y)); 536 PetscCall(MatDestroy(&CY)); 537 PetscCall(MatProductClear(Y)); /* clear internal matproduct structure of Y since CY is destroyed */ 538 } 539 PetscCall(MatTranspose(Y, MAT_INPLACE_MATRIX, &Y)); 540 deluxe_ctx->seq_mat[i] = Y; 541 } 542 cum += subset_size; 543 cum2 += subset_size * subset_size; 544 } 545 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs)); 546 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &matdata)); 547 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &matdata2)); 548 if (pcbddc->deluxe_singlemat) { 549 deluxe_ctx->change = NULL; 550 deluxe_ctx->change_with_qr = PETSC_FALSE; 551 } 552 553 if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) { 554 for (i = 0; i < deluxe_ctx->seq_n; i++) { 555 if (newsetup) { 556 PC pc; 557 558 PetscCall(KSPGetPC(deluxe_ctx->change[i], &pc)); 559 PetscCall(PCSetType(pc, PCLU)); 560 PetscCall(KSPSetFromOptions(deluxe_ctx->change[i])); 561 } 562 PetscCall(KSPSetUp(deluxe_ctx->change[i])); 563 } 564 } 565 PetscFunctionReturn(0); 566 } 567