1 #include <petsc/private/pcbddcimpl.h>
2 #include <petsc/private/pcbddcprivateimpl.h>
3 #include <petscblaslapack.h>
4 #include <../src/mat/impls/dense/seq/dense.h>
5
6 /* prototypes for deluxe functions */
7 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC);
8 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC);
9 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC);
10 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC);
11 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling);
12
PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)13 static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A, Mat B, Mat X)
14 {
15 Mat_SeqDense *mat = (Mat_SeqDense *)A->data;
16 const PetscScalar *b;
17 PetscScalar *x;
18 PetscInt n;
19 PetscBLASInt nrhs, info, m;
20 PetscBool flg;
21
22 PetscFunctionBegin;
23 PetscCall(PetscBLASIntCast(A->rmap->n, &m));
24 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
25 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
26 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
27 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
28
29 PetscCall(MatGetSize(B, NULL, &n));
30 PetscCall(PetscBLASIntCast(n, &nrhs));
31 PetscCall(MatDenseGetArrayRead(B, &b));
32 PetscCall(MatDenseGetArray(X, &x));
33 PetscCall(PetscArraycpy(x, b, m * nrhs));
34 PetscCall(MatDenseRestoreArrayRead(B, &b));
35
36 PetscCheck(A->factortype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only LU factor supported");
37 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &m, &nrhs, mat->v, &mat->lda, mat->pivots, x, &m, &info));
38 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "GETRS - Bad solve");
39
40 PetscCall(MatDenseRestoreArray(X, &x));
41 PetscCall(PetscLogFlops(nrhs * (2.0 * m * m - m)));
42 PetscFunctionReturn(PETSC_SUCCESS);
43 }
44
PCBDDCScalingExtension_Basic(PC pc,Vec local_interface_vector,Vec global_vector)45 static PetscErrorCode PCBDDCScalingExtension_Basic(PC pc, Vec local_interface_vector, Vec global_vector)
46 {
47 PC_IS *pcis = (PC_IS *)pc->data;
48 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
49
50 PetscFunctionBegin;
51 /* Apply partition of unity */
52 PetscCall(VecPointwiseMult(pcbddc->work_scaling, pcis->D, local_interface_vector));
53 PetscCall(VecSet(global_vector, 0.0));
54 PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE));
55 PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, global_vector, ADD_VALUES, SCATTER_REVERSE));
56 PetscFunctionReturn(PETSC_SUCCESS);
57 }
58
PCBDDCScalingExtension_Deluxe(PC pc,Vec x,Vec y)59 static PetscErrorCode PCBDDCScalingExtension_Deluxe(PC pc, Vec x, Vec y)
60 {
61 PC_IS *pcis = (PC_IS *)pc->data;
62 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
63 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
64
65 PetscFunctionBegin;
66 PetscCall(VecSet(pcbddc->work_scaling, 0.0));
67 PetscCall(VecSet(y, 0.0));
68 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
69 PetscInt i;
70 const PetscScalar *array_x, *array_D;
71 PetscScalar *array;
72 PetscCall(VecGetArrayRead(x, &array_x));
73 PetscCall(VecGetArrayRead(pcis->D, &array_D));
74 PetscCall(VecGetArray(pcbddc->work_scaling, &array));
75 for (i = 0; i < deluxe_ctx->n_simple; i++) array[deluxe_ctx->idx_simple_B[i]] = array_x[deluxe_ctx->idx_simple_B[i]] * array_D[deluxe_ctx->idx_simple_B[i]];
76 PetscCall(VecRestoreArray(pcbddc->work_scaling, &array));
77 PetscCall(VecRestoreArrayRead(pcis->D, &array_D));
78 PetscCall(VecRestoreArrayRead(x, &array_x));
79 }
80 /* sequential part : all problems and Schur applications collapsed into a single matrix-vector multiplication or a matvec and a solve */
81 if (deluxe_ctx->seq_mat) {
82 PetscInt i;
83 for (i = 0; i < deluxe_ctx->seq_n; i++) {
84 if (deluxe_ctx->change) {
85 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
86 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
87 if (deluxe_ctx->change_with_qr) {
88 Mat change;
89
90 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
91 PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
92 } else {
93 PetscCall(KSPSolve(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
94 }
95 } else {
96 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
97 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], x, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
98 }
99 PetscCall(MatMultTranspose(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
100 if (deluxe_ctx->seq_mat_inv_sum[i]) {
101 PetscScalar *x;
102
103 PetscCall(VecGetArray(deluxe_ctx->seq_work2[i], &x));
104 PetscCall(VecPlaceArray(deluxe_ctx->seq_work1[i], x));
105 PetscCall(VecRestoreArray(deluxe_ctx->seq_work2[i], &x));
106 PetscCall(MatSolveTranspose(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
107 PetscCall(VecResetArray(deluxe_ctx->seq_work1[i]));
108 }
109 if (deluxe_ctx->change) {
110 Mat change;
111
112 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
113 PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
114 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
115 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
116 } else {
117 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
118 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], pcbddc->work_scaling, INSERT_VALUES, SCATTER_REVERSE));
119 }
120 }
121 }
122 /* put local boundary part in global vector */
123 PetscCall(VecScatterBegin(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE));
124 PetscCall(VecScatterEnd(pcis->global_to_B, pcbddc->work_scaling, y, ADD_VALUES, SCATTER_REVERSE));
125 PetscFunctionReturn(PETSC_SUCCESS);
126 }
127
PCBDDCScalingExtension(PC pc,Vec local_interface_vector,Vec global_vector)128 PetscErrorCode PCBDDCScalingExtension(PC pc, Vec local_interface_vector, Vec global_vector)
129 {
130 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
131
132 PetscFunctionBegin;
133 PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
134 PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 2);
135 PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 3);
136 PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!");
137 PetscUseMethod(pc, "PCBDDCScalingExtension_C", (PC, Vec, Vec), (pc, local_interface_vector, global_vector));
138 PetscFunctionReturn(PETSC_SUCCESS);
139 }
140
PCBDDCScalingRestriction_Basic(PC pc,Vec global_vector,Vec local_interface_vector)141 static PetscErrorCode PCBDDCScalingRestriction_Basic(PC pc, Vec global_vector, Vec local_interface_vector)
142 {
143 PC_IS *pcis = (PC_IS *)pc->data;
144
145 PetscFunctionBegin;
146 PetscCall(VecScatterBegin(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD));
147 PetscCall(VecScatterEnd(pcis->global_to_B, global_vector, local_interface_vector, INSERT_VALUES, SCATTER_FORWARD));
148 /* Apply partition of unity */
149 PetscCall(VecPointwiseMult(local_interface_vector, pcis->D, local_interface_vector));
150 PetscFunctionReturn(PETSC_SUCCESS);
151 }
152
PCBDDCScalingRestriction_Deluxe(PC pc,Vec x,Vec y)153 static PetscErrorCode PCBDDCScalingRestriction_Deluxe(PC pc, Vec x, Vec y)
154 {
155 PC_IS *pcis = (PC_IS *)pc->data;
156 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
157 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
158
159 PetscFunctionBegin;
160 /* get local boundary part of global vector */
161 PetscCall(VecScatterBegin(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD));
162 PetscCall(VecScatterEnd(pcis->global_to_B, x, y, INSERT_VALUES, SCATTER_FORWARD));
163 if (deluxe_ctx->n_simple) { /* scale deluxe vertices using diagonal scaling */
164 PetscInt i;
165 PetscScalar *array_y;
166 const PetscScalar *array_D;
167 PetscCall(VecGetArray(y, &array_y));
168 PetscCall(VecGetArrayRead(pcis->D, &array_D));
169 for (i = 0; i < deluxe_ctx->n_simple; i++) array_y[deluxe_ctx->idx_simple_B[i]] *= array_D[deluxe_ctx->idx_simple_B[i]];
170 PetscCall(VecRestoreArrayRead(pcis->D, &array_D));
171 PetscCall(VecRestoreArray(y, &array_y));
172 }
173 /* sequential part : all problems and Schur applications collapsed into a single matrix-vector multiplication or a matvec and a solve */
174 if (deluxe_ctx->seq_mat) {
175 PetscInt i;
176 for (i = 0; i < deluxe_ctx->seq_n; i++) {
177 if (deluxe_ctx->change) {
178 Mat change;
179
180 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
181 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work2[i], INSERT_VALUES, SCATTER_FORWARD));
182 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
183 PetscCall(MatMultTranspose(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
184 } else {
185 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
186 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], y, deluxe_ctx->seq_work1[i], INSERT_VALUES, SCATTER_FORWARD));
187 }
188 if (deluxe_ctx->seq_mat_inv_sum[i]) {
189 PetscScalar *x;
190
191 PetscCall(VecGetArray(deluxe_ctx->seq_work1[i], &x));
192 PetscCall(VecPlaceArray(deluxe_ctx->seq_work2[i], x));
193 PetscCall(VecRestoreArray(deluxe_ctx->seq_work1[i], &x));
194 PetscCall(MatSolve(deluxe_ctx->seq_mat_inv_sum[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
195 PetscCall(VecResetArray(deluxe_ctx->seq_work2[i]));
196 }
197 PetscCall(MatMult(deluxe_ctx->seq_mat[i], deluxe_ctx->seq_work1[i], deluxe_ctx->seq_work2[i]));
198 if (deluxe_ctx->change) {
199 if (deluxe_ctx->change_with_qr) {
200 Mat change;
201
202 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &change, NULL));
203 PetscCall(MatMult(change, deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
204 } else {
205 PetscCall(KSPSolveTranspose(deluxe_ctx->change[i], deluxe_ctx->seq_work2[i], deluxe_ctx->seq_work1[i]));
206 }
207 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE));
208 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work1[i], y, INSERT_VALUES, SCATTER_REVERSE));
209 } else {
210 PetscCall(VecScatterBegin(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE));
211 PetscCall(VecScatterEnd(deluxe_ctx->seq_scctx[i], deluxe_ctx->seq_work2[i], y, INSERT_VALUES, SCATTER_REVERSE));
212 }
213 }
214 }
215 PetscFunctionReturn(PETSC_SUCCESS);
216 }
217
PCBDDCScalingRestriction(PC pc,Vec global_vector,Vec local_interface_vector)218 PetscErrorCode PCBDDCScalingRestriction(PC pc, Vec global_vector, Vec local_interface_vector)
219 {
220 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
221
222 PetscFunctionBegin;
223 PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
224 PetscValidHeaderSpecific(global_vector, VEC_CLASSID, 2);
225 PetscValidHeaderSpecific(local_interface_vector, VEC_CLASSID, 3);
226 PetscCheck(local_interface_vector != pcbddc->work_scaling, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vector cannot be pcbddc->work_scaling!");
227 PetscUseMethod(pc, "PCBDDCScalingRestriction_C", (PC, Vec, Vec), (pc, global_vector, local_interface_vector));
228 PetscFunctionReturn(PETSC_SUCCESS);
229 }
230
PCBDDCScalingSetUp(PC pc)231 PetscErrorCode PCBDDCScalingSetUp(PC pc)
232 {
233 PC_IS *pcis = (PC_IS *)pc->data;
234 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
235
236 PetscFunctionBegin;
237 PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
238 PetscCall(PetscLogEventBegin(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0));
239 /* create work vector for the operator */
240 PetscCall(VecDestroy(&pcbddc->work_scaling));
241 PetscCall(VecDuplicate(pcis->vec1_B, &pcbddc->work_scaling));
242 /* always rebuild pcis->D */
243 if (pcis->use_stiffness_scaling) {
244 PetscScalar *a;
245 PetscInt i, n;
246
247 PetscCall(MatGetDiagonal(pcbddc->local_mat, pcis->vec1_N));
248 PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD));
249 PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->D, INSERT_VALUES, SCATTER_FORWARD));
250 PetscCall(VecAbs(pcis->D));
251 PetscCall(VecGetLocalSize(pcis->D, &n));
252 PetscCall(VecGetArray(pcis->D, &a));
253 for (i = 0; i < n; i++)
254 if (PetscAbsScalar(a[i]) < PETSC_SMALL) a[i] = 1.0;
255 PetscCall(VecRestoreArray(pcis->D, &a));
256 }
257 PetscCall(VecSet(pcis->vec1_global, 0.0));
258 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
259 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->D, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
260 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
261 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
262 PetscCall(VecPointwiseDivide(pcis->D, pcis->D, pcis->vec1_B));
263 /* now setup */
264 if (pcbddc->use_deluxe_scaling) {
265 if (!pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingCreate_Deluxe(pc));
266 PetscCall(PCBDDCScalingSetUp_Deluxe(pc));
267 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Deluxe));
268 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Deluxe));
269 } else {
270 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", PCBDDCScalingRestriction_Basic));
271 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", PCBDDCScalingExtension_Basic));
272 }
273 PetscCall(PetscLogEventEnd(PC_BDDC_Scaling[pcbddc->current_level], pc, 0, 0, 0));
274
275 /* test */
276 if (pcbddc->dbg_flag) {
277 Mat B0_B = NULL;
278 Vec B0_Bv = NULL, B0_Bv2 = NULL;
279 Vec vec2_global;
280 PetscViewer viewer = pcbddc->dbg_viewer;
281 PetscReal error;
282
283 /* extension -> from local to parallel */
284 PetscCall(VecSet(pcis->vec1_global, 0.0));
285 PetscCall(VecSetRandom(pcis->vec1_B, NULL));
286 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
287 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
288 PetscCall(VecDuplicate(pcis->vec1_global, &vec2_global));
289 PetscCall(VecCopy(pcis->vec1_global, vec2_global));
290 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
291 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
292 if (pcbddc->benign_n) {
293 IS is_dummy;
294
295 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
296 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
297 PetscCall(ISDestroy(&is_dummy));
298 PetscCall(MatCreateVecs(B0_B, NULL, &B0_Bv));
299 PetscCall(VecDuplicate(B0_Bv, &B0_Bv2));
300 PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv));
301 }
302 PetscCall(PCBDDCScalingExtension(pc, pcis->vec1_B, pcis->vec1_global));
303 if (pcbddc->benign_saddle_point) {
304 PetscReal errorl = 0.;
305 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
306 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_global, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
307 if (pcbddc->benign_n) {
308 PetscCall(MatMult(B0_B, pcis->vec1_B, B0_Bv2));
309 PetscCall(VecAXPY(B0_Bv, -1.0, B0_Bv2));
310 PetscCall(VecNorm(B0_Bv, NORM_INFINITY, &errorl));
311 }
312 PetscCallMPI(MPIU_Allreduce(&errorl, &error, 1, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)pc)));
313 PetscCall(PetscViewerASCIIPrintf(viewer, "Error benign extension %1.14e\n", (double)error));
314 }
315 PetscCall(VecAXPY(pcis->vec1_global, -1.0, vec2_global));
316 PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error));
317 PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling extension %1.14e\n", (double)error));
318 PetscCall(VecDestroy(&vec2_global));
319
320 /* restriction -> from parallel to local */
321 PetscCall(VecSet(pcis->vec1_global, 0.0));
322 PetscCall(VecSetRandom(pcis->vec1_B, NULL));
323 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
324 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
325 PetscCall(PCBDDCScalingRestriction(pc, pcis->vec1_global, pcis->vec1_B));
326 PetscCall(VecScale(pcis->vec1_B, -1.0));
327 PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
328 PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
329 PetscCall(VecNorm(pcis->vec1_global, NORM_INFINITY, &error));
330 PetscCall(PetscViewerASCIIPrintf(viewer, "Error scaling restriction %1.14e\n", (double)error));
331 PetscCall(MatDestroy(&B0_B));
332 PetscCall(VecDestroy(&B0_Bv));
333 PetscCall(VecDestroy(&B0_Bv2));
334 }
335 PetscFunctionReturn(PETSC_SUCCESS);
336 }
337
PCBDDCScalingDestroy(PC pc)338 PetscErrorCode PCBDDCScalingDestroy(PC pc)
339 {
340 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
341
342 PetscFunctionBegin;
343 if (pcbddc->deluxe_ctx) PetscCall(PCBDDCScalingDestroy_Deluxe(pc));
344 PetscCall(VecDestroy(&pcbddc->work_scaling));
345 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingRestriction_C", NULL));
346 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCBDDCScalingExtension_C", NULL));
347 PetscFunctionReturn(PETSC_SUCCESS);
348 }
349
PCBDDCScalingCreate_Deluxe(PC pc)350 static PetscErrorCode PCBDDCScalingCreate_Deluxe(PC pc)
351 {
352 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
353 PCBDDCDeluxeScaling deluxe_ctx;
354
355 PetscFunctionBegin;
356 PetscCall(PetscNew(&deluxe_ctx));
357 pcbddc->deluxe_ctx = deluxe_ctx;
358 PetscFunctionReturn(PETSC_SUCCESS);
359 }
360
PCBDDCScalingDestroy_Deluxe(PC pc)361 static PetscErrorCode PCBDDCScalingDestroy_Deluxe(PC pc)
362 {
363 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
364
365 PetscFunctionBegin;
366 PetscCall(PCBDDCScalingReset_Deluxe_Solvers(pcbddc->deluxe_ctx));
367 PetscCall(PetscFree(pcbddc->deluxe_ctx));
368 PetscFunctionReturn(PETSC_SUCCESS);
369 }
370
PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)371 static PetscErrorCode PCBDDCScalingReset_Deluxe_Solvers(PCBDDCDeluxeScaling deluxe_ctx)
372 {
373 PetscInt i;
374
375 PetscFunctionBegin;
376 PetscCall(PetscFree(deluxe_ctx->idx_simple_B));
377 deluxe_ctx->n_simple = 0;
378 for (i = 0; i < deluxe_ctx->seq_n; i++) {
379 PetscCall(VecScatterDestroy(&deluxe_ctx->seq_scctx[i]));
380 PetscCall(VecDestroy(&deluxe_ctx->seq_work1[i]));
381 PetscCall(VecDestroy(&deluxe_ctx->seq_work2[i]));
382 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
383 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
384 }
385 PetscCall(PetscFree5(deluxe_ctx->seq_scctx, deluxe_ctx->seq_work1, deluxe_ctx->seq_work2, deluxe_ctx->seq_mat, deluxe_ctx->seq_mat_inv_sum));
386 PetscCall(PetscFree(deluxe_ctx->workspace));
387 deluxe_ctx->seq_n = 0;
388 PetscFunctionReturn(PETSC_SUCCESS);
389 }
390
PCBDDCScalingSetUp_Deluxe(PC pc)391 static PetscErrorCode PCBDDCScalingSetUp_Deluxe(PC pc)
392 {
393 PC_IS *pcis = (PC_IS *)pc->data;
394 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
395 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
396 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
397
398 PetscFunctionBegin;
399 /* reset data structures if the topology has changed */
400 if (pcbddc->recompute_topography) PetscCall(PCBDDCScalingReset_Deluxe_Solvers(deluxe_ctx));
401
402 /* Compute data structures to solve sequential problems */
403 PetscCall(PCBDDCScalingSetUp_Deluxe_Private(pc));
404
405 /* diagonal scaling on interface dofs not contained in cc */
406 if (sub_schurs->is_vertices || sub_schurs->is_dir) {
407 PetscInt n_com, n_dir;
408 n_com = 0;
409 if (sub_schurs->is_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_com));
410 n_dir = 0;
411 if (sub_schurs->is_dir) PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir));
412 if (!deluxe_ctx->n_simple) {
413 deluxe_ctx->n_simple = n_dir + n_com;
414 PetscCall(PetscMalloc1(deluxe_ctx->n_simple, &deluxe_ctx->idx_simple_B));
415 if (n_com) {
416 PetscInt nmap;
417 const PetscInt *idxs;
418
419 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
420 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_com, idxs, &nmap, deluxe_ctx->idx_simple_B));
421 PetscCheck(nmap == n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (is_vertices)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_com);
422 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
423 }
424 if (n_dir) {
425 PetscInt nmap;
426 const PetscInt *idxs;
427
428 PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs));
429 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_dir, idxs, &nmap, deluxe_ctx->idx_simple_B + n_com));
430 PetscCheck(nmap == n_dir, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error when mapping simply scaled dofs (sub_schurs->is_dir)! %" PetscInt_FMT " != %" PetscInt_FMT, nmap, n_dir);
431 PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs));
432 }
433 PetscCall(PetscSortInt(deluxe_ctx->n_simple, deluxe_ctx->idx_simple_B));
434 } else {
435 PetscCheck(deluxe_ctx->n_simple == n_dir + n_com, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of simply scaled dofs %" PetscInt_FMT " is different from the previous one computed %" PetscInt_FMT, n_dir + n_com, deluxe_ctx->n_simple);
436 }
437 } else {
438 deluxe_ctx->n_simple = 0;
439 deluxe_ctx->idx_simple_B = NULL;
440 }
441 PetscFunctionReturn(PETSC_SUCCESS);
442 }
443
PCBDDCScalingSetUp_Deluxe_Private(PC pc)444 static PetscErrorCode PCBDDCScalingSetUp_Deluxe_Private(PC pc)
445 {
446 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
447 PCBDDCDeluxeScaling deluxe_ctx = pcbddc->deluxe_ctx;
448 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
449 PetscScalar *matdata, *matdata2;
450 PetscInt i, max_subset_size, cum, cum2;
451 const PetscInt *idxs;
452 PetscBool newsetup = PETSC_FALSE;
453
454 PetscFunctionBegin;
455 PetscCheck(sub_schurs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Missing PCBDDCSubSchurs");
456 if (!sub_schurs->n_subs) PetscFunctionReturn(PETSC_SUCCESS);
457
458 /* Allocate arrays for subproblems */
459 if (!deluxe_ctx->seq_n) {
460 deluxe_ctx->seq_n = sub_schurs->n_subs;
461 PetscCall(PetscCalloc5(deluxe_ctx->seq_n, &deluxe_ctx->seq_scctx, deluxe_ctx->seq_n, &deluxe_ctx->seq_work1, deluxe_ctx->seq_n, &deluxe_ctx->seq_work2, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat, deluxe_ctx->seq_n, &deluxe_ctx->seq_mat_inv_sum));
462 newsetup = PETSC_TRUE;
463 } else PetscCheck(deluxe_ctx->seq_n == sub_schurs->n_subs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of deluxe subproblems %" PetscInt_FMT " is different from the sub_schurs %" PetscInt_FMT, deluxe_ctx->seq_n, sub_schurs->n_subs);
464
465 /* the change of basis is just a reference to sub_schurs->change (if any) */
466 deluxe_ctx->change = sub_schurs->change;
467 deluxe_ctx->change_with_qr = sub_schurs->change_with_qr;
468
469 /* Create objects for deluxe */
470 max_subset_size = 0;
471 for (i = 0; i < sub_schurs->n_subs; i++) {
472 PetscInt subset_size;
473 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
474 max_subset_size = PetscMax(subset_size, max_subset_size);
475 }
476 if (newsetup) PetscCall(PetscMalloc1(2 * max_subset_size, &deluxe_ctx->workspace));
477 cum = cum2 = 0;
478 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs));
479 PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &matdata));
480 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &matdata2));
481 for (i = 0; i < deluxe_ctx->seq_n; i++) {
482 PetscInt subset_size;
483
484 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
485 if (newsetup) {
486 IS sub;
487 /* work vectors */
488 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace, &deluxe_ctx->seq_work1[i]));
489 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, deluxe_ctx->workspace + subset_size, &deluxe_ctx->seq_work2[i]));
490
491 /* scatters */
492 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subset_size, idxs + cum, PETSC_COPY_VALUES, &sub));
493 PetscCall(VecScatterCreate(pcbddc->work_scaling, sub, deluxe_ctx->seq_work1[i], NULL, &deluxe_ctx->seq_scctx[i]));
494 PetscCall(ISDestroy(&sub));
495 }
496
497 /* S_E_j */
498 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
499 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata + cum2, &deluxe_ctx->seq_mat[i]));
500
501 /* \sum_k S^k_E_j */
502 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
503 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, matdata2 + cum2, &deluxe_ctx->seq_mat_inv_sum[i]));
504 PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_SPD, sub_schurs->is_posdef));
505 PetscCall(MatSetOption(deluxe_ctx->seq_mat_inv_sum[i], MAT_HERMITIAN, sub_schurs->is_hermitian));
506 switch (sub_schurs->mat_factor_type) {
507 case MAT_FACTOR_CHOLESKY:
508 PetscCall(MatCholeskyFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL));
509 break;
510 case MAT_FACTOR_LU:
511 PetscCall(MatLUFactor(deluxe_ctx->seq_mat_inv_sum[i], NULL, NULL, NULL));
512 break;
513 default:
514 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]);
515 }
516 if (pcbddc->deluxe_singlemat) {
517 Mat X, Y;
518 if (!sub_schurs->is_hermitian) {
519 PetscCall(MatTranspose(deluxe_ctx->seq_mat[i], MAT_INITIAL_MATRIX, &X));
520 } else {
521 PetscCall(PetscObjectReference((PetscObject)deluxe_ctx->seq_mat[i]));
522 X = deluxe_ctx->seq_mat[i];
523 }
524 PetscCall(MatDuplicate(X, MAT_DO_NOT_COPY_VALUES, &Y));
525 if (!sub_schurs->is_hermitian) {
526 PetscCall(PCBDDCMatTransposeMatSolve_SeqDense(deluxe_ctx->seq_mat_inv_sum[i], X, Y));
527 } else {
528 PetscCall(MatMatSolve(deluxe_ctx->seq_mat_inv_sum[i], X, Y));
529 }
530 PetscCall(MatDestroy(&deluxe_ctx->seq_mat_inv_sum[i]));
531 PetscCall(MatDestroy(&deluxe_ctx->seq_mat[i]));
532 PetscCall(MatDestroy(&X));
533 if (deluxe_ctx->change) {
534 Mat C, CY;
535 PetscCheck(deluxe_ctx->change_with_qr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only QR based change of basis");
536 PetscCall(KSPGetOperators(deluxe_ctx->change[i], &C, NULL));
537 PetscCall(MatMatMult(C, Y, MAT_INITIAL_MATRIX, PETSC_CURRENT, &CY));
538 PetscCall(MatMatTransposeMult(CY, C, MAT_REUSE_MATRIX, PETSC_CURRENT, &Y));
539 PetscCall(MatDestroy(&CY));
540 PetscCall(MatProductClear(Y)); /* clear internal matproduct structure of Y since CY is destroyed */
541 }
542 PetscCall(MatTranspose(Y, MAT_INPLACE_MATRIX, &Y));
543 deluxe_ctx->seq_mat[i] = Y;
544 }
545 cum += subset_size;
546 cum2 += subset_size * subset_size;
547 }
548 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs));
549 PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &matdata));
550 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &matdata2));
551 if (pcbddc->deluxe_singlemat) {
552 deluxe_ctx->change = NULL;
553 deluxe_ctx->change_with_qr = PETSC_FALSE;
554 }
555
556 if (deluxe_ctx->change && !deluxe_ctx->change_with_qr) {
557 for (i = 0; i < deluxe_ctx->seq_n; i++) {
558 if (newsetup) {
559 PC pc;
560
561 PetscCall(KSPGetPC(deluxe_ctx->change[i], &pc));
562 PetscCall(PCSetType(pc, PCLU));
563 PetscCall(KSPSetFromOptions(deluxe_ctx->change[i]));
564 }
565 PetscCall(KSPSetUp(deluxe_ctx->change[i]));
566 }
567 }
568 PetscFunctionReturn(PETSC_SUCCESS);
569 }
570