1 #include <petsc/private/pcimpl.h>
2 #include <petsc/private/matimpl.h>
3 #include <petscdm.h>
4 #include <h2opusconf.h>
5
6 /* Use GPU only if H2OPUS is configured for GPU */
7 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
8 #define PETSC_H2OPUS_USE_GPU
9 #endif
10
11 typedef struct {
12 Mat A;
13 Mat M;
14 PetscScalar s0;
15
16 /* sampler for Newton-Schultz */
17 Mat S;
18 PetscInt hyperorder;
19 Vec wns[4];
20 Mat wnsmat[4];
21
22 /* convergence testing */
23 Mat T;
24 Vec w;
25
26 /* Support for PCSetCoordinates */
27 PetscInt sdim;
28 PetscInt nlocc;
29 PetscReal *coords;
30
31 /* Newton-Schultz customization */
32 PetscInt maxits;
33 PetscReal rtol, atol;
34 PetscBool monitor;
35 PetscBool useapproximatenorms;
36 NormType normtype;
37
38 /* Used when pmat != MATH2OPUS */
39 PetscReal eta;
40 PetscInt leafsize;
41 PetscInt max_rank;
42 PetscInt bs;
43 PetscReal mrtol;
44
45 /* CPU/GPU */
46 PetscBool forcecpu;
47 PetscBool boundtocpu;
48 } PC_H2OPUS;
49
50 PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat, NormType, PetscReal *);
51
PCH2OpusInferCoordinates_Private(PC pc)52 static PetscErrorCode PCH2OpusInferCoordinates_Private(PC pc)
53 {
54 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
55 DM dm;
56 PetscBool isdmda;
57
58 PetscFunctionBegin;
59 if (pch2opus->sdim) PetscFunctionReturn(PETSC_SUCCESS);
60 PetscCall(PCGetDM(pc, &dm));
61 if (!dm) PetscCall(MatGetDM(pc->useAmat ? pc->mat : pc->pmat, &dm));
62 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isdmda));
63 if (isdmda) {
64 Vec c;
65 const PetscScalar *coords;
66 PetscInt n, sdim;
67
68 PetscCall(DMGetCoordinates(dm, &c));
69 PetscCall(DMGetDimension(dm, &sdim));
70 PetscCall(VecGetLocalSize(c, &n));
71 PetscCall(VecGetArrayRead(c, &coords));
72 PetscCall(PCSetCoordinates(pc, sdim, n / sdim, (PetscScalar *)coords));
73 PetscCall(VecRestoreArrayRead(c, &coords));
74 }
75 PetscFunctionReturn(PETSC_SUCCESS);
76 }
77
PCReset_H2OPUS(PC pc)78 static PetscErrorCode PCReset_H2OPUS(PC pc)
79 {
80 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
81
82 PetscFunctionBegin;
83 pch2opus->sdim = 0;
84 pch2opus->nlocc = 0;
85 PetscCall(PetscFree(pch2opus->coords));
86 PetscFunctionReturn(PETSC_SUCCESS);
87 }
88
PCSetCoordinates_H2OPUS(PC pc,PetscInt sdim,PetscInt nlocc,PetscReal * coords)89 static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
90 {
91 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
92 PetscBool reset = PETSC_TRUE;
93
94 PetscFunctionBegin;
95 if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
96 PetscCall(PetscArraycmp(pch2opus->coords, coords, sdim * nlocc, &reset));
97 reset = (PetscBool)!reset;
98 }
99 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reset, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
100 if (reset) {
101 PetscCall(PCReset_H2OPUS(pc));
102 PetscCall(PetscMalloc1(sdim * nlocc, &pch2opus->coords));
103 PetscCall(PetscArraycpy(pch2opus->coords, coords, sdim * nlocc));
104 pch2opus->sdim = sdim;
105 pch2opus->nlocc = nlocc;
106 }
107 PetscFunctionReturn(PETSC_SUCCESS);
108 }
109
PCDestroy_H2OPUS(PC pc)110 static PetscErrorCode PCDestroy_H2OPUS(PC pc)
111 {
112 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
113
114 PetscFunctionBegin;
115 PetscCall(PCReset_H2OPUS(pc));
116 PetscCall(MatDestroy(&pch2opus->A));
117 PetscCall(MatDestroy(&pch2opus->M));
118 PetscCall(MatDestroy(&pch2opus->T));
119 PetscCall(VecDestroy(&pch2opus->w));
120 PetscCall(MatDestroy(&pch2opus->S));
121 PetscCall(VecDestroy(&pch2opus->wns[0]));
122 PetscCall(VecDestroy(&pch2opus->wns[1]));
123 PetscCall(VecDestroy(&pch2opus->wns[2]));
124 PetscCall(VecDestroy(&pch2opus->wns[3]));
125 PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
126 PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
127 PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
128 PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
129 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL));
130 PetscCall(PetscFree(pc->data));
131 PetscFunctionReturn(PETSC_SUCCESS);
132 }
133
PCSetFromOptions_H2OPUS(PC pc,PetscOptionItems PetscOptionsObject)134 static PetscErrorCode PCSetFromOptions_H2OPUS(PC pc, PetscOptionItems PetscOptionsObject)
135 {
136 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
137
138 PetscFunctionBegin;
139 PetscOptionsHeadBegin(PetscOptionsObject, "H2OPUS options");
140 PetscCall(PetscOptionsInt("-pc_h2opus_maxits", "Maximum number of iterations for Newton-Schultz", NULL, pch2opus->maxits, &pch2opus->maxits, NULL));
141 PetscCall(PetscOptionsBool("-pc_h2opus_monitor", "Monitor Newton-Schultz convergence", NULL, pch2opus->monitor, &pch2opus->monitor, NULL));
142 PetscCall(PetscOptionsReal("-pc_h2opus_atol", "Absolute tolerance", NULL, pch2opus->atol, &pch2opus->atol, NULL));
143 PetscCall(PetscOptionsReal("-pc_h2opus_rtol", "Relative tolerance", NULL, pch2opus->rtol, &pch2opus->rtol, NULL));
144 PetscCall(PetscOptionsEnum("-pc_h2opus_norm_type", "Norm type for convergence monitoring", NULL, NormTypes, (PetscEnum)pch2opus->normtype, (PetscEnum *)&pch2opus->normtype, NULL));
145 PetscCall(PetscOptionsInt("-pc_h2opus_hyperorder", "Hyper power order of sampling", NULL, pch2opus->hyperorder, &pch2opus->hyperorder, NULL));
146 PetscCall(PetscOptionsInt("-pc_h2opus_leafsize", "Leaf size when constructed from kernel", NULL, pch2opus->leafsize, &pch2opus->leafsize, NULL));
147 PetscCall(PetscOptionsReal("-pc_h2opus_eta", "Admissibility condition tolerance", NULL, pch2opus->eta, &pch2opus->eta, NULL));
148 PetscCall(PetscOptionsInt("-pc_h2opus_maxrank", "Maximum rank when constructed from matvecs", NULL, pch2opus->max_rank, &pch2opus->max_rank, NULL));
149 PetscCall(PetscOptionsInt("-pc_h2opus_samples", "Number of samples to be taken concurrently when constructing from matvecs", NULL, pch2opus->bs, &pch2opus->bs, NULL));
150 PetscCall(PetscOptionsReal("-pc_h2opus_mrtol", "Relative tolerance for construction from sampling", NULL, pch2opus->mrtol, &pch2opus->mrtol, NULL));
151 PetscCall(PetscOptionsBool("-pc_h2opus_forcecpu", "Force construction on CPU", NULL, pch2opus->forcecpu, &pch2opus->forcecpu, NULL));
152 PetscOptionsHeadEnd();
153 PetscFunctionReturn(PETSC_SUCCESS);
154 }
155
156 typedef struct {
157 Mat A;
158 Mat M;
159 Vec w;
160 } AAtCtx;
161
MatMult_AAt(Mat A,Vec x,Vec y)162 static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
163 {
164 AAtCtx *aat;
165
166 PetscFunctionBegin;
167 PetscCall(MatShellGetContext(A, &aat));
168 /* PetscCall(MatMultTranspose(aat->M,x,aat->w)); */
169 PetscCall(MatMultTranspose(aat->A, x, aat->w));
170 PetscCall(MatMult(aat->A, aat->w, y));
171 PetscFunctionReturn(PETSC_SUCCESS);
172 }
173
PCH2OpusSetUpInit(PC pc)174 static PetscErrorCode PCH2OpusSetUpInit(PC pc)
175 {
176 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
177 Mat A = pc->useAmat ? pc->mat : pc->pmat, AAt;
178 PetscInt M, m;
179 VecType vtype;
180 PetscReal n;
181 AAtCtx aat;
182
183 PetscFunctionBegin;
184 aat.A = A;
185 aat.M = pch2opus->M; /* unused so far */
186 PetscCall(MatCreateVecs(A, NULL, &aat.w));
187 PetscCall(MatGetSize(A, &M, NULL));
188 PetscCall(MatGetLocalSize(A, &m, NULL));
189 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, m, M, M, &aat, &AAt));
190 PetscCall(MatBindToCPU(AAt, pch2opus->boundtocpu));
191 PetscCall(MatShellSetOperation(AAt, MATOP_MULT, (PetscErrorCodeFn *)MatMult_AAt));
192 PetscCall(MatShellSetOperation(AAt, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMult_AAt));
193 PetscCall(MatShellSetOperation(AAt, MATOP_NORM, (PetscErrorCodeFn *)MatNorm_H2OPUS));
194 PetscCall(MatGetVecType(A, &vtype));
195 PetscCall(MatShellSetVecType(AAt, vtype));
196 PetscCall(MatNorm(AAt, NORM_1, &n));
197 pch2opus->s0 = 1. / n;
198 PetscCall(VecDestroy(&aat.w));
199 PetscCall(MatDestroy(&AAt));
200 PetscFunctionReturn(PETSC_SUCCESS);
201 }
202
PCApplyKernel_H2OPUS(PC pc,Vec x,Vec y,PetscBool t)203 static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
204 {
205 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
206
207 PetscFunctionBegin;
208 if (t) PetscCall(MatMultTranspose(pch2opus->M, x, y));
209 else PetscCall(MatMult(pch2opus->M, x, y));
210 PetscFunctionReturn(PETSC_SUCCESS);
211 }
212
PCApplyMatKernel_H2OPUS(PC pc,Mat X,Mat Y,PetscBool t)213 static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
214 {
215 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
216
217 PetscFunctionBegin;
218 if (t) PetscCall(MatTransposeMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DETERMINE, &Y));
219 else PetscCall(MatMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_CURRENT, &Y));
220 PetscFunctionReturn(PETSC_SUCCESS);
221 }
222
PCApplyMat_H2OPUS(PC pc,Mat X,Mat Y)223 static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
224 {
225 PetscFunctionBegin;
226 PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_FALSE));
227 PetscFunctionReturn(PETSC_SUCCESS);
228 }
229
PCApplyTransposeMat_H2OPUS(PC pc,Mat X,Mat Y)230 static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
231 {
232 PetscFunctionBegin;
233 PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_TRUE));
234 PetscFunctionReturn(PETSC_SUCCESS);
235 }
236
PCApply_H2OPUS(PC pc,Vec x,Vec y)237 static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
238 {
239 PetscFunctionBegin;
240 PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_FALSE));
241 PetscFunctionReturn(PETSC_SUCCESS);
242 }
243
PCApplyTranspose_H2OPUS(PC pc,Vec x,Vec y)244 static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
245 {
246 PetscFunctionBegin;
247 PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_TRUE));
248 PetscFunctionReturn(PETSC_SUCCESS);
249 }
250
251 /* used to test the norm of (M^-1 A - I) */
MatMultKernel_MAmI(Mat M,Vec x,Vec y,PetscBool t)252 static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
253 {
254 PC pc;
255 Mat A;
256 PC_H2OPUS *pch2opus;
257 PetscBool sideleft = PETSC_TRUE;
258
259 PetscFunctionBegin;
260 PetscCall(MatShellGetContext(M, &pc));
261 pch2opus = (PC_H2OPUS *)pc->data;
262 if (!pch2opus->w) PetscCall(MatCreateVecs(pch2opus->M, &pch2opus->w, NULL));
263 A = pch2opus->A;
264 PetscCall(VecBindToCPU(pch2opus->w, pch2opus->boundtocpu));
265 if (t) {
266 if (sideleft) {
267 PetscCall(PCApplyTranspose_H2OPUS(pc, x, pch2opus->w));
268 PetscCall(MatMultTranspose(A, pch2opus->w, y));
269 } else {
270 PetscCall(MatMultTranspose(A, x, pch2opus->w));
271 PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->w, y));
272 }
273 } else {
274 if (sideleft) {
275 PetscCall(MatMult(A, x, pch2opus->w));
276 PetscCall(PCApply_H2OPUS(pc, pch2opus->w, y));
277 } else {
278 PetscCall(PCApply_H2OPUS(pc, x, pch2opus->w));
279 PetscCall(MatMult(A, pch2opus->w, y));
280 }
281 }
282 PetscCall(VecAXPY(y, -1.0, x));
283 PetscFunctionReturn(PETSC_SUCCESS);
284 }
285
MatMult_MAmI(Mat A,Vec x,Vec y)286 static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
287 {
288 PetscFunctionBegin;
289 PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_FALSE));
290 PetscFunctionReturn(PETSC_SUCCESS);
291 }
292
MatMultTranspose_MAmI(Mat A,Vec x,Vec y)293 static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
294 {
295 PetscFunctionBegin;
296 PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_TRUE));
297 PetscFunctionReturn(PETSC_SUCCESS);
298 }
299
300 /* HyperPower kernel:
301 Y = R = x
302 for i = 1 . . . l - 1 do
303 R = (I - A * Xk) * R
304 Y = Y + R
305 Y = Xk * Y
306 */
MatMultKernel_Hyper(Mat M,Vec x,Vec y,PetscBool t)307 static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
308 {
309 PC pc;
310 Mat A;
311 PC_H2OPUS *pch2opus;
312
313 PetscFunctionBegin;
314 PetscCall(MatShellGetContext(M, &pc));
315 pch2opus = (PC_H2OPUS *)pc->data;
316 A = pch2opus->A;
317 PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
318 PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[2] ? NULL : &pch2opus->wns[2], pch2opus->wns[3] ? NULL : &pch2opus->wns[3]));
319 PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
320 PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
321 PetscCall(VecBindToCPU(pch2opus->wns[2], pch2opus->boundtocpu));
322 PetscCall(VecBindToCPU(pch2opus->wns[3], pch2opus->boundtocpu));
323 PetscCall(VecCopy(x, pch2opus->wns[0]));
324 PetscCall(VecCopy(x, pch2opus->wns[3]));
325 if (t) {
326 for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
327 PetscCall(MatMultTranspose(A, pch2opus->wns[0], pch2opus->wns[1]));
328 PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[2]));
329 PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
330 PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
331 }
332 PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[3], y));
333 } else {
334 for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
335 PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
336 PetscCall(MatMult(A, pch2opus->wns[1], pch2opus->wns[2]));
337 PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
338 PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
339 }
340 PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[3], y));
341 }
342 PetscFunctionReturn(PETSC_SUCCESS);
343 }
344
MatMult_Hyper(Mat M,Vec x,Vec y)345 static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
346 {
347 PetscFunctionBegin;
348 PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_FALSE));
349 PetscFunctionReturn(PETSC_SUCCESS);
350 }
351
MatMultTranspose_Hyper(Mat M,Vec x,Vec y)352 static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
353 {
354 PetscFunctionBegin;
355 PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_TRUE));
356 PetscFunctionReturn(PETSC_SUCCESS);
357 }
358
359 /* Hyper power kernel, MatMat version */
MatMatMultKernel_Hyper(Mat M,Mat X,Mat Y,PetscBool t)360 static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
361 {
362 PC pc;
363 Mat A;
364 PC_H2OPUS *pch2opus;
365 PetscInt i;
366
367 PetscFunctionBegin;
368 PetscCall(MatShellGetContext(M, &pc));
369 pch2opus = (PC_H2OPUS *)pc->data;
370 A = pch2opus->A;
371 if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
372 PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
373 PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
374 }
375 if (!pch2opus->wnsmat[0]) {
376 PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
377 PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
378 }
379 if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
380 PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
381 PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
382 }
383 if (!pch2opus->wnsmat[2]) {
384 PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[2]));
385 PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[3]));
386 }
387 PetscCall(MatCopy(X, pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
388 PetscCall(MatCopy(X, pch2opus->wnsmat[3], SAME_NONZERO_PATTERN));
389 if (t) {
390 for (i = 0; i < pch2opus->hyperorder - 1; i++) {
391 PetscCall(MatTransposeMatMult(A, pch2opus->wnsmat[0], MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[1]));
392 PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[2]));
393 PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
394 PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
395 }
396 PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
397 } else {
398 for (i = 0; i < pch2opus->hyperorder - 1; i++) {
399 PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
400 PetscCall(MatMatMult(A, pch2opus->wnsmat[1], MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[2]));
401 PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
402 PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
403 }
404 PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
405 }
406 PetscFunctionReturn(PETSC_SUCCESS);
407 }
408
MatMatMultNumeric_Hyper(Mat M,Mat X,Mat Y,PetscCtx ctx)409 static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y, PetscCtx ctx)
410 {
411 PetscFunctionBegin;
412 PetscCall(MatMatMultKernel_Hyper(M, X, Y, PETSC_FALSE));
413 PetscFunctionReturn(PETSC_SUCCESS);
414 }
415
416 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
MatMultKernel_NS(Mat M,Vec x,Vec y,PetscBool t)417 static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
418 {
419 PC pc;
420 Mat A;
421 PC_H2OPUS *pch2opus;
422
423 PetscFunctionBegin;
424 PetscCall(MatShellGetContext(M, &pc));
425 pch2opus = (PC_H2OPUS *)pc->data;
426 A = pch2opus->A;
427 PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
428 PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
429 PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
430 if (t) {
431 PetscCall(PCApplyTranspose_H2OPUS(pc, x, y));
432 PetscCall(MatMultTranspose(A, y, pch2opus->wns[1]));
433 PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[0]));
434 PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[0]));
435 } else {
436 PetscCall(PCApply_H2OPUS(pc, x, y));
437 PetscCall(MatMult(A, y, pch2opus->wns[0]));
438 PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
439 PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[1]));
440 }
441 PetscFunctionReturn(PETSC_SUCCESS);
442 }
443
MatMult_NS(Mat M,Vec x,Vec y)444 static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
445 {
446 PetscFunctionBegin;
447 PetscCall(MatMultKernel_NS(M, x, y, PETSC_FALSE));
448 PetscFunctionReturn(PETSC_SUCCESS);
449 }
450
MatMultTranspose_NS(Mat M,Vec x,Vec y)451 static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
452 {
453 PetscFunctionBegin;
454 PetscCall(MatMultKernel_NS(M, x, y, PETSC_TRUE));
455 PetscFunctionReturn(PETSC_SUCCESS);
456 }
457
458 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
MatMatMultKernel_NS(Mat M,Mat X,Mat Y,PetscBool t)459 static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
460 {
461 PC pc;
462 Mat A;
463 PC_H2OPUS *pch2opus;
464
465 PetscFunctionBegin;
466 PetscCall(MatShellGetContext(M, &pc));
467 pch2opus = (PC_H2OPUS *)pc->data;
468 A = pch2opus->A;
469 if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
470 PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
471 PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
472 }
473 if (!pch2opus->wnsmat[0]) {
474 PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
475 PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
476 }
477 if (t) {
478 PetscCall(PCApplyTransposeMat_H2OPUS(pc, X, Y));
479 PetscCall(MatTransposeMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[1]));
480 PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[0]));
481 PetscCall(MatScale(Y, 2.));
482 PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
483 } else {
484 PetscCall(PCApplyMat_H2OPUS(pc, X, Y));
485 PetscCall(MatMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[0]));
486 PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
487 PetscCall(MatScale(Y, 2.));
488 PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[1], SAME_NONZERO_PATTERN));
489 }
490 PetscFunctionReturn(PETSC_SUCCESS);
491 }
492
MatMatMultNumeric_NS(Mat M,Mat X,Mat Y,PetscCtx ctx)493 static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, PetscCtx ctx)
494 {
495 PetscFunctionBegin;
496 PetscCall(MatMatMultKernel_NS(M, X, Y, PETSC_FALSE));
497 PetscFunctionReturn(PETSC_SUCCESS);
498 }
499
PCH2OpusSetUpSampler_Private(PC pc)500 static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
501 {
502 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
503 Mat A = pc->useAmat ? pc->mat : pc->pmat;
504
505 PetscFunctionBegin;
506 if (!pch2opus->S) {
507 PetscInt M, N, m, n;
508
509 PetscCall(MatGetSize(A, &M, &N));
510 PetscCall(MatGetLocalSize(A, &m, &n));
511 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, n, M, N, pc, &pch2opus->S));
512 PetscCall(MatSetBlockSizesFromMats(pch2opus->S, A, A));
513 #if defined(PETSC_H2OPUS_USE_GPU)
514 PetscCall(MatShellSetVecType(pch2opus->S, VECCUDA));
515 #endif
516 }
517 if (pch2opus->hyperorder >= 2) {
518 PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Hyper));
519 PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Hyper));
520 PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSE, MATDENSE));
521 PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSECUDA, MATDENSECUDA));
522 } else {
523 PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (PetscErrorCodeFn *)MatMult_NS));
524 PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_NS));
525 PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSE, MATDENSE));
526 PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSECUDA, MATDENSECUDA));
527 }
528 PetscCall(MatPropagateSymmetryOptions(A, pch2opus->S));
529 PetscCall(MatBindToCPU(pch2opus->S, pch2opus->boundtocpu));
530 /* XXX */
531 PetscCall(MatSetOption(pch2opus->S, MAT_SYMMETRIC, PETSC_TRUE));
532 PetscFunctionReturn(PETSC_SUCCESS);
533 }
534
PCSetUp_H2OPUS(PC pc)535 static PetscErrorCode PCSetUp_H2OPUS(PC pc)
536 {
537 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
538 Mat A = pc->useAmat ? pc->mat : pc->pmat;
539 NormType norm = pch2opus->normtype;
540 PetscReal initerr = 0.0, err;
541 PetscBool ish2opus;
542
543 PetscFunctionBegin;
544 if (!pch2opus->T) {
545 PetscInt M, N, m, n;
546 const char *prefix;
547
548 PetscCall(PCGetOptionsPrefix(pc, &prefix));
549 PetscCall(MatGetSize(A, &M, &N));
550 PetscCall(MatGetLocalSize(A, &m, &n));
551 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat), m, n, M, N, pc, &pch2opus->T));
552 PetscCall(MatSetBlockSizesFromMats(pch2opus->T, A, A));
553 PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT, (PetscErrorCodeFn *)MatMult_MAmI));
554 PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_MAmI));
555 PetscCall(MatShellSetOperation(pch2opus->T, MATOP_NORM, (PetscErrorCodeFn *)MatNorm_H2OPUS));
556 #if defined(PETSC_H2OPUS_USE_GPU)
557 PetscCall(MatShellSetVecType(pch2opus->T, VECCUDA));
558 #endif
559 PetscCall(MatSetOptionsPrefix(pch2opus->T, prefix));
560 PetscCall(MatAppendOptionsPrefix(pch2opus->T, "pc_h2opus_est_"));
561 }
562 PetscCall(MatDestroy(&pch2opus->A));
563 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATH2OPUS, &ish2opus));
564 if (ish2opus) {
565 PetscCall(PetscObjectReference((PetscObject)A));
566 pch2opus->A = A;
567 } else {
568 const char *prefix;
569
570 /* See if we can infer coordinates from the DM */
571 if (!pch2opus->sdim) PetscCall(PCH2OpusInferCoordinates_Private(pc));
572 PetscCall(MatCreateH2OpusFromMat(A, pch2opus->sdim, pch2opus->coords, PETSC_FALSE, pch2opus->eta, pch2opus->leafsize, pch2opus->max_rank, pch2opus->bs, pch2opus->mrtol, &pch2opus->A));
573 /* XXX */
574 PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));
575 PetscCall(PCGetOptionsPrefix(pc, &prefix));
576 PetscCall(MatSetOptionsPrefix(pch2opus->A, prefix));
577 PetscCall(MatAppendOptionsPrefix(pch2opus->A, "pc_h2opus_init_"));
578 PetscCall(MatSetFromOptions(pch2opus->A));
579 PetscCall(MatAssemblyBegin(pch2opus->A, MAT_FINAL_ASSEMBLY));
580 PetscCall(MatAssemblyEnd(pch2opus->A, MAT_FINAL_ASSEMBLY));
581 /* XXX */
582 PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));
583
584 /* always perform construction on the GPU unless forcecpu is true */
585 PetscCall(MatBindToCPU(pch2opus->A, pch2opus->forcecpu));
586 }
587 #if defined(PETSC_H2OPUS_USE_GPU)
588 pch2opus->boundtocpu = pch2opus->forcecpu ? PETSC_TRUE : pch2opus->A->boundtocpu;
589 #endif
590 PetscCall(MatBindToCPU(pch2opus->T, pch2opus->boundtocpu));
591 if (pch2opus->M) { /* see if we can reuse M as initial guess */
592 PetscReal reuse;
593
594 PetscCall(MatBindToCPU(pch2opus->M, pch2opus->boundtocpu));
595 PetscCall(MatNorm(pch2opus->T, norm, &reuse));
596 if (reuse >= 1.0) PetscCall(MatDestroy(&pch2opus->M));
597 }
598 if (!pch2opus->M) {
599 const char *prefix;
600 PetscCall(MatDuplicate(pch2opus->A, MAT_COPY_VALUES, &pch2opus->M));
601 PetscCall(PCGetOptionsPrefix(pc, &prefix));
602 PetscCall(MatSetOptionsPrefix(pch2opus->M, prefix));
603 PetscCall(MatAppendOptionsPrefix(pch2opus->M, "pc_h2opus_inv_"));
604 PetscCall(MatSetFromOptions(pch2opus->M));
605 PetscCall(PCH2OpusSetUpInit(pc));
606 PetscCall(MatScale(pch2opus->M, pch2opus->s0));
607 }
608 /* A and M have the same h2 matrix nonzero structure, save on reordering routines */
609 PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_TRUE));
610 PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_TRUE));
611 if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &initerr));
612 if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
613 err = initerr;
614 if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", 0, NormTypes[norm], (double)err, (double)(err / initerr)));
615 if (initerr > pch2opus->atol && !pc->failedreason) {
616 PetscInt i;
617
618 PetscCall(PCH2OpusSetUpSampler_Private(pc));
619 for (i = 0; i < pch2opus->maxits; i++) {
620 Mat M;
621 const char *prefix;
622
623 PetscCall(MatDuplicate(pch2opus->M, MAT_SHARE_NONZERO_PATTERN, &M));
624 PetscCall(MatGetOptionsPrefix(pch2opus->M, &prefix));
625 PetscCall(MatSetOptionsPrefix(M, prefix));
626 PetscCall(MatH2OpusSetSamplingMat(M, pch2opus->S, PETSC_DECIDE, PETSC_DECIDE));
627 PetscCall(MatSetFromOptions(M));
628 PetscCall(MatH2OpusSetNativeMult(M, PETSC_TRUE));
629 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
630 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
631 PetscCall(MatDestroy(&pch2opus->M));
632 pch2opus->M = M;
633 if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &err));
634 if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", i + 1, NormTypes[norm], (double)err, (double)(err / initerr)));
635 if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
636 if (err < pch2opus->atol || err < pch2opus->rtol * initerr || pc->failedreason) break;
637 }
638 }
639 /* cleanup setup workspace */
640 PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_FALSE));
641 PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_FALSE));
642 PetscCall(VecDestroy(&pch2opus->wns[0]));
643 PetscCall(VecDestroy(&pch2opus->wns[1]));
644 PetscCall(VecDestroy(&pch2opus->wns[2]));
645 PetscCall(VecDestroy(&pch2opus->wns[3]));
646 PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
647 PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
648 PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
649 PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
650 PetscFunctionReturn(PETSC_SUCCESS);
651 }
652
PCView_H2OPUS(PC pc,PetscViewer viewer)653 static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
654 {
655 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
656 PetscBool isascii;
657
658 PetscFunctionBegin;
659 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
660 if (isascii) {
661 if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
662 PetscCall(PetscViewerASCIIPrintf(viewer, "Initial approximation matrix\n"));
663 PetscCall(PetscViewerASCIIPushTab(viewer));
664 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
665 PetscCall(MatView(pch2opus->A, viewer));
666 PetscCall(PetscViewerPopFormat(viewer));
667 PetscCall(PetscViewerASCIIPopTab(viewer));
668 }
669 if (pch2opus->M) {
670 PetscCall(PetscViewerASCIIPrintf(viewer, "Inner matrix constructed\n"));
671 PetscCall(PetscViewerASCIIPushTab(viewer));
672 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
673 PetscCall(MatView(pch2opus->M, viewer));
674 PetscCall(PetscViewerPopFormat(viewer));
675 PetscCall(PetscViewerASCIIPopTab(viewer));
676 }
677 PetscCall(PetscViewerASCIIPrintf(viewer, "Initial scaling: %g\n", pch2opus->s0));
678 }
679 PetscFunctionReturn(PETSC_SUCCESS);
680 }
681
682 /*MC
683 PCH2OPUS = "h2opus" - A preconditioner type for, `MATH2OPUS`, hierarchical matrices using the H2Opus package.
684
685 Options Database Keys:
686 + -pc_type h2opus - pc type to "h2opus" during a call to `PCSetFromOptions()`
687 . -pc_h2opus_maxits - maximum number of iterations for Newton-Schultz
688 . -pc_h2opus_monitor - monitor Newton-Schultz convergence
689 . -pc_h2opus_atol - absolute tolerance
690 . -pc_h2opus_rtol - relative tolerance
691 . -pc_h2opus_norm_type - normtype
692 . -pc_h2opus_hyperorder - Hyper power order of sampling
693 . -pc_h2opus_leafsize - leaf size when constructed from kernel
694 . -pc_h2opus_eta - admissibility condition tolerance
695 . -pc_h2opus_maxrank - maximum rank when constructed from matvecs
696 . -pc_h2opus_samples - number of samples to be taken concurrently when constructing from matvecs
697 . -pc_h2opus_mrtol - relative tolerance for construction from sampling
698 - -pc_h2opus_forcecpu - force construction of preconditioner on CPU
699
700 Level: intermediate
701
702 .seealso: [](ch_ksp), `MATH2OPUS`, `MATHTOOL`, `MATDENSE`, `MatCreateH2OpusFromKernel()`, `MatCreateH2OpusFromMat()`
703 M*/
704
PCCreate_H2OPUS(PC pc)705 PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
706 {
707 PC_H2OPUS *pch2opus;
708
709 PetscFunctionBegin;
710 PetscCall(PetscNew(&pch2opus));
711 pc->data = (void *)pch2opus;
712
713 pch2opus->atol = 1.e-2;
714 pch2opus->rtol = 1.e-6;
715 pch2opus->maxits = 50;
716 pch2opus->hyperorder = 1; /* defaults to basic Newton-Schultz */
717 pch2opus->normtype = NORM_2;
718
719 /* these are needed when we are sampling the pmat */
720 pch2opus->eta = PETSC_DECIDE;
721 pch2opus->leafsize = PETSC_DECIDE;
722 pch2opus->max_rank = PETSC_DECIDE;
723 pch2opus->bs = PETSC_DECIDE;
724 pch2opus->mrtol = PETSC_DECIDE;
725 pch2opus->boundtocpu = PetscDefined(H2OPUS_USE_GPU) ? PETSC_FALSE : PETSC_TRUE;
726 pc->ops->destroy = PCDestroy_H2OPUS;
727 pc->ops->setup = PCSetUp_H2OPUS;
728 pc->ops->apply = PCApply_H2OPUS;
729 pc->ops->matapply = PCApplyMat_H2OPUS;
730 pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
731 pc->ops->reset = PCReset_H2OPUS;
732 pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
733 pc->ops->view = PCView_H2OPUS;
734
735 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_H2OPUS));
736 PetscFunctionReturn(PETSC_SUCCESS);
737 }
738