xref: /petsc/src/ksp/pc/impls/h2opus/pch2opus.c (revision ae1ee55146a7ad071171b861759b85940c7e5c67) !
1 #include <petsc/private/pcimpl.h>
2 #include <petsc/private/matimpl.h>
3 #include <petscdm.h>
4 #include <h2opusconf.h>
5 
6 /* Use GPU only if H2OPUS is configured for GPU */
7 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
8   #define PETSC_H2OPUS_USE_GPU
9 #endif
10 
11 typedef struct {
12   Mat         A;
13   Mat         M;
14   PetscScalar s0;
15 
16   /* sampler for Newton-Schultz */
17   Mat      S;
18   PetscInt hyperorder;
19   Vec      wns[4];
20   Mat      wnsmat[4];
21 
22   /* convergence testing */
23   Mat T;
24   Vec w;
25 
26   /* Support for PCSetCoordinates */
27   PetscInt   sdim;
28   PetscInt   nlocc;
29   PetscReal *coords;
30 
31   /* Newton-Schultz customization */
32   PetscInt  maxits;
33   PetscReal rtol, atol;
34   PetscBool monitor;
35   PetscBool useapproximatenorms;
36   NormType  normtype;
37 
38   /* Used when pmat != MATH2OPUS */
39   PetscReal eta;
40   PetscInt  leafsize;
41   PetscInt  max_rank;
42   PetscInt  bs;
43   PetscReal mrtol;
44 
45   /* CPU/GPU */
46   PetscBool forcecpu;
47   PetscBool boundtocpu;
48 } PC_H2OPUS;
49 
50 PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat, NormType, PetscReal *);
51 
PCH2OpusInferCoordinates_Private(PC pc)52 static PetscErrorCode PCH2OpusInferCoordinates_Private(PC pc)
53 {
54   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
55   DM         dm;
56   PetscBool  isdmda;
57 
58   PetscFunctionBegin;
59   if (pch2opus->sdim) PetscFunctionReturn(PETSC_SUCCESS);
60   PetscCall(PCGetDM(pc, &dm));
61   if (!dm) PetscCall(MatGetDM(pc->useAmat ? pc->mat : pc->pmat, &dm));
62   PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isdmda));
63   if (isdmda) {
64     Vec                c;
65     const PetscScalar *coords;
66     PetscInt           n, sdim;
67 
68     PetscCall(DMGetCoordinates(dm, &c));
69     PetscCall(DMGetDimension(dm, &sdim));
70     PetscCall(VecGetLocalSize(c, &n));
71     PetscCall(VecGetArrayRead(c, &coords));
72     PetscCall(PCSetCoordinates(pc, sdim, n / sdim, (PetscScalar *)coords));
73     PetscCall(VecRestoreArrayRead(c, &coords));
74   }
75   PetscFunctionReturn(PETSC_SUCCESS);
76 }
77 
PCReset_H2OPUS(PC pc)78 static PetscErrorCode PCReset_H2OPUS(PC pc)
79 {
80   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
81 
82   PetscFunctionBegin;
83   pch2opus->sdim  = 0;
84   pch2opus->nlocc = 0;
85   PetscCall(PetscFree(pch2opus->coords));
86   PetscFunctionReturn(PETSC_SUCCESS);
87 }
88 
PCSetCoordinates_H2OPUS(PC pc,PetscInt sdim,PetscInt nlocc,PetscReal * coords)89 static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
90 {
91   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
92   PetscBool  reset    = PETSC_TRUE;
93 
94   PetscFunctionBegin;
95   if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
96     PetscCall(PetscArraycmp(pch2opus->coords, coords, sdim * nlocc, &reset));
97     reset = (PetscBool)!reset;
98   }
99   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reset, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
100   if (reset) {
101     PetscCall(PCReset_H2OPUS(pc));
102     PetscCall(PetscMalloc1(sdim * nlocc, &pch2opus->coords));
103     PetscCall(PetscArraycpy(pch2opus->coords, coords, sdim * nlocc));
104     pch2opus->sdim  = sdim;
105     pch2opus->nlocc = nlocc;
106   }
107   PetscFunctionReturn(PETSC_SUCCESS);
108 }
109 
PCDestroy_H2OPUS(PC pc)110 static PetscErrorCode PCDestroy_H2OPUS(PC pc)
111 {
112   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
113 
114   PetscFunctionBegin;
115   PetscCall(PCReset_H2OPUS(pc));
116   PetscCall(MatDestroy(&pch2opus->A));
117   PetscCall(MatDestroy(&pch2opus->M));
118   PetscCall(MatDestroy(&pch2opus->T));
119   PetscCall(VecDestroy(&pch2opus->w));
120   PetscCall(MatDestroy(&pch2opus->S));
121   PetscCall(VecDestroy(&pch2opus->wns[0]));
122   PetscCall(VecDestroy(&pch2opus->wns[1]));
123   PetscCall(VecDestroy(&pch2opus->wns[2]));
124   PetscCall(VecDestroy(&pch2opus->wns[3]));
125   PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
126   PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
127   PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
128   PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
129   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL));
130   PetscCall(PetscFree(pc->data));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
PCSetFromOptions_H2OPUS(PC pc,PetscOptionItems PetscOptionsObject)134 static PetscErrorCode PCSetFromOptions_H2OPUS(PC pc, PetscOptionItems PetscOptionsObject)
135 {
136   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
137 
138   PetscFunctionBegin;
139   PetscOptionsHeadBegin(PetscOptionsObject, "H2OPUS options");
140   PetscCall(PetscOptionsInt("-pc_h2opus_maxits", "Maximum number of iterations for Newton-Schultz", NULL, pch2opus->maxits, &pch2opus->maxits, NULL));
141   PetscCall(PetscOptionsBool("-pc_h2opus_monitor", "Monitor Newton-Schultz convergence", NULL, pch2opus->monitor, &pch2opus->monitor, NULL));
142   PetscCall(PetscOptionsReal("-pc_h2opus_atol", "Absolute tolerance", NULL, pch2opus->atol, &pch2opus->atol, NULL));
143   PetscCall(PetscOptionsReal("-pc_h2opus_rtol", "Relative tolerance", NULL, pch2opus->rtol, &pch2opus->rtol, NULL));
144   PetscCall(PetscOptionsEnum("-pc_h2opus_norm_type", "Norm type for convergence monitoring", NULL, NormTypes, (PetscEnum)pch2opus->normtype, (PetscEnum *)&pch2opus->normtype, NULL));
145   PetscCall(PetscOptionsInt("-pc_h2opus_hyperorder", "Hyper power order of sampling", NULL, pch2opus->hyperorder, &pch2opus->hyperorder, NULL));
146   PetscCall(PetscOptionsInt("-pc_h2opus_leafsize", "Leaf size when constructed from kernel", NULL, pch2opus->leafsize, &pch2opus->leafsize, NULL));
147   PetscCall(PetscOptionsReal("-pc_h2opus_eta", "Admissibility condition tolerance", NULL, pch2opus->eta, &pch2opus->eta, NULL));
148   PetscCall(PetscOptionsInt("-pc_h2opus_maxrank", "Maximum rank when constructed from matvecs", NULL, pch2opus->max_rank, &pch2opus->max_rank, NULL));
149   PetscCall(PetscOptionsInt("-pc_h2opus_samples", "Number of samples to be taken concurrently when constructing from matvecs", NULL, pch2opus->bs, &pch2opus->bs, NULL));
150   PetscCall(PetscOptionsReal("-pc_h2opus_mrtol", "Relative tolerance for construction from sampling", NULL, pch2opus->mrtol, &pch2opus->mrtol, NULL));
151   PetscCall(PetscOptionsBool("-pc_h2opus_forcecpu", "Force construction on CPU", NULL, pch2opus->forcecpu, &pch2opus->forcecpu, NULL));
152   PetscOptionsHeadEnd();
153   PetscFunctionReturn(PETSC_SUCCESS);
154 }
155 
156 typedef struct {
157   Mat A;
158   Mat M;
159   Vec w;
160 } AAtCtx;
161 
MatMult_AAt(Mat A,Vec x,Vec y)162 static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
163 {
164   AAtCtx *aat;
165 
166   PetscFunctionBegin;
167   PetscCall(MatShellGetContext(A, &aat));
168   /* PetscCall(MatMultTranspose(aat->M,x,aat->w)); */
169   PetscCall(MatMultTranspose(aat->A, x, aat->w));
170   PetscCall(MatMult(aat->A, aat->w, y));
171   PetscFunctionReturn(PETSC_SUCCESS);
172 }
173 
PCH2OpusSetUpInit(PC pc)174 static PetscErrorCode PCH2OpusSetUpInit(PC pc)
175 {
176   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
177   Mat        A        = pc->useAmat ? pc->mat : pc->pmat, AAt;
178   PetscInt   M, m;
179   VecType    vtype;
180   PetscReal  n;
181   AAtCtx     aat;
182 
183   PetscFunctionBegin;
184   aat.A = A;
185   aat.M = pch2opus->M; /* unused so far */
186   PetscCall(MatCreateVecs(A, NULL, &aat.w));
187   PetscCall(MatGetSize(A, &M, NULL));
188   PetscCall(MatGetLocalSize(A, &m, NULL));
189   PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, m, M, M, &aat, &AAt));
190   PetscCall(MatBindToCPU(AAt, pch2opus->boundtocpu));
191   PetscCall(MatShellSetOperation(AAt, MATOP_MULT, (PetscErrorCodeFn *)MatMult_AAt));
192   PetscCall(MatShellSetOperation(AAt, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMult_AAt));
193   PetscCall(MatShellSetOperation(AAt, MATOP_NORM, (PetscErrorCodeFn *)MatNorm_H2OPUS));
194   PetscCall(MatGetVecType(A, &vtype));
195   PetscCall(MatShellSetVecType(AAt, vtype));
196   PetscCall(MatNorm(AAt, NORM_1, &n));
197   pch2opus->s0 = 1. / n;
198   PetscCall(VecDestroy(&aat.w));
199   PetscCall(MatDestroy(&AAt));
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
PCApplyKernel_H2OPUS(PC pc,Vec x,Vec y,PetscBool t)203 static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
204 {
205   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
206 
207   PetscFunctionBegin;
208   if (t) PetscCall(MatMultTranspose(pch2opus->M, x, y));
209   else PetscCall(MatMult(pch2opus->M, x, y));
210   PetscFunctionReturn(PETSC_SUCCESS);
211 }
212 
PCApplyMatKernel_H2OPUS(PC pc,Mat X,Mat Y,PetscBool t)213 static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
214 {
215   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
216 
217   PetscFunctionBegin;
218   if (t) PetscCall(MatTransposeMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DETERMINE, &Y));
219   else PetscCall(MatMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_CURRENT, &Y));
220   PetscFunctionReturn(PETSC_SUCCESS);
221 }
222 
PCApplyMat_H2OPUS(PC pc,Mat X,Mat Y)223 static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
224 {
225   PetscFunctionBegin;
226   PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_FALSE));
227   PetscFunctionReturn(PETSC_SUCCESS);
228 }
229 
PCApplyTransposeMat_H2OPUS(PC pc,Mat X,Mat Y)230 static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
231 {
232   PetscFunctionBegin;
233   PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_TRUE));
234   PetscFunctionReturn(PETSC_SUCCESS);
235 }
236 
PCApply_H2OPUS(PC pc,Vec x,Vec y)237 static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
238 {
239   PetscFunctionBegin;
240   PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_FALSE));
241   PetscFunctionReturn(PETSC_SUCCESS);
242 }
243 
PCApplyTranspose_H2OPUS(PC pc,Vec x,Vec y)244 static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
245 {
246   PetscFunctionBegin;
247   PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_TRUE));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 /* used to test the norm of (M^-1 A - I) */
MatMultKernel_MAmI(Mat M,Vec x,Vec y,PetscBool t)252 static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
253 {
254   PC         pc;
255   Mat        A;
256   PC_H2OPUS *pch2opus;
257   PetscBool  sideleft = PETSC_TRUE;
258 
259   PetscFunctionBegin;
260   PetscCall(MatShellGetContext(M, &pc));
261   pch2opus = (PC_H2OPUS *)pc->data;
262   if (!pch2opus->w) PetscCall(MatCreateVecs(pch2opus->M, &pch2opus->w, NULL));
263   A = pch2opus->A;
264   PetscCall(VecBindToCPU(pch2opus->w, pch2opus->boundtocpu));
265   if (t) {
266     if (sideleft) {
267       PetscCall(PCApplyTranspose_H2OPUS(pc, x, pch2opus->w));
268       PetscCall(MatMultTranspose(A, pch2opus->w, y));
269     } else {
270       PetscCall(MatMultTranspose(A, x, pch2opus->w));
271       PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->w, y));
272     }
273   } else {
274     if (sideleft) {
275       PetscCall(MatMult(A, x, pch2opus->w));
276       PetscCall(PCApply_H2OPUS(pc, pch2opus->w, y));
277     } else {
278       PetscCall(PCApply_H2OPUS(pc, x, pch2opus->w));
279       PetscCall(MatMult(A, pch2opus->w, y));
280     }
281   }
282   PetscCall(VecAXPY(y, -1.0, x));
283   PetscFunctionReturn(PETSC_SUCCESS);
284 }
285 
MatMult_MAmI(Mat A,Vec x,Vec y)286 static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
287 {
288   PetscFunctionBegin;
289   PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_FALSE));
290   PetscFunctionReturn(PETSC_SUCCESS);
291 }
292 
MatMultTranspose_MAmI(Mat A,Vec x,Vec y)293 static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
294 {
295   PetscFunctionBegin;
296   PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_TRUE));
297   PetscFunctionReturn(PETSC_SUCCESS);
298 }
299 
300 /* HyperPower kernel:
301 Y = R = x
302 for i = 1 . . . l - 1 do
303   R = (I - A * Xk) * R
304   Y = Y + R
305 Y = Xk * Y
306 */
MatMultKernel_Hyper(Mat M,Vec x,Vec y,PetscBool t)307 static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
308 {
309   PC         pc;
310   Mat        A;
311   PC_H2OPUS *pch2opus;
312 
313   PetscFunctionBegin;
314   PetscCall(MatShellGetContext(M, &pc));
315   pch2opus = (PC_H2OPUS *)pc->data;
316   A        = pch2opus->A;
317   PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
318   PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[2] ? NULL : &pch2opus->wns[2], pch2opus->wns[3] ? NULL : &pch2opus->wns[3]));
319   PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
320   PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
321   PetscCall(VecBindToCPU(pch2opus->wns[2], pch2opus->boundtocpu));
322   PetscCall(VecBindToCPU(pch2opus->wns[3], pch2opus->boundtocpu));
323   PetscCall(VecCopy(x, pch2opus->wns[0]));
324   PetscCall(VecCopy(x, pch2opus->wns[3]));
325   if (t) {
326     for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
327       PetscCall(MatMultTranspose(A, pch2opus->wns[0], pch2opus->wns[1]));
328       PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[2]));
329       PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
330       PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
331     }
332     PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[3], y));
333   } else {
334     for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) {
335       PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
336       PetscCall(MatMult(A, pch2opus->wns[1], pch2opus->wns[2]));
337       PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2]));
338       PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0]));
339     }
340     PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[3], y));
341   }
342   PetscFunctionReturn(PETSC_SUCCESS);
343 }
344 
MatMult_Hyper(Mat M,Vec x,Vec y)345 static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
346 {
347   PetscFunctionBegin;
348   PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_FALSE));
349   PetscFunctionReturn(PETSC_SUCCESS);
350 }
351 
MatMultTranspose_Hyper(Mat M,Vec x,Vec y)352 static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
353 {
354   PetscFunctionBegin;
355   PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_TRUE));
356   PetscFunctionReturn(PETSC_SUCCESS);
357 }
358 
359 /* Hyper power kernel, MatMat version */
MatMatMultKernel_Hyper(Mat M,Mat X,Mat Y,PetscBool t)360 static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
361 {
362   PC         pc;
363   Mat        A;
364   PC_H2OPUS *pch2opus;
365   PetscInt   i;
366 
367   PetscFunctionBegin;
368   PetscCall(MatShellGetContext(M, &pc));
369   pch2opus = (PC_H2OPUS *)pc->data;
370   A        = pch2opus->A;
371   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
372     PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
373     PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
374   }
375   if (!pch2opus->wnsmat[0]) {
376     PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
377     PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
378   }
379   if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
380     PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
381     PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
382   }
383   if (!pch2opus->wnsmat[2]) {
384     PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[2]));
385     PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[3]));
386   }
387   PetscCall(MatCopy(X, pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
388   PetscCall(MatCopy(X, pch2opus->wnsmat[3], SAME_NONZERO_PATTERN));
389   if (t) {
390     for (i = 0; i < pch2opus->hyperorder - 1; i++) {
391       PetscCall(MatTransposeMatMult(A, pch2opus->wnsmat[0], MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[1]));
392       PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[2]));
393       PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
394       PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
395     }
396     PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
397   } else {
398     for (i = 0; i < pch2opus->hyperorder - 1; i++) {
399       PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
400       PetscCall(MatMatMult(A, pch2opus->wnsmat[1], MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[2]));
401       PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN));
402       PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
403     }
404     PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[3], Y));
405   }
406   PetscFunctionReturn(PETSC_SUCCESS);
407 }
408 
MatMatMultNumeric_Hyper(Mat M,Mat X,Mat Y,PetscCtx ctx)409 static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y, PetscCtx ctx)
410 {
411   PetscFunctionBegin;
412   PetscCall(MatMatMultKernel_Hyper(M, X, Y, PETSC_FALSE));
413   PetscFunctionReturn(PETSC_SUCCESS);
414 }
415 
416 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
MatMultKernel_NS(Mat M,Vec x,Vec y,PetscBool t)417 static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
418 {
419   PC         pc;
420   Mat        A;
421   PC_H2OPUS *pch2opus;
422 
423   PetscFunctionBegin;
424   PetscCall(MatShellGetContext(M, &pc));
425   pch2opus = (PC_H2OPUS *)pc->data;
426   A        = pch2opus->A;
427   PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
428   PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu));
429   PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu));
430   if (t) {
431     PetscCall(PCApplyTranspose_H2OPUS(pc, x, y));
432     PetscCall(MatMultTranspose(A, y, pch2opus->wns[1]));
433     PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[0]));
434     PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[0]));
435   } else {
436     PetscCall(PCApply_H2OPUS(pc, x, y));
437     PetscCall(MatMult(A, y, pch2opus->wns[0]));
438     PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1]));
439     PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[1]));
440   }
441   PetscFunctionReturn(PETSC_SUCCESS);
442 }
443 
MatMult_NS(Mat M,Vec x,Vec y)444 static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
445 {
446   PetscFunctionBegin;
447   PetscCall(MatMultKernel_NS(M, x, y, PETSC_FALSE));
448   PetscFunctionReturn(PETSC_SUCCESS);
449 }
450 
MatMultTranspose_NS(Mat M,Vec x,Vec y)451 static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
452 {
453   PetscFunctionBegin;
454   PetscCall(MatMultKernel_NS(M, x, y, PETSC_TRUE));
455   PetscFunctionReturn(PETSC_SUCCESS);
456 }
457 
458 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
MatMatMultKernel_NS(Mat M,Mat X,Mat Y,PetscBool t)459 static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
460 {
461   PC         pc;
462   Mat        A;
463   PC_H2OPUS *pch2opus;
464 
465   PetscFunctionBegin;
466   PetscCall(MatShellGetContext(M, &pc));
467   pch2opus = (PC_H2OPUS *)pc->data;
468   A        = pch2opus->A;
469   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
470     PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
471     PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
472   }
473   if (!pch2opus->wnsmat[0]) {
474     PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0]));
475     PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1]));
476   }
477   if (t) {
478     PetscCall(PCApplyTransposeMat_H2OPUS(pc, X, Y));
479     PetscCall(MatTransposeMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[1]));
480     PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[0]));
481     PetscCall(MatScale(Y, 2.));
482     PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN));
483   } else {
484     PetscCall(PCApplyMat_H2OPUS(pc, X, Y));
485     PetscCall(MatMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[0]));
486     PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1]));
487     PetscCall(MatScale(Y, 2.));
488     PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[1], SAME_NONZERO_PATTERN));
489   }
490   PetscFunctionReturn(PETSC_SUCCESS);
491 }
492 
MatMatMultNumeric_NS(Mat M,Mat X,Mat Y,PetscCtx ctx)493 static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, PetscCtx ctx)
494 {
495   PetscFunctionBegin;
496   PetscCall(MatMatMultKernel_NS(M, X, Y, PETSC_FALSE));
497   PetscFunctionReturn(PETSC_SUCCESS);
498 }
499 
PCH2OpusSetUpSampler_Private(PC pc)500 static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
501 {
502   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
503   Mat        A        = pc->useAmat ? pc->mat : pc->pmat;
504 
505   PetscFunctionBegin;
506   if (!pch2opus->S) {
507     PetscInt M, N, m, n;
508 
509     PetscCall(MatGetSize(A, &M, &N));
510     PetscCall(MatGetLocalSize(A, &m, &n));
511     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, n, M, N, pc, &pch2opus->S));
512     PetscCall(MatSetBlockSizesFromMats(pch2opus->S, A, A));
513 #if defined(PETSC_H2OPUS_USE_GPU)
514     PetscCall(MatShellSetVecType(pch2opus->S, VECCUDA));
515 #endif
516   }
517   if (pch2opus->hyperorder >= 2) {
518     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Hyper));
519     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Hyper));
520     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSE, MATDENSE));
521     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSECUDA, MATDENSECUDA));
522   } else {
523     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (PetscErrorCodeFn *)MatMult_NS));
524     PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_NS));
525     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSE, MATDENSE));
526     PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSECUDA, MATDENSECUDA));
527   }
528   PetscCall(MatPropagateSymmetryOptions(A, pch2opus->S));
529   PetscCall(MatBindToCPU(pch2opus->S, pch2opus->boundtocpu));
530   /* XXX */
531   PetscCall(MatSetOption(pch2opus->S, MAT_SYMMETRIC, PETSC_TRUE));
532   PetscFunctionReturn(PETSC_SUCCESS);
533 }
534 
PCSetUp_H2OPUS(PC pc)535 static PetscErrorCode PCSetUp_H2OPUS(PC pc)
536 {
537   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
538   Mat        A        = pc->useAmat ? pc->mat : pc->pmat;
539   NormType   norm     = pch2opus->normtype;
540   PetscReal  initerr  = 0.0, err;
541   PetscBool  ish2opus;
542 
543   PetscFunctionBegin;
544   if (!pch2opus->T) {
545     PetscInt    M, N, m, n;
546     const char *prefix;
547 
548     PetscCall(PCGetOptionsPrefix(pc, &prefix));
549     PetscCall(MatGetSize(A, &M, &N));
550     PetscCall(MatGetLocalSize(A, &m, &n));
551     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat), m, n, M, N, pc, &pch2opus->T));
552     PetscCall(MatSetBlockSizesFromMats(pch2opus->T, A, A));
553     PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT, (PetscErrorCodeFn *)MatMult_MAmI));
554     PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_MAmI));
555     PetscCall(MatShellSetOperation(pch2opus->T, MATOP_NORM, (PetscErrorCodeFn *)MatNorm_H2OPUS));
556 #if defined(PETSC_H2OPUS_USE_GPU)
557     PetscCall(MatShellSetVecType(pch2opus->T, VECCUDA));
558 #endif
559     PetscCall(MatSetOptionsPrefix(pch2opus->T, prefix));
560     PetscCall(MatAppendOptionsPrefix(pch2opus->T, "pc_h2opus_est_"));
561   }
562   PetscCall(MatDestroy(&pch2opus->A));
563   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATH2OPUS, &ish2opus));
564   if (ish2opus) {
565     PetscCall(PetscObjectReference((PetscObject)A));
566     pch2opus->A = A;
567   } else {
568     const char *prefix;
569 
570     /* See if we can infer coordinates from the DM */
571     if (!pch2opus->sdim) PetscCall(PCH2OpusInferCoordinates_Private(pc));
572     PetscCall(MatCreateH2OpusFromMat(A, pch2opus->sdim, pch2opus->coords, PETSC_FALSE, pch2opus->eta, pch2opus->leafsize, pch2opus->max_rank, pch2opus->bs, pch2opus->mrtol, &pch2opus->A));
573     /* XXX */
574     PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));
575     PetscCall(PCGetOptionsPrefix(pc, &prefix));
576     PetscCall(MatSetOptionsPrefix(pch2opus->A, prefix));
577     PetscCall(MatAppendOptionsPrefix(pch2opus->A, "pc_h2opus_init_"));
578     PetscCall(MatSetFromOptions(pch2opus->A));
579     PetscCall(MatAssemblyBegin(pch2opus->A, MAT_FINAL_ASSEMBLY));
580     PetscCall(MatAssemblyEnd(pch2opus->A, MAT_FINAL_ASSEMBLY));
581     /* XXX */
582     PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE));
583 
584     /* always perform construction on the GPU unless forcecpu is true */
585     PetscCall(MatBindToCPU(pch2opus->A, pch2opus->forcecpu));
586   }
587 #if defined(PETSC_H2OPUS_USE_GPU)
588   pch2opus->boundtocpu = pch2opus->forcecpu ? PETSC_TRUE : pch2opus->A->boundtocpu;
589 #endif
590   PetscCall(MatBindToCPU(pch2opus->T, pch2opus->boundtocpu));
591   if (pch2opus->M) { /* see if we can reuse M as initial guess */
592     PetscReal reuse;
593 
594     PetscCall(MatBindToCPU(pch2opus->M, pch2opus->boundtocpu));
595     PetscCall(MatNorm(pch2opus->T, norm, &reuse));
596     if (reuse >= 1.0) PetscCall(MatDestroy(&pch2opus->M));
597   }
598   if (!pch2opus->M) {
599     const char *prefix;
600     PetscCall(MatDuplicate(pch2opus->A, MAT_COPY_VALUES, &pch2opus->M));
601     PetscCall(PCGetOptionsPrefix(pc, &prefix));
602     PetscCall(MatSetOptionsPrefix(pch2opus->M, prefix));
603     PetscCall(MatAppendOptionsPrefix(pch2opus->M, "pc_h2opus_inv_"));
604     PetscCall(MatSetFromOptions(pch2opus->M));
605     PetscCall(PCH2OpusSetUpInit(pc));
606     PetscCall(MatScale(pch2opus->M, pch2opus->s0));
607   }
608   /* A and M have the same h2 matrix nonzero structure, save on reordering routines */
609   PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_TRUE));
610   PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_TRUE));
611   if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &initerr));
612   if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
613   err = initerr;
614   if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", 0, NormTypes[norm], (double)err, (double)(err / initerr)));
615   if (initerr > pch2opus->atol && !pc->failedreason) {
616     PetscInt i;
617 
618     PetscCall(PCH2OpusSetUpSampler_Private(pc));
619     for (i = 0; i < pch2opus->maxits; i++) {
620       Mat         M;
621       const char *prefix;
622 
623       PetscCall(MatDuplicate(pch2opus->M, MAT_SHARE_NONZERO_PATTERN, &M));
624       PetscCall(MatGetOptionsPrefix(pch2opus->M, &prefix));
625       PetscCall(MatSetOptionsPrefix(M, prefix));
626       PetscCall(MatH2OpusSetSamplingMat(M, pch2opus->S, PETSC_DECIDE, PETSC_DECIDE));
627       PetscCall(MatSetFromOptions(M));
628       PetscCall(MatH2OpusSetNativeMult(M, PETSC_TRUE));
629       PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
630       PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
631       PetscCall(MatDestroy(&pch2opus->M));
632       pch2opus->M = M;
633       if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &err));
634       if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", i + 1, NormTypes[norm], (double)err, (double)(err / initerr)));
635       if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
636       if (err < pch2opus->atol || err < pch2opus->rtol * initerr || pc->failedreason) break;
637     }
638   }
639   /* cleanup setup workspace */
640   PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_FALSE));
641   PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_FALSE));
642   PetscCall(VecDestroy(&pch2opus->wns[0]));
643   PetscCall(VecDestroy(&pch2opus->wns[1]));
644   PetscCall(VecDestroy(&pch2opus->wns[2]));
645   PetscCall(VecDestroy(&pch2opus->wns[3]));
646   PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
647   PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
648   PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
649   PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
650   PetscFunctionReturn(PETSC_SUCCESS);
651 }
652 
PCView_H2OPUS(PC pc,PetscViewer viewer)653 static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
654 {
655   PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data;
656   PetscBool  isascii;
657 
658   PetscFunctionBegin;
659   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
660   if (isascii) {
661     if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
662       PetscCall(PetscViewerASCIIPrintf(viewer, "Initial approximation matrix\n"));
663       PetscCall(PetscViewerASCIIPushTab(viewer));
664       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
665       PetscCall(MatView(pch2opus->A, viewer));
666       PetscCall(PetscViewerPopFormat(viewer));
667       PetscCall(PetscViewerASCIIPopTab(viewer));
668     }
669     if (pch2opus->M) {
670       PetscCall(PetscViewerASCIIPrintf(viewer, "Inner matrix constructed\n"));
671       PetscCall(PetscViewerASCIIPushTab(viewer));
672       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL));
673       PetscCall(MatView(pch2opus->M, viewer));
674       PetscCall(PetscViewerPopFormat(viewer));
675       PetscCall(PetscViewerASCIIPopTab(viewer));
676     }
677     PetscCall(PetscViewerASCIIPrintf(viewer, "Initial scaling: %g\n", pch2opus->s0));
678   }
679   PetscFunctionReturn(PETSC_SUCCESS);
680 }
681 
682 /*MC
683      PCH2OPUS = "h2opus" - A preconditioner type for, `MATH2OPUS`, hierarchical matrices using the H2Opus package.
684 
685    Options Database Keys:
686 +   -pc_type h2opus - pc type to "h2opus" during a call to `PCSetFromOptions()`
687 .   -pc_h2opus_maxits - maximum number of iterations for Newton-Schultz
688 .   -pc_h2opus_monitor - monitor Newton-Schultz convergence
689 .   -pc_h2opus_atol - absolute tolerance
690 .   -pc_h2opus_rtol - relative tolerance
691 .   -pc_h2opus_norm_type - normtype
692 .   -pc_h2opus_hyperorder - Hyper power order of sampling
693 .   -pc_h2opus_leafsize - leaf size when constructed from kernel
694 .   -pc_h2opus_eta - admissibility condition tolerance
695 .   -pc_h2opus_maxrank - maximum rank when constructed from matvecs
696 .   -pc_h2opus_samples - number of samples to be taken concurrently when constructing from matvecs
697 .   -pc_h2opus_mrtol - relative tolerance for construction from sampling
698 -   -pc_h2opus_forcecpu - force construction of preconditioner on CPU
699 
700    Level: intermediate
701 
702 .seealso: [](ch_ksp), `MATH2OPUS`, `MATHTOOL`, `MATDENSE`, `MatCreateH2OpusFromKernel()`, `MatCreateH2OpusFromMat()`
703 M*/
704 
PCCreate_H2OPUS(PC pc)705 PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
706 {
707   PC_H2OPUS *pch2opus;
708 
709   PetscFunctionBegin;
710   PetscCall(PetscNew(&pch2opus));
711   pc->data = (void *)pch2opus;
712 
713   pch2opus->atol       = 1.e-2;
714   pch2opus->rtol       = 1.e-6;
715   pch2opus->maxits     = 50;
716   pch2opus->hyperorder = 1; /* defaults to basic Newton-Schultz */
717   pch2opus->normtype   = NORM_2;
718 
719   /* these are needed when we are sampling the pmat */
720   pch2opus->eta           = PETSC_DECIDE;
721   pch2opus->leafsize      = PETSC_DECIDE;
722   pch2opus->max_rank      = PETSC_DECIDE;
723   pch2opus->bs            = PETSC_DECIDE;
724   pch2opus->mrtol         = PETSC_DECIDE;
725   pch2opus->boundtocpu    = PetscDefined(H2OPUS_USE_GPU) ? PETSC_FALSE : PETSC_TRUE;
726   pc->ops->destroy        = PCDestroy_H2OPUS;
727   pc->ops->setup          = PCSetUp_H2OPUS;
728   pc->ops->apply          = PCApply_H2OPUS;
729   pc->ops->matapply       = PCApplyMat_H2OPUS;
730   pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
731   pc->ops->reset          = PCReset_H2OPUS;
732   pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
733   pc->ops->view           = PCView_H2OPUS;
734 
735   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_H2OPUS));
736   PetscFunctionReturn(PETSC_SUCCESS);
737 }
738