xref: /petsc/src/ksp/pc/impls/h2opus/pch2opus.c (revision daa037dfd3c3bec8dc8659548d2b20b07c1dc6de)
1 #include <petsc/private/pcimpl.h>
2 #include <petsc/private/matimpl.h>
3 #include <h2opusconf.h>
4 
5 /* Use GPU only if H2OPUS is configured for GPU */
6 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU)
7 #define PETSC_H2OPUS_USE_GPU
8 #endif
9 
10 typedef struct {
11   Mat         A;
12   Mat         M;
13   PetscScalar s0;
14 
15   /* sampler for Newton-Schultz */
16   Mat      S;
17   PetscInt hyperorder;
18   Vec      wns[4];
19   Mat      wnsmat[4];
20 
21   /* convergence testing */
22   Mat       T;
23   Vec       w;
24   PetscBool testMA;
25 
26   /* Support for PCSetCoordinates */
27   PetscInt  sdim;
28   PetscInt  nlocc;
29   PetscReal *coords;
30 
31   /* Newton-Schultz customization */
32   PetscInt  maxits;
33   PetscReal rtol,atol;
34   PetscBool monitor;
35   PetscBool useapproximatenorms;
36   NormType  normtype;
37 
38   /* Used when pmat != MATH2OPUS */
39   PetscReal eta;
40   PetscInt  leafsize;
41   PetscInt  max_rank;
42   PetscInt  bs;
43   PetscReal mrtol;
44 
45   PetscBool boundtocpu;
46 } PC_H2OPUS;
47 
48 PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat,NormType,PetscReal*);
49 
50 static PetscErrorCode PCReset_H2OPUS(PC pc)
51 {
52   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
53 
54   PetscFunctionBegin;
55   pch2opus->sdim  = 0;
56   pch2opus->nlocc = 0;
57   PetscCall(PetscFree(pch2opus->coords));
58   PetscCall(MatDestroy(&pch2opus->A));
59   PetscCall(MatDestroy(&pch2opus->M));
60   PetscCall(MatDestroy(&pch2opus->T));
61   PetscCall(VecDestroy(&pch2opus->w));
62   PetscCall(MatDestroy(&pch2opus->S));
63   PetscCall(VecDestroy(&pch2opus->wns[0]));
64   PetscCall(VecDestroy(&pch2opus->wns[1]));
65   PetscCall(VecDestroy(&pch2opus->wns[2]));
66   PetscCall(VecDestroy(&pch2opus->wns[3]));
67   PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
68   PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
69   PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
70   PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
71   PetscFunctionReturn(0);
72 }
73 
74 static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords)
75 {
76   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
77   PetscBool  reset    = PETSC_TRUE;
78 
79   PetscFunctionBegin;
80   if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) {
81     PetscCall(PetscArraycmp(pch2opus->coords,coords,sdim*nlocc,&reset));
82     reset = (PetscBool)!reset;
83   }
84   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&reset,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc)));
85   if (reset) {
86     PetscCall(PCReset_H2OPUS(pc));
87     PetscCall(PetscMalloc1(sdim*nlocc,&pch2opus->coords));
88     PetscCall(PetscArraycpy(pch2opus->coords,coords,sdim*nlocc));
89     pch2opus->sdim  = sdim;
90     pch2opus->nlocc = nlocc;
91   }
92   PetscFunctionReturn(0);
93 }
94 
95 static PetscErrorCode PCDestroy_H2OPUS(PC pc)
96 {
97   PetscFunctionBegin;
98   PetscCall(PCReset_H2OPUS(pc));
99   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",NULL));
100   PetscCall(PetscFree(pc->data));
101   PetscFunctionReturn(0);
102 }
103 
104 static PetscErrorCode PCSetFromOptions_H2OPUS(PetscOptionItems *PetscOptionsObject,PC pc)
105 {
106   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
107 
108   PetscFunctionBegin;
109   PetscCall(PetscOptionsHead(PetscOptionsObject,"H2OPUS options"));
110   PetscCall(PetscOptionsInt("-pc_h2opus_maxits","Maximum number of iterations for Newton-Schultz",NULL,pch2opus->maxits,&pch2opus->maxits,NULL));
111   PetscCall(PetscOptionsBool("-pc_h2opus_monitor","Monitor Newton-Schultz convergence",NULL,pch2opus->monitor,&pch2opus->monitor,NULL));
112   PetscCall(PetscOptionsReal("-pc_h2opus_atol","Absolute tolerance",NULL,pch2opus->atol,&pch2opus->atol,NULL));
113   PetscCall(PetscOptionsReal("-pc_h2opus_rtol","Relative tolerance",NULL,pch2opus->rtol,&pch2opus->rtol,NULL));
114   PetscCall(PetscOptionsEnum("-pc_h2opus_norm_type","Norm type for convergence monitoring",NULL,NormTypes,(PetscEnum)pch2opus->normtype,(PetscEnum*)&pch2opus->normtype,NULL));
115   PetscCall(PetscOptionsInt("-pc_h2opus_hyperorder","Hyper power order of sampling",NULL,pch2opus->hyperorder,&pch2opus->hyperorder,NULL));
116   PetscCall(PetscOptionsInt("-pc_h2opus_leafsize","Leaf size when constructed from kernel",NULL,pch2opus->leafsize,&pch2opus->leafsize,NULL));
117   PetscCall(PetscOptionsReal("-pc_h2opus_eta","Admissibility condition tolerance",NULL,pch2opus->eta,&pch2opus->eta,NULL));
118   PetscCall(PetscOptionsInt("-pc_h2opus_maxrank","Maximum rank when constructed from matvecs",NULL,pch2opus->max_rank,&pch2opus->max_rank,NULL));
119   PetscCall(PetscOptionsInt("-pc_h2opus_samples","Number of samples to be taken concurrently when constructing from matvecs",NULL,pch2opus->bs,&pch2opus->bs,NULL));
120   PetscCall(PetscOptionsReal("-pc_h2opus_mrtol","Relative tolerance for construction from sampling",NULL,pch2opus->mrtol,&pch2opus->mrtol,NULL));
121   PetscCall(PetscOptionsTail());
122   PetscFunctionReturn(0);
123 }
124 
125 typedef struct {
126   Mat A;
127   Mat M;
128   Vec w;
129 } AAtCtx;
130 
131 static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y)
132 {
133   AAtCtx *aat;
134 
135   PetscFunctionBegin;
136   PetscCall(MatShellGetContext(A,(void*)&aat));
137   /* PetscCall(MatMultTranspose(aat->M,x,aat->w)); */
138   PetscCall(MatMultTranspose(aat->A,x,aat->w));
139   PetscCall(MatMult(aat->A,aat->w,y));
140   PetscFunctionReturn(0);
141 }
142 
143 static PetscErrorCode PCH2OpusSetUpInit(PC pc)
144 {
145   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
146   Mat        A        = pc->useAmat ? pc->mat : pc->pmat, AAt;
147   PetscInt   M,m;
148   VecType    vtype;
149   PetscReal  n;
150   AAtCtx     aat;
151 
152   PetscFunctionBegin;
153   aat.A = A;
154   aat.M = pch2opus->M; /* unused so far */
155   PetscCall(MatCreateVecs(A,NULL,&aat.w));
156   PetscCall(MatGetSize(A,&M,NULL));
157   PetscCall(MatGetLocalSize(A,&m,NULL));
158   PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A),m,m,M,M,&aat,&AAt));
159   PetscCall(MatBindToCPU(AAt,pch2opus->boundtocpu));
160   PetscCall(MatShellSetOperation(AAt,MATOP_MULT,(void (*)(void))MatMult_AAt));
161   PetscCall(MatShellSetOperation(AAt,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMult_AAt));
162   PetscCall(MatShellSetOperation(AAt,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS));
163   PetscCall(MatGetVecType(A,&vtype));
164   PetscCall(MatShellSetVecType(AAt,vtype));
165   PetscCall(MatNorm(AAt,NORM_1,&n));
166   pch2opus->s0 = 1./n;
167   PetscCall(VecDestroy(&aat.w));
168   PetscCall(MatDestroy(&AAt));
169   PetscFunctionReturn(0);
170 }
171 
172 static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t)
173 {
174   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
175 
176   PetscFunctionBegin;
177   if (t) PetscCall(MatMultTranspose(pch2opus->M,x,y));
178   else  PetscCall(MatMult(pch2opus->M,x,y));
179   PetscFunctionReturn(0);
180 }
181 
182 static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t)
183 {
184   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
185 
186   PetscFunctionBegin;
187   if (t) PetscCall(MatTransposeMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y));
188   else   PetscCall(MatMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y));
189   PetscFunctionReturn(0);
190 }
191 
192 static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y)
193 {
194   PetscFunctionBegin;
195   PetscCall(PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_FALSE));
196   PetscFunctionReturn(0);
197 }
198 
199 static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y)
200 {
201   PetscFunctionBegin;
202   PetscCall(PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_TRUE));
203   PetscFunctionReturn(0);
204 }
205 
206 static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y)
207 {
208   PetscFunctionBegin;
209   PetscCall(PCApplyKernel_H2OPUS(pc,x,y,PETSC_FALSE));
210   PetscFunctionReturn(0);
211 }
212 
213 static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y)
214 {
215   PetscFunctionBegin;
216   PetscCall(PCApplyKernel_H2OPUS(pc,x,y,PETSC_TRUE));
217   PetscFunctionReturn(0);
218 }
219 
220 /* used to test the norm of (M^-1 A - I) */
221 static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t)
222 {
223   PC         pc;
224   Mat        A;
225   PC_H2OPUS *pch2opus;
226   PetscBool  sideleft = PETSC_TRUE;
227 
228   PetscFunctionBegin;
229   PetscCall(MatShellGetContext(M,(void*)&pc));
230   pch2opus = (PC_H2OPUS*)pc->data;
231   if (!pch2opus->w) PetscCall(MatCreateVecs(pch2opus->M,&pch2opus->w,NULL));
232   A = pch2opus->A;
233   PetscCall(VecBindToCPU(pch2opus->w,pch2opus->boundtocpu));
234   if (t) {
235     if (sideleft) {
236       PetscCall(PCApplyTranspose_H2OPUS(pc,x,pch2opus->w));
237       PetscCall(MatMultTranspose(A,pch2opus->w,y));
238     } else {
239       PetscCall(MatMultTranspose(A,x,pch2opus->w));
240       PetscCall(PCApplyTranspose_H2OPUS(pc,pch2opus->w,y));
241     }
242   } else {
243     if (sideleft) {
244       PetscCall(MatMult(A,x,pch2opus->w));
245       PetscCall(PCApply_H2OPUS(pc,pch2opus->w,y));
246     } else {
247       PetscCall(PCApply_H2OPUS(pc,x,pch2opus->w));
248       PetscCall(MatMult(A,pch2opus->w,y));
249     }
250   }
251   if (!pch2opus->testMA) PetscCall(VecAXPY(y,-1.0,x));
252   PetscFunctionReturn(0);
253 }
254 
255 static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y)
256 {
257   PetscFunctionBegin;
258   PetscCall(MatMultKernel_MAmI(A,x,y,PETSC_FALSE));
259   PetscFunctionReturn(0);
260 }
261 
262 static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y)
263 {
264   PetscFunctionBegin;
265   PetscCall(MatMultKernel_MAmI(A,x,y,PETSC_TRUE));
266   PetscFunctionReturn(0);
267 }
268 
269 /* HyperPower kernel:
270 Y = R = x
271 for i = 1 . . . l - 1 do
272   R = (I - A * Xk) * R
273   Y = Y + R
274 Y = Xk * Y
275 */
276 static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t)
277 {
278   PC         pc;
279   Mat        A;
280   PC_H2OPUS *pch2opus;
281 
282   PetscFunctionBegin;
283   PetscCall(MatShellGetContext(M,(void*)&pc));
284   pch2opus = (PC_H2OPUS*)pc->data;
285   A = pch2opus->A;
286   PetscCall(MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
287   PetscCall(MatCreateVecs(pch2opus->M,pch2opus->wns[2] ? NULL : &pch2opus->wns[2],pch2opus->wns[3] ? NULL : &pch2opus->wns[3]));
288   PetscCall(VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu));
289   PetscCall(VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu));
290   PetscCall(VecBindToCPU(pch2opus->wns[2],pch2opus->boundtocpu));
291   PetscCall(VecBindToCPU(pch2opus->wns[3],pch2opus->boundtocpu));
292   PetscCall(VecCopy(x,pch2opus->wns[0]));
293   PetscCall(VecCopy(x,pch2opus->wns[3]));
294   if (t) {
295     for (PetscInt i=0;i<pch2opus->hyperorder-1;i++) {
296       PetscCall(MatMultTranspose(A,pch2opus->wns[0],pch2opus->wns[1]));
297       PetscCall(PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[2]));
298       PetscCall(VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]));
299       PetscCall(VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]));
300     }
301     PetscCall(PCApplyTranspose_H2OPUS(pc,pch2opus->wns[3],y));
302   } else {
303     for (PetscInt i=0;i<pch2opus->hyperorder-1;i++) {
304       PetscCall(PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]));
305       PetscCall(MatMult(A,pch2opus->wns[1],pch2opus->wns[2]));
306       PetscCall(VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]));
307       PetscCall(VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]));
308     }
309     PetscCall(PCApply_H2OPUS(pc,pch2opus->wns[3],y));
310   }
311   PetscFunctionReturn(0);
312 }
313 
314 static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y)
315 {
316   PetscFunctionBegin;
317   PetscCall(MatMultKernel_Hyper(M,x,y,PETSC_FALSE));
318   PetscFunctionReturn(0);
319 }
320 
321 static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y)
322 {
323   PetscFunctionBegin;
324   PetscCall(MatMultKernel_Hyper(M,x,y,PETSC_TRUE));
325   PetscFunctionReturn(0);
326 }
327 
328 /* Hyper power kernel, MatMat version */
329 static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t)
330 {
331   PC         pc;
332   Mat        A;
333   PC_H2OPUS *pch2opus;
334   PetscInt   i;
335 
336   PetscFunctionBegin;
337   PetscCall(MatShellGetContext(M,(void*)&pc));
338   pch2opus = (PC_H2OPUS*)pc->data;
339   A = pch2opus->A;
340   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
341     PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
342     PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
343   }
344   if (!pch2opus->wnsmat[0]) {
345     PetscCall(MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]));
346     PetscCall(MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]));
347   }
348   if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) {
349     PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
350     PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
351   }
352   if (!pch2opus->wnsmat[2]) {
353     PetscCall(MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[2]));
354     PetscCall(MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[3]));
355   }
356   PetscCall(MatCopy(X,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN));
357   PetscCall(MatCopy(X,pch2opus->wnsmat[3],SAME_NONZERO_PATTERN));
358   if (t) {
359     for (i=0;i<pch2opus->hyperorder-1;i++) {
360       PetscCall(MatTransposeMatMult(A,pch2opus->wnsmat[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]));
361       PetscCall(PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[2]));
362       PetscCall(MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN));
363       PetscCall(MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN));
364     }
365     PetscCall(PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[3],Y));
366   } else {
367     for (i=0;i<pch2opus->hyperorder-1;i++) {
368       PetscCall(PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]));
369       PetscCall(MatMatMult(A,pch2opus->wnsmat[1],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[2]));
370       PetscCall(MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN));
371       PetscCall(MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN));
372     }
373     PetscCall(PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[3],Y));
374   }
375   PetscFunctionReturn(0);
376 }
377 
378 static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y,void *ctx)
379 {
380   PetscFunctionBegin;
381   PetscCall(MatMatMultKernel_Hyper(M,X,Y,PETSC_FALSE));
382   PetscFunctionReturn(0);
383 }
384 
385 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */
386 static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t)
387 {
388   PC         pc;
389   Mat        A;
390   PC_H2OPUS *pch2opus;
391 
392   PetscFunctionBegin;
393   PetscCall(MatShellGetContext(M,(void*)&pc));
394   pch2opus = (PC_H2OPUS*)pc->data;
395   A = pch2opus->A;
396   PetscCall(MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]));
397   PetscCall(VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu));
398   PetscCall(VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu));
399   if (t) {
400     PetscCall(PCApplyTranspose_H2OPUS(pc,x,y));
401     PetscCall(MatMultTranspose(A,y,pch2opus->wns[1]));
402     PetscCall(PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[0]));
403     PetscCall(VecAXPBY(y,-1.,2.,pch2opus->wns[0]));
404   } else {
405     PetscCall(PCApply_H2OPUS(pc,x,y));
406     PetscCall(MatMult(A,y,pch2opus->wns[0]));
407     PetscCall(PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]));
408     PetscCall(VecAXPBY(y,-1.,2.,pch2opus->wns[1]));
409   }
410   PetscFunctionReturn(0);
411 }
412 
413 static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y)
414 {
415   PetscFunctionBegin;
416   PetscCall(MatMultKernel_NS(M,x,y,PETSC_FALSE));
417   PetscFunctionReturn(0);
418 }
419 
420 static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y)
421 {
422   PetscFunctionBegin;
423   PetscCall(MatMultKernel_NS(M,x,y,PETSC_TRUE));
424   PetscFunctionReturn(0);
425 }
426 
427 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */
428 static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t)
429 {
430   PC         pc;
431   Mat        A;
432   PC_H2OPUS *pch2opus;
433 
434   PetscFunctionBegin;
435   PetscCall(MatShellGetContext(M,(void*)&pc));
436   pch2opus = (PC_H2OPUS*)pc->data;
437   A = pch2opus->A;
438   if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) {
439     PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
440     PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
441   }
442   if (!pch2opus->wnsmat[0]) {
443     PetscCall(MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]));
444     PetscCall(MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]));
445   }
446   if (t) {
447     PetscCall(PCApplyTransposeMat_H2OPUS(pc,X,Y));
448     PetscCall(MatTransposeMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]));
449     PetscCall(PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[0]));
450     PetscCall(MatScale(Y,2.));
451     PetscCall(MatAXPY(Y,-1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN));
452   } else {
453     PetscCall(PCApplyMat_H2OPUS(pc,X,Y));
454     PetscCall(MatMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[0]));
455     PetscCall(PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]));
456     PetscCall(MatScale(Y,2.));
457     PetscCall(MatAXPY(Y,-1.,pch2opus->wnsmat[1],SAME_NONZERO_PATTERN));
458   }
459   PetscFunctionReturn(0);
460 }
461 
462 static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx)
463 {
464   PetscFunctionBegin;
465   PetscCall(MatMatMultKernel_NS(M,X,Y,PETSC_FALSE));
466   PetscFunctionReturn(0);
467 }
468 
469 static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc)
470 {
471   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
472   Mat        A        = pc->useAmat ? pc->mat : pc->pmat;
473 
474   PetscFunctionBegin;
475   if (!pch2opus->S) {
476     PetscInt M,N,m,n;
477 
478     PetscCall(MatGetSize(A,&M,&N));
479     PetscCall(MatGetLocalSize(A,&m,&n));
480     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A),m,n,M,N,pc,&pch2opus->S));
481     PetscCall(MatSetBlockSizesFromMats(pch2opus->S,A,A));
482 #if defined(PETSC_H2OPUS_USE_GPU)
483     PetscCall(MatShellSetVecType(pch2opus->S,VECCUDA));
484 #endif
485   }
486   if (pch2opus->hyperorder >= 2) {
487     PetscCall(MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_Hyper));
488     PetscCall(MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_Hyper));
489     PetscCall(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSE,MATDENSE));
490     PetscCall(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSECUDA,MATDENSECUDA));
491   } else {
492     PetscCall(MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_NS));
493     PetscCall(MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_NS));
494     PetscCall(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSE,MATDENSE));
495     PetscCall(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSECUDA,MATDENSECUDA));
496   }
497   PetscCall(MatPropagateSymmetryOptions(A,pch2opus->S));
498   PetscCall(MatBindToCPU(pch2opus->S,pch2opus->boundtocpu));
499   /* XXX */
500   PetscCall(MatSetOption(pch2opus->S,MAT_SYMMETRIC,PETSC_TRUE));
501   PetscFunctionReturn(0);
502 }
503 
504 static PetscErrorCode PCSetUp_H2OPUS(PC pc)
505 {
506   PC_H2OPUS *pch2opus  = (PC_H2OPUS*)pc->data;
507   Mat        A         = pc->useAmat ? pc->mat : pc->pmat;
508   NormType   norm      = pch2opus->normtype;
509   PetscReal  initerr   = 0.0,err;
510   PetscReal  initerrMA = 0.0,errMA;
511   PetscBool  ish2opus;
512 
513   PetscFunctionBegin;
514   if (!pch2opus->T) {
515     PetscInt    M,N,m,n;
516     const char *prefix;
517 
518     PetscCall(PCGetOptionsPrefix(pc,&prefix));
519     PetscCall(MatGetSize(A,&M,&N));
520     PetscCall(MatGetLocalSize(A,&m,&n));
521     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat),m,n,M,N,pc,&pch2opus->T));
522     PetscCall(MatSetBlockSizesFromMats(pch2opus->T,A,A));
523     PetscCall(MatShellSetOperation(pch2opus->T,MATOP_MULT,(void (*)(void))MatMult_MAmI));
524     PetscCall(MatShellSetOperation(pch2opus->T,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_MAmI));
525     PetscCall(MatShellSetOperation(pch2opus->T,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS));
526 #if defined(PETSC_H2OPUS_USE_GPU)
527     PetscCall(MatShellSetVecType(pch2opus->T,VECCUDA));
528 #endif
529     PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)pch2opus->T));
530     PetscCall(MatSetOptionsPrefix(pch2opus->T,prefix));
531     PetscCall(MatAppendOptionsPrefix(pch2opus->T,"pc_h2opus_est_"));
532   }
533   PetscCall(MatDestroy(&pch2opus->A));
534   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATH2OPUS,&ish2opus));
535   if (ish2opus) {
536     PetscCall(PetscObjectReference((PetscObject)A));
537     pch2opus->A = A;
538   } else {
539     const char *prefix;
540     PetscCall(MatCreateH2OpusFromMat(A,pch2opus->sdim,pch2opus->coords,PETSC_FALSE,pch2opus->eta,pch2opus->leafsize,pch2opus->max_rank,pch2opus->bs,pch2opus->mrtol,&pch2opus->A));
541     /* XXX */
542     PetscCall(MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE));
543     PetscCall(PCGetOptionsPrefix(pc,&prefix));
544     PetscCall(MatSetOptionsPrefix(pch2opus->A,prefix));
545     PetscCall(MatAppendOptionsPrefix(pch2opus->A,"pc_h2opus_init_"));
546     PetscCall(MatSetFromOptions(pch2opus->A));
547     PetscCall(MatAssemblyBegin(pch2opus->A,MAT_FINAL_ASSEMBLY));
548     PetscCall(MatAssemblyEnd(pch2opus->A,MAT_FINAL_ASSEMBLY));
549     /* XXX */
550     PetscCall(MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE));
551   }
552 #if defined(PETSC_H2OPUS_USE_GPU)
553   pch2opus->boundtocpu = pch2opus->A->boundtocpu;
554 #endif
555   PetscCall(MatBindToCPU(pch2opus->T,pch2opus->boundtocpu));
556   if (pch2opus->M) { /* see if we can reuse M as initial guess */
557     PetscReal reuse;
558 
559     PetscCall(MatBindToCPU(pch2opus->M,pch2opus->boundtocpu));
560     PetscCall(MatNorm(pch2opus->T,norm,&reuse));
561     if (reuse >= 1.0) PetscCall(MatDestroy(&pch2opus->M));
562   }
563   if (!pch2opus->M) {
564     const char *prefix;
565     PetscCall(MatDuplicate(pch2opus->A,MAT_COPY_VALUES,&pch2opus->M));
566     PetscCall(PCGetOptionsPrefix(pc,&prefix));
567     PetscCall(MatSetOptionsPrefix(pch2opus->M,prefix));
568     PetscCall(MatAppendOptionsPrefix(pch2opus->M,"pc_h2opus_inv_"));
569     PetscCall(MatSetFromOptions(pch2opus->M));
570     PetscCall(PCH2OpusSetUpInit(pc));
571     PetscCall(MatScale(pch2opus->M,pch2opus->s0));
572   }
573   /* A and M have the same h2 matrix structure, save on reordering routines */
574   PetscCall(MatH2OpusSetNativeMult(pch2opus->A,PETSC_TRUE));
575   PetscCall(MatH2OpusSetNativeMult(pch2opus->M,PETSC_TRUE));
576   if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) {
577     PetscCall(MatNorm(pch2opus->T,norm,&initerr));
578     pch2opus->testMA = PETSC_TRUE;
579     PetscCall(MatNorm(pch2opus->T,norm,&initerrMA));
580     pch2opus->testMA = PETSC_FALSE;
581   }
582   if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR;
583   err   = initerr;
584   errMA = initerrMA;
585   if (pch2opus->monitor) {
586     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)err,(double)(err/initerr)));
587     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A||     NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA)));
588   }
589   if (initerr > pch2opus->atol && !pc->failedreason) {
590     PetscInt i;
591 
592     PetscCall(PCH2OpusSetUpSampler_Private(pc));
593     for (i = 0; i < pch2opus->maxits; i++) {
594       Mat         M;
595       const char* prefix;
596 
597       PetscCall(MatDuplicate(pch2opus->M,MAT_SHARE_NONZERO_PATTERN,&M));
598       PetscCall(MatGetOptionsPrefix(pch2opus->M,&prefix));
599       PetscCall(MatSetOptionsPrefix(M,prefix));
600       PetscCall(MatH2OpusSetSamplingMat(M,pch2opus->S,PETSC_DECIDE,PETSC_DECIDE));
601       PetscCall(MatSetFromOptions(M));
602       PetscCall(MatH2OpusSetNativeMult(M,PETSC_TRUE));
603       PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
604       PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
605 
606       PetscCall(MatDestroy(&pch2opus->M));
607       pch2opus->M = M;
608       if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) {
609         PetscCall(MatNorm(pch2opus->T,norm,&err));
610         pch2opus->testMA = PETSC_TRUE;
611         PetscCall(MatNorm(pch2opus->T,norm,&errMA));
612         pch2opus->testMA = PETSC_FALSE;
613       }
614       if (pch2opus->monitor) {
615         PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)err,(double)(err/initerr)));
616         PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A||     NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA)));
617       }
618       if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR;
619       if (err < pch2opus->atol || err < pch2opus->rtol*initerr || pc->failedreason) break;
620     }
621   }
622   /* cleanup setup workspace */
623   PetscCall(MatH2OpusSetNativeMult(pch2opus->A,PETSC_FALSE));
624   PetscCall(MatH2OpusSetNativeMult(pch2opus->M,PETSC_FALSE));
625   PetscCall(VecDestroy(&pch2opus->wns[0]));
626   PetscCall(VecDestroy(&pch2opus->wns[1]));
627   PetscCall(VecDestroy(&pch2opus->wns[2]));
628   PetscCall(VecDestroy(&pch2opus->wns[3]));
629   PetscCall(MatDestroy(&pch2opus->wnsmat[0]));
630   PetscCall(MatDestroy(&pch2opus->wnsmat[1]));
631   PetscCall(MatDestroy(&pch2opus->wnsmat[2]));
632   PetscCall(MatDestroy(&pch2opus->wnsmat[3]));
633   PetscFunctionReturn(0);
634 }
635 
636 static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer)
637 {
638   PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data;
639   PetscBool  isascii;
640 
641   PetscFunctionBegin;
642   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
643   if (isascii) {
644     if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) {
645       PetscCall(PetscViewerASCIIPrintf(viewer,"Initial approximation matrix\n"));
646       PetscCall(PetscViewerASCIIPushTab(viewer));
647       PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL));
648       PetscCall(MatView(pch2opus->A,viewer));
649       PetscCall(PetscViewerPopFormat(viewer));
650       PetscCall(PetscViewerASCIIPopTab(viewer));
651     }
652     if (pch2opus->M) {
653       PetscCall(PetscViewerASCIIPrintf(viewer,"Inner matrix constructed\n"));
654       PetscCall(PetscViewerASCIIPushTab(viewer));
655       PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL));
656       PetscCall(MatView(pch2opus->M,viewer));
657       PetscCall(PetscViewerPopFormat(viewer));
658       PetscCall(PetscViewerASCIIPopTab(viewer));
659     }
660     PetscCall(PetscViewerASCIIPrintf(viewer,"Initial scaling: %g\n",pch2opus->s0));
661   }
662   PetscFunctionReturn(0);
663 }
664 
665 PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc)
666 {
667   PC_H2OPUS *pch2opus;
668 
669   PetscFunctionBegin;
670   PetscCall(PetscNewLog(pc,&pch2opus));
671   pc->data = (void*)pch2opus;
672 
673   pch2opus->atol       = 1.e-2;
674   pch2opus->rtol       = 1.e-6;
675   pch2opus->maxits     = 50;
676   pch2opus->hyperorder = 1; /* defaults to basic NewtonSchultz */
677   pch2opus->normtype   = NORM_2;
678 
679   /* these are needed when we are sampling the pmat */
680   pch2opus->eta        = PETSC_DECIDE;
681   pch2opus->leafsize   = PETSC_DECIDE;
682   pch2opus->max_rank   = PETSC_DECIDE;
683   pch2opus->bs         = PETSC_DECIDE;
684   pch2opus->mrtol      = PETSC_DECIDE;
685 #if defined(PETSC_H2OPUS_USE_GPU)
686   pch2opus->boundtocpu = PETSC_FALSE;
687 #else
688   pch2opus->boundtocpu = PETSC_TRUE;
689 #endif
690   pc->ops->destroy        = PCDestroy_H2OPUS;
691   pc->ops->setup          = PCSetUp_H2OPUS;
692   pc->ops->apply          = PCApply_H2OPUS;
693   pc->ops->matapply       = PCApplyMat_H2OPUS;
694   pc->ops->applytranspose = PCApplyTranspose_H2OPUS;
695   pc->ops->reset          = PCReset_H2OPUS;
696   pc->ops->setfromoptions = PCSetFromOptions_H2OPUS;
697   pc->ops->view           = PCView_H2OPUS;
698 
699   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",PCSetCoordinates_H2OPUS));
700   PetscFunctionReturn(0);
701 }
702