xref: /petsc/src/mat/impls/htool/htool.cxx (revision dbbe0bcd3f3a8fbab5a45420dc06f8387e5764c6)
1 #include <../src/mat/impls/htool/htool.hpp> /*I "petscmat.h" I*/
2 #include <petscblaslapack.h>
3 #include <set>
4 
5 const char *const MatHtoolCompressorTypes[] = { "sympartialACA", "fullACA", "SVD" };
6 const char *const MatHtoolClusteringTypes[] = { "PCARegular", "PCAGeometric", "BoundingBox1Regular", "BoundingBox1Geometric" };
7 const char HtoolCitation[] = "@article{marchand2020two,\n"
8 "  Author = {Marchand, Pierre and Claeys, Xavier and Jolivet, Pierre and Nataf, Fr\\'ed\\'eric and Tournier, Pierre-Henri},\n"
9 "  Title = {Two-level preconditioning for $h$-version boundary element approximation of hypersingular operator with {GenEO}},\n"
10 "  Year = {2020},\n"
11 "  Publisher = {Elsevier},\n"
12 "  Journal = {Numerische Mathematik},\n"
13 "  Volume = {146},\n"
14 "  Pages = {597--628},\n"
15 "  Url = {https://github.com/htool-ddm/htool}\n"
16 "}\n";
17 static PetscBool HtoolCite = PETSC_FALSE;
18 
19 static PetscErrorCode MatGetDiagonal_Htool(Mat A,Vec v)
20 {
21   Mat_Htool      *a = (Mat_Htool*)A->data;
22   PetscScalar    *x;
23   PetscBool      flg;
24 
25   PetscFunctionBegin;
26   PetscCall(MatHasCongruentLayouts(A,&flg));
27   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only congruent layouts supported");
28   PetscCall(VecGetArrayWrite(v,&x));
29   a->hmatrix->copy_local_diagonal(x);
30   PetscCall(VecRestoreArrayWrite(v,&x));
31   PetscCall(VecScale(v,a->s));
32   PetscFunctionReturn(0);
33 }
34 
35 static PetscErrorCode MatGetDiagonalBlock_Htool(Mat A,Mat *b)
36 {
37   Mat_Htool      *a = (Mat_Htool*)A->data;
38   Mat            B;
39   PetscScalar    *ptr;
40   PetscBool      flg;
41 
42   PetscFunctionBegin;
43   PetscCall(MatHasCongruentLayouts(A,&flg));
44   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only congruent layouts supported");
45   PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B)); /* same logic as in MatGetDiagonalBlock_MPIDense() */
46   if (!B) {
47     PetscCall(MatCreateDense(PETSC_COMM_SELF,A->rmap->n,A->rmap->n,A->rmap->n,A->rmap->n,NULL,&B));
48     PetscCall(MatDenseGetArrayWrite(B,&ptr));
49     a->hmatrix->copy_local_diagonal_block(ptr);
50     PetscCall(MatDenseRestoreArrayWrite(B,&ptr));
51     PetscCall(MatPropagateSymmetryOptions(A,B));
52     PetscCall(MatScale(B,a->s));
53     PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
54     *b   = B;
55     PetscCall(MatDestroy(&B));
56   } else *b = B;
57   PetscFunctionReturn(0);
58 }
59 
60 static PetscErrorCode MatMult_Htool(Mat A,Vec x,Vec y)
61 {
62   Mat_Htool         *a = (Mat_Htool*)A->data;
63   const PetscScalar *in;
64   PetscScalar       *out;
65 
66   PetscFunctionBegin;
67   PetscCall(VecGetArrayRead(x,&in));
68   PetscCall(VecGetArrayWrite(y,&out));
69   a->hmatrix->mvprod_local_to_local(in,out);
70   PetscCall(VecRestoreArrayRead(x,&in));
71   PetscCall(VecRestoreArrayWrite(y,&out));
72   PetscCall(VecScale(y,a->s));
73   PetscFunctionReturn(0);
74 }
75 
76 /* naive implementation of MatMultAdd() needed for FEM-BEM coupling via MATNEST */
77 static PetscErrorCode MatMultAdd_Htool(Mat A,Vec v1,Vec v2,Vec v3)
78 {
79   Mat_Htool         *a = (Mat_Htool*)A->data;
80   Vec               tmp;
81   const PetscScalar scale = a->s;
82 
83   PetscFunctionBegin;
84   PetscCall(VecDuplicate(v2,&tmp));
85   PetscCall(VecCopy(v2,v3)); /* no-op in MatMultAdd(bA->m[i][j],bx[j],by[i],by[i]) since VecCopy() checks for x == y */
86   a->s = 1.0; /* set s to 1.0 since VecAXPY() may be used to scale the MatMult() output Vec */
87   PetscCall(MatMult_Htool(A,v1,tmp));
88   PetscCall(VecAXPY(v3,scale,tmp));
89   PetscCall(VecDestroy(&tmp));
90   a->s = scale; /* set s back to its original value */
91   PetscFunctionReturn(0);
92 }
93 
94 static PetscErrorCode MatMultTranspose_Htool(Mat A,Vec x,Vec y)
95 {
96   Mat_Htool         *a = (Mat_Htool*)A->data;
97   const PetscScalar *in;
98   PetscScalar       *out;
99 
100   PetscFunctionBegin;
101   PetscCall(VecGetArrayRead(x,&in));
102   PetscCall(VecGetArrayWrite(y,&out));
103   a->hmatrix->mvprod_transp_local_to_local(in,out);
104   PetscCall(VecRestoreArrayRead(x,&in));
105   PetscCall(VecRestoreArrayWrite(y,&out));
106   PetscCall(VecScale(y,a->s));
107   PetscFunctionReturn(0);
108 }
109 
110 static PetscErrorCode MatIncreaseOverlap_Htool(Mat A,PetscInt is_max,IS is[],PetscInt ov)
111 {
112   std::set<PetscInt> set;
113   const PetscInt     *idx;
114   PetscInt           *oidx,size,bs[2];
115   PetscMPIInt        csize;
116 
117   PetscFunctionBegin;
118   PetscCall(MatGetBlockSizes(A,bs,bs+1));
119   if (bs[0] != bs[1]) bs[0] = 1;
120   for (PetscInt i=0; i<is_max; ++i) {
121     /* basic implementation that adds indices by shifting an IS by -ov, -ov+1..., -1, 1..., ov-1, ov */
122     /* needed to avoid subdomain matrices to replicate A since it is dense                           */
123     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)is[i]),&csize));
124     PetscCheck(csize == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported parallel IS");
125     PetscCall(ISGetSize(is[i],&size));
126     PetscCall(ISGetIndices(is[i],&idx));
127     for (PetscInt j=0; j<size; ++j) {
128       set.insert(idx[j]);
129       for (PetscInt k=1; k<=ov; ++k) {               /* for each layer of overlap      */
130         if (idx[j] - k >= 0) set.insert(idx[j] - k); /* do not insert negative indices */
131         if (idx[j] + k < A->rmap->N && idx[j] + k < A->cmap->N) set.insert(idx[j] + k); /* do not insert indices greater than the dimension of A */
132       }
133     }
134     PetscCall(ISRestoreIndices(is[i],&idx));
135     PetscCall(ISDestroy(is+i));
136     if (bs[0] > 1) {
137       for (std::set<PetscInt>::iterator it=set.cbegin(); it!=set.cend(); it++) {
138         std::vector<PetscInt> block(bs[0]);
139         std::iota(block.begin(),block.end(),(*it/bs[0])*bs[0]);
140         set.insert(block.cbegin(),block.cend());
141       }
142     }
143     size = set.size(); /* size with overlap */
144     PetscCall(PetscMalloc1(size,&oidx));
145     for (const PetscInt j : set) *oidx++ = j;
146     oidx -= size;
147     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,size,oidx,PETSC_OWN_POINTER,is+i));
148   }
149   PetscFunctionReturn(0);
150 }
151 
152 static PetscErrorCode MatCreateSubMatrices_Htool(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
153 {
154   Mat_Htool         *a = (Mat_Htool*)A->data;
155   Mat               D,B,BT;
156   const PetscScalar *copy;
157   PetscScalar       *ptr;
158   const PetscInt    *idxr,*idxc,*it;
159   PetscInt          nrow,m,i;
160   PetscBool         flg;
161 
162   PetscFunctionBegin;
163   if (scall != MAT_REUSE_MATRIX) {
164     PetscCall(PetscCalloc1(n,submat));
165   }
166   for (i=0; i<n; ++i) {
167     PetscCall(ISGetLocalSize(irow[i],&nrow));
168     PetscCall(ISGetLocalSize(icol[i],&m));
169     PetscCall(ISGetIndices(irow[i],&idxr));
170     PetscCall(ISGetIndices(icol[i],&idxc));
171     if (scall != MAT_REUSE_MATRIX) {
172       PetscCall(MatCreateDense(PETSC_COMM_SELF,nrow,m,nrow,m,NULL,(*submat)+i));
173     }
174     PetscCall(MatDenseGetArrayWrite((*submat)[i],&ptr));
175     if (irow[i] == icol[i]) { /* same row and column IS? */
176       PetscCall(MatHasCongruentLayouts(A,&flg));
177       if (flg) {
178         PetscCall(ISSorted(irow[i],&flg));
179         if (flg) { /* sorted IS? */
180           it = std::lower_bound(idxr,idxr+nrow,A->rmap->rstart);
181           if (it != idxr+nrow && *it == A->rmap->rstart) { /* rmap->rstart in IS? */
182             if (std::distance(idxr,it) + A->rmap->n <= nrow) { /* long enough IS to store the local diagonal block? */
183               for (PetscInt j=0; j<A->rmap->n && flg; ++j) if (PetscUnlikely(it[j] != A->rmap->rstart+j)) flg = PETSC_FALSE;
184               if (flg) { /* complete local diagonal block in IS? */
185                 /* fast extraction when the local diagonal block is part of the submatrix, e.g., for PCASM or PCHPDDM
186                  *      [   B   C   E   ]
187                  *  A = [   B   D   E   ]
188                  *      [   B   F   E   ]
189                  */
190                 m = std::distance(idxr,it); /* shift of the coefficient (0,0) of block D from above */
191                 PetscCall(MatGetDiagonalBlock_Htool(A,&D));
192                 PetscCall(MatDenseGetArrayRead(D,&copy));
193                 for (PetscInt k=0; k<A->rmap->n; ++k) {
194                   PetscCall(PetscArraycpy(ptr+(m+k)*nrow+m,copy+k*A->rmap->n,A->rmap->n)); /* block D from above */
195                 }
196                 PetscCall(MatDenseRestoreArrayRead(D,&copy));
197                 if (m) {
198                   a->wrapper->copy_submatrix(nrow,m,idxr,idxc,ptr); /* vertical block B from above */
199                   /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
200                   if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) {
201                     PetscCall(MatCreateDense(PETSC_COMM_SELF,A->rmap->n,m,A->rmap->n,m,ptr+m,&B));
202                     PetscCall(MatDenseSetLDA(B,nrow));
203                     PetscCall(MatCreateDense(PETSC_COMM_SELF,m,A->rmap->n,m,A->rmap->n,ptr+m*nrow,&BT));
204                     PetscCall(MatDenseSetLDA(BT,nrow));
205                     if (A->hermitian == PETSC_BOOL3_TRUE && PetscDefined(USE_COMPLEX)) {
206                       PetscCall(MatHermitianTranspose(B,MAT_REUSE_MATRIX,&BT));
207                     } else {
208                       PetscCall(MatTransposeSetPrecursor(B,BT));
209                       PetscCall(MatTranspose(B,MAT_REUSE_MATRIX,&BT));
210                     }
211                     PetscCall(MatDestroy(&B));
212                     PetscCall(MatDestroy(&BT));
213                   } else {
214                     for (PetscInt k=0; k<A->rmap->n; ++k) { /* block C from above */
215                       a->wrapper->copy_submatrix(m,1,idxr,idxc+m+k,ptr+(m+k)*nrow);
216                     }
217                   }
218                 }
219                 if (m+A->rmap->n != nrow) {
220                   a->wrapper->copy_submatrix(nrow,std::distance(it+A->rmap->n,idxr+nrow),idxr,idxc+m+A->rmap->n,ptr+(m+A->rmap->n)*nrow); /* vertical block E from above */
221                   /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
222                   if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) {
223                     PetscCall(MatCreateDense(PETSC_COMM_SELF,A->rmap->n,nrow-(m+A->rmap->n),A->rmap->n,nrow-(m+A->rmap->n),ptr+(m+A->rmap->n)*nrow+m,&B));
224                     PetscCall(MatDenseSetLDA(B,nrow));
225                     PetscCall(MatCreateDense(PETSC_COMM_SELF,nrow-(m+A->rmap->n),A->rmap->n,nrow-(m+A->rmap->n),A->rmap->n,ptr+m*nrow+m+A->rmap->n,&BT));
226                     PetscCall(MatDenseSetLDA(BT,nrow));
227                     if (A->hermitian == PETSC_BOOL3_TRUE && PetscDefined(USE_COMPLEX)) {
228                       PetscCall(MatHermitianTranspose(B,MAT_REUSE_MATRIX,&BT));
229                     } else {
230                       PetscCall(MatTransposeSetPrecursor(B,BT));
231                       PetscCall(MatTranspose(B,MAT_REUSE_MATRIX,&BT));
232                     }
233                     PetscCall(MatDestroy(&B));
234                     PetscCall(MatDestroy(&BT));
235                   } else {
236                     for (PetscInt k=0; k<A->rmap->n; ++k) { /* block F from above */
237                       a->wrapper->copy_submatrix(std::distance(it+A->rmap->n,idxr+nrow),1,it+A->rmap->n,idxc+m+k,ptr+(m+k)*nrow+m+A->rmap->n);
238                     }
239                   }
240                 }
241               } /* complete local diagonal block not in IS */
242             } else flg = PETSC_FALSE; /* IS not long enough to store the local diagonal block */
243           } else flg = PETSC_FALSE; /* rmap->rstart not in IS */
244         } /* unsorted IS */
245       }
246     } else flg = PETSC_FALSE; /* different row and column IS */
247     if (!flg) a->wrapper->copy_submatrix(nrow,m,idxr,idxc,ptr); /* reassemble everything */
248     PetscCall(ISRestoreIndices(irow[i],&idxr));
249     PetscCall(ISRestoreIndices(icol[i],&idxc));
250     PetscCall(MatDenseRestoreArrayWrite((*submat)[i],&ptr));
251     PetscCall(MatScale((*submat)[i],a->s));
252   }
253   PetscFunctionReturn(0);
254 }
255 
256 static PetscErrorCode MatDestroy_Htool(Mat A)
257 {
258   Mat_Htool               *a = (Mat_Htool*)A->data;
259   PetscContainer          container;
260   MatHtoolKernelTranspose *kernelt;
261 
262   PetscFunctionBegin;
263   PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL));
264   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_seqdense_C",NULL));
265   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_mpidense_C",NULL));
266   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_seqdense_C",NULL));
267   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_mpidense_C",NULL));
268   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetHierarchicalMat_C",NULL));
269   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatHtoolSetKernel_C",NULL));
270   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationSource_C",NULL));
271   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationTarget_C",NULL));
272   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatHtoolUsePermutation_C",NULL));
273   PetscCall(PetscObjectQuery((PetscObject)A,"KernelTranspose",(PetscObject*)&container));
274   if (container) { /* created in MatTranspose_Htool() */
275     PetscCall(PetscContainerGetPointer(container,(void**)&kernelt));
276     PetscCall(MatDestroy(&kernelt->A));
277     PetscCall(PetscFree(kernelt));
278     PetscCall(PetscContainerDestroy(&container));
279     PetscCall(PetscObjectCompose((PetscObject)A,"KernelTranspose",NULL));
280   }
281   if (a->gcoords_source != a->gcoords_target) {
282     PetscCall(PetscFree(a->gcoords_source));
283   }
284   PetscCall(PetscFree(a->gcoords_target));
285   PetscCall(PetscFree2(a->work_source,a->work_target));
286   delete a->wrapper;
287   delete a->hmatrix;
288   PetscCall(PetscFree(A->data));
289   PetscFunctionReturn(0);
290 }
291 
292 static PetscErrorCode MatView_Htool(Mat A,PetscViewer pv)
293 {
294   Mat_Htool      *a = (Mat_Htool*)A->data;
295   PetscBool      flg;
296 
297   PetscFunctionBegin;
298   PetscCall(PetscObjectTypeCompare((PetscObject)pv,PETSCVIEWERASCII,&flg));
299   if (flg) {
300     PetscCall(PetscViewerASCIIPrintf(pv,"symmetry: %c\n",a->hmatrix->get_symmetry_type()));
301     if (PetscAbsScalar(a->s-1.0) > PETSC_MACHINE_EPSILON) {
302 #if defined(PETSC_USE_COMPLEX)
303       PetscCall(PetscViewerASCIIPrintf(pv,"scaling: %g+%gi\n",(double)PetscRealPart(a->s),(double)PetscImaginaryPart(a->s)));
304 #else
305       PetscCall(PetscViewerASCIIPrintf(pv,"scaling: %g\n",(double)a->s));
306 #endif
307     }
308     PetscCall(PetscViewerASCIIPrintf(pv,"minimum cluster size: %" PetscInt_FMT "\n",a->bs[0]));
309     PetscCall(PetscViewerASCIIPrintf(pv,"maximum block size: %" PetscInt_FMT "\n",a->bs[1]));
310     PetscCall(PetscViewerASCIIPrintf(pv,"epsilon: %g\n",(double)a->epsilon));
311     PetscCall(PetscViewerASCIIPrintf(pv,"eta: %g\n",(double)a->eta));
312     PetscCall(PetscViewerASCIIPrintf(pv,"minimum target depth: %" PetscInt_FMT "\n",a->depth[0]));
313     PetscCall(PetscViewerASCIIPrintf(pv,"minimum source depth: %" PetscInt_FMT "\n",a->depth[1]));
314     PetscCall(PetscViewerASCIIPrintf(pv,"compressor: %s\n",MatHtoolCompressorTypes[a->compressor]));
315     PetscCall(PetscViewerASCIIPrintf(pv,"clustering: %s\n",MatHtoolClusteringTypes[a->clustering]));
316     PetscCall(PetscViewerASCIIPrintf(pv,"compression ratio: %s\n",a->hmatrix->get_infos("Compression_ratio").c_str()));
317     PetscCall(PetscViewerASCIIPrintf(pv,"space saving: %s\n",a->hmatrix->get_infos("Space_saving").c_str()));
318     PetscCall(PetscViewerASCIIPrintf(pv,"number of dense (resp. low rank) matrices: %s (resp. %s)\n",a->hmatrix->get_infos("Number_of_dmat").c_str(),a->hmatrix->get_infos("Number_of_lrmat").c_str()));
319     PetscCall(PetscViewerASCIIPrintf(pv,"(minimum, mean, maximum) dense block sizes: (%s, %s, %s)\n",a->hmatrix->get_infos("Dense_block_size_min").c_str(),a->hmatrix->get_infos("Dense_block_size_mean").c_str(),a->hmatrix->get_infos("Dense_block_size_max").c_str()));
320     PetscCall(PetscViewerASCIIPrintf(pv,"(minimum, mean, maximum) low rank block sizes: (%s, %s, %s)\n",a->hmatrix->get_infos("Low_rank_block_size_min").c_str(),a->hmatrix->get_infos("Low_rank_block_size_mean").c_str(),a->hmatrix->get_infos("Low_rank_block_size_max").c_str()));
321     PetscCall(PetscViewerASCIIPrintf(pv,"(minimum, mean, maximum) ranks: (%s, %s, %s)\n",a->hmatrix->get_infos("Rank_min").c_str(),a->hmatrix->get_infos("Rank_mean").c_str(),a->hmatrix->get_infos("Rank_max").c_str()));
322   }
323   PetscFunctionReturn(0);
324 }
325 
326 static PetscErrorCode MatScale_Htool(Mat A,PetscScalar s)
327 {
328   Mat_Htool *a = (Mat_Htool*)A->data;
329 
330   PetscFunctionBegin;
331   a->s *= s;
332   PetscFunctionReturn(0);
333 }
334 
335 /* naive implementation of MatGetRow() needed for MatConvert_Nest_AIJ() */
336 static PetscErrorCode MatGetRow_Htool(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
337 {
338   Mat_Htool      *a = (Mat_Htool*)A->data;
339   PetscInt       *idxc;
340   PetscBLASInt   one = 1,bn;
341 
342   PetscFunctionBegin;
343   if (nz) *nz = A->cmap->N;
344   if (idx || v) { /* even if !idx, need to set idxc for htool::copy_submatrix() */
345     PetscCall(PetscMalloc1(A->cmap->N,&idxc));
346     for (PetscInt i=0; i<A->cmap->N; ++i) idxc[i] = i;
347   }
348   if (idx) *idx = idxc;
349   if (v) {
350     PetscCall(PetscMalloc1(A->cmap->N,v));
351     if (a->wrapper) a->wrapper->copy_submatrix(1,A->cmap->N,&row,idxc,*v);
352     else reinterpret_cast<htool::VirtualGenerator<PetscScalar>*>(a->kernelctx)->copy_submatrix(1,A->cmap->N,&row,idxc,*v);
353     PetscCall(PetscBLASIntCast(A->cmap->N,&bn));
354     PetscCallBLAS("BLASscal",BLASscal_(&bn,&a->s,*v,&one));
355   }
356   if (!idx) {
357     PetscCall(PetscFree(idxc));
358   }
359   PetscFunctionReturn(0);
360 }
361 
362 static PetscErrorCode MatRestoreRow_Htool(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
363 {
364   PetscFunctionBegin;
365   if (nz) *nz = 0;
366   if (idx) {
367     PetscCall(PetscFree(*idx));
368   }
369   if (v) {
370     PetscCall(PetscFree(*v));
371   }
372   PetscFunctionReturn(0);
373 }
374 
375 static PetscErrorCode MatSetFromOptions_Htool(Mat A,PetscOptionItems *PetscOptionsObject)
376 {
377   Mat_Htool      *a = (Mat_Htool*)A->data;
378   PetscInt       n;
379   PetscBool      flg;
380 
381   PetscFunctionBegin;
382   PetscOptionsHeadBegin(PetscOptionsObject,"Htool options");
383   PetscCall(PetscOptionsInt("-mat_htool_min_cluster_size","Minimal leaf size in cluster tree",NULL,a->bs[0],a->bs,NULL));
384   PetscCall(PetscOptionsInt("-mat_htool_max_block_size","Maximal number of coefficients in a dense block",NULL,a->bs[1],a->bs + 1,NULL));
385   PetscCall(PetscOptionsReal("-mat_htool_epsilon","Relative error in Frobenius norm when approximating a block",NULL,a->epsilon,&a->epsilon,NULL));
386   PetscCall(PetscOptionsReal("-mat_htool_eta","Admissibility condition tolerance",NULL,a->eta,&a->eta,NULL));
387   PetscCall(PetscOptionsInt("-mat_htool_min_target_depth","Minimal cluster tree depth associated with the rows",NULL,a->depth[0],a->depth,NULL));
388   PetscCall(PetscOptionsInt("-mat_htool_min_source_depth","Minimal cluster tree depth associated with the columns",NULL,a->depth[1],a->depth + 1,NULL));
389   n = 0;
390   PetscCall(PetscOptionsEList("-mat_htool_compressor","Type of compression","MatHtoolCompressorType",MatHtoolCompressorTypes,PETSC_STATIC_ARRAY_LENGTH(MatHtoolCompressorTypes),MatHtoolCompressorTypes[MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA],&n,&flg));
391   if (flg) a->compressor = MatHtoolCompressorType(n);
392   n = 0;
393   PetscCall(PetscOptionsEList("-mat_htool_clustering","Type of clustering","MatHtoolClusteringType",MatHtoolClusteringTypes,PETSC_STATIC_ARRAY_LENGTH(MatHtoolClusteringTypes),MatHtoolClusteringTypes[MAT_HTOOL_CLUSTERING_PCA_REGULAR],&n,&flg));
394   if (flg) a->clustering = MatHtoolClusteringType(n);
395   PetscOptionsHeadEnd();
396   PetscFunctionReturn(0);
397 }
398 
399 static PetscErrorCode MatAssemblyEnd_Htool(Mat A,MatAssemblyType type)
400 {
401   Mat_Htool                                                    *a = (Mat_Htool*)A->data;
402   const PetscInt                                               *ranges;
403   PetscInt                                                     *offset;
404   PetscMPIInt                                                  size;
405   char                                                         S = PetscDefined(USE_COMPLEX) && A->hermitian == PETSC_BOOL3_TRUE ? 'H' : (A->symmetric == PETSC_BOOL3_TRUE ? 'S' : 'N'),uplo = S == 'N' ? 'N' : 'U';
406   htool::VirtualGenerator<PetscScalar>                         *generator = nullptr;
407   std::shared_ptr<htool::VirtualCluster>                       t,s = nullptr;
408   std::shared_ptr<htool::VirtualLowRankGenerator<PetscScalar>> compressor = nullptr;
409 
410   PetscFunctionBegin;
411   PetscCall(PetscCitationsRegister(HtoolCitation,&HtoolCite));
412   delete a->wrapper;
413   delete a->hmatrix;
414   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
415   PetscCall(PetscMalloc1(2*size,&offset));
416   PetscCall(MatGetOwnershipRanges(A,&ranges));
417   for (PetscInt i=0; i<size; ++i) {
418     offset[2*i] = ranges[i];
419     offset[2*i+1] = ranges[i+1] - ranges[i];
420   }
421   switch (a->clustering) {
422   case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
423     t = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
424     break;
425   case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
426     t = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
427     break;
428   case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
429     t = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::RegularSplitting>>>(a->dim);
430     break;
431   default:
432     t = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::RegularSplitting>>>(a->dim);
433   }
434   t->set_minclustersize(a->bs[0]);
435   t->build(A->rmap->N,a->gcoords_target,offset);
436   if (a->kernel) a->wrapper = new WrapperHtool(A->rmap->N,A->cmap->N,a->dim,a->kernel,a->kernelctx);
437   else {
438     a->wrapper = NULL;
439     generator = reinterpret_cast<htool::VirtualGenerator<PetscScalar>*>(a->kernelctx);
440   }
441   if (a->gcoords_target != a->gcoords_source) {
442     PetscCall(MatGetOwnershipRangesColumn(A,&ranges));
443     for (PetscInt i=0; i<size; ++i) {
444       offset[2*i] = ranges[i];
445       offset[2*i+1] = ranges[i+1] - ranges[i];
446     }
447     switch (a->clustering) {
448     case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
449       s = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
450       break;
451     case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
452       s = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
453       break;
454     case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
455       s = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::RegularSplitting>>>(a->dim);
456       break;
457     default:
458       s = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::RegularSplitting>>>(a->dim);
459     }
460     s->set_minclustersize(a->bs[0]);
461     s->build(A->cmap->N,a->gcoords_source,offset);
462     S = uplo = 'N';
463   }
464   PetscCall(PetscFree(offset));
465   switch (a->compressor) {
466   case MAT_HTOOL_COMPRESSOR_FULL_ACA:
467     compressor = std::make_shared<htool::fullACA<PetscScalar>>();
468     break;
469   case MAT_HTOOL_COMPRESSOR_SVD:
470     compressor = std::make_shared<htool::SVD<PetscScalar>>();
471     break;
472   default:
473     compressor = std::make_shared<htool::sympartialACA<PetscScalar>>();
474   }
475   a->hmatrix = dynamic_cast<htool::VirtualHMatrix<PetscScalar>*>(new htool::HMatrix<PetscScalar>(t,s ? s : t,a->epsilon,a->eta,S,uplo));
476   a->hmatrix->set_compression(compressor);
477   a->hmatrix->set_maxblocksize(a->bs[1]);
478   a->hmatrix->set_mintargetdepth(a->depth[0]);
479   a->hmatrix->set_minsourcedepth(a->depth[1]);
480   if (s) a->hmatrix->build(a->wrapper ? *a->wrapper : *generator,a->gcoords_target,a->gcoords_source);
481   else   a->hmatrix->build(a->wrapper ? *a->wrapper : *generator,a->gcoords_target);
482   PetscFunctionReturn(0);
483 }
484 
485 static PetscErrorCode MatProductNumeric_Htool(Mat C)
486 {
487   Mat_Product       *product = C->product;
488   Mat_Htool         *a = (Mat_Htool*)product->A->data;
489   const PetscScalar *in;
490   PetscScalar       *out;
491   PetscInt          N,lda;
492 
493   PetscFunctionBegin;
494   MatCheckProduct(C,1);
495   PetscCall(MatGetSize(C,NULL,&N));
496   PetscCall(MatDenseGetLDA(C,&lda));
497   PetscCheck(lda == C->rmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")",lda,C->rmap->n);
498   PetscCall(MatDenseGetArrayRead(product->B,&in));
499   PetscCall(MatDenseGetArrayWrite(C,&out));
500   switch (product->type) {
501   case MATPRODUCT_AB:
502     a->hmatrix->mvprod_local_to_local(in,out,N);
503     break;
504   case MATPRODUCT_AtB:
505     a->hmatrix->mvprod_transp_local_to_local(in,out,N);
506     break;
507   default:
508     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProductType %s is not supported",MatProductTypes[product->type]);
509   }
510   PetscCall(MatDenseRestoreArrayWrite(C,&out));
511   PetscCall(MatDenseRestoreArrayRead(product->B,&in));
512   PetscCall(MatScale(C,a->s));
513   PetscFunctionReturn(0);
514 }
515 
516 static PetscErrorCode MatProductSymbolic_Htool(Mat C)
517 {
518   Mat_Product    *product = C->product;
519   Mat            A,B;
520   PetscBool      flg;
521 
522   PetscFunctionBegin;
523   MatCheckProduct(C,1);
524   A = product->A;
525   B = product->B;
526   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,""));
527   PetscCheck(flg,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MatProduct_AB not supported for %s",((PetscObject)product->B)->type_name);
528   switch (product->type) {
529   case MATPRODUCT_AB:
530     if (C->rmap->n == PETSC_DECIDE || C->cmap->n == PETSC_DECIDE || C->rmap->N == PETSC_DECIDE || C->cmap->N == PETSC_DECIDE) {
531       PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
532     }
533     break;
534   case MATPRODUCT_AtB:
535     if (C->rmap->n == PETSC_DECIDE || C->cmap->n == PETSC_DECIDE || C->rmap->N == PETSC_DECIDE || C->cmap->N == PETSC_DECIDE) {
536       PetscCall(MatSetSizes(C,A->cmap->n,B->cmap->n,A->cmap->N,B->cmap->N));
537     }
538     break;
539   default:
540     SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"ProductType %s is not supported",MatProductTypes[product->type]);
541   }
542   PetscCall(MatSetType(C,MATDENSE));
543   PetscCall(MatSetUp(C));
544   PetscCall(MatSetOption(C,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
545   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
546   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
547   C->ops->productsymbolic = NULL;
548   C->ops->productnumeric = MatProductNumeric_Htool;
549   PetscFunctionReturn(0);
550 }
551 
552 static PetscErrorCode MatProductSetFromOptions_Htool(Mat C)
553 {
554   PetscFunctionBegin;
555   MatCheckProduct(C,1);
556   if (C->product->type == MATPRODUCT_AB || C->product->type == MATPRODUCT_AtB) C->ops->productsymbolic = MatProductSymbolic_Htool;
557   PetscFunctionReturn(0);
558 }
559 
560 static PetscErrorCode MatHtoolGetHierarchicalMat_Htool(Mat A,const htool::VirtualHMatrix<PetscScalar> **hmatrix)
561 {
562   Mat_Htool *a = (Mat_Htool*)A->data;
563 
564   PetscFunctionBegin;
565   *hmatrix = a->hmatrix;
566   PetscFunctionReturn(0);
567 }
568 
569 /*@C
570      MatHtoolGetHierarchicalMat - Retrieves the opaque pointer to a Htool virtual matrix stored in a MATHTOOL.
571 
572    Input Parameter:
573 .     A - hierarchical matrix
574 
575    Output Parameter:
576 .     hmatrix - opaque pointer to a Htool virtual matrix
577 
578    Level: advanced
579 
580 .seealso: `MATHTOOL`
581 @*/
582 PETSC_EXTERN PetscErrorCode MatHtoolGetHierarchicalMat(Mat A,const htool::VirtualHMatrix<PetscScalar> **hmatrix)
583 {
584   PetscFunctionBegin;
585   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
586   PetscValidPointer(hmatrix,2);
587   PetscTryMethod(A,"MatHtoolGetHierarchicalMat_C",(Mat,const htool::VirtualHMatrix<PetscScalar>**),(A,hmatrix));
588   PetscFunctionReturn(0);
589 }
590 
591 static PetscErrorCode MatHtoolSetKernel_Htool(Mat A,MatHtoolKernel kernel,void *kernelctx)
592 {
593   Mat_Htool *a = (Mat_Htool*)A->data;
594 
595   PetscFunctionBegin;
596   a->kernel    = kernel;
597   a->kernelctx = kernelctx;
598   delete a->wrapper;
599   if (a->kernel) a->wrapper = new WrapperHtool(A->rmap->N,A->cmap->N,a->dim,a->kernel,a->kernelctx);
600   PetscFunctionReturn(0);
601 }
602 
603 /*@C
604      MatHtoolSetKernel - Sets the kernel and context used for the assembly of a MATHTOOL.
605 
606    Input Parameters:
607 +     A - hierarchical matrix
608 .     kernel - computational kernel (or NULL)
609 -     kernelctx - kernel context (if kernel is NULL, the pointer must be of type htool::VirtualGenerator<PetscScalar>*)
610 
611    Level: advanced
612 
613 .seealso: `MATHTOOL`, `MatCreateHtoolFromKernel()`
614 @*/
615 PETSC_EXTERN PetscErrorCode MatHtoolSetKernel(Mat A,MatHtoolKernel kernel,void *kernelctx)
616 {
617   PetscFunctionBegin;
618   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
619   if (!kernelctx) PetscValidFunction(kernel,2);
620   if (!kernel)    PetscValidPointer(kernelctx,3);
621   PetscTryMethod(A,"MatHtoolSetKernel_C",(Mat,MatHtoolKernel,void*),(A,kernel,kernelctx));
622   PetscFunctionReturn(0);
623 }
624 
625 static PetscErrorCode MatHtoolGetPermutationSource_Htool(Mat A,IS* is)
626 {
627   Mat_Htool             *a = (Mat_Htool*)A->data;
628   std::vector<PetscInt> source;
629 
630   PetscFunctionBegin;
631   source = a->hmatrix->get_source_cluster()->get_local_perm();
632   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),source.size(),source.data(),PETSC_COPY_VALUES,is));
633   PetscCall(ISSetPermutation(*is));
634   PetscFunctionReturn(0);
635 }
636 
637 /*@C
638      MatHtoolGetPermutationSource - Gets the permutation associated to the source cluster.
639 
640    Input Parameter:
641 .     A - hierarchical matrix
642 
643    Output Parameter:
644 .     is - permutation
645 
646    Level: advanced
647 
648 .seealso: `MATHTOOL`, `MatHtoolGetPermutationTarget()`, `MatHtoolUsePermutation()`
649 @*/
650 PETSC_EXTERN PetscErrorCode MatHtoolGetPermutationSource(Mat A,IS* is)
651 {
652   PetscFunctionBegin;
653   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
654   if (!is) PetscValidPointer(is,2);
655   PetscTryMethod(A,"MatHtoolGetPermutationSource_C",(Mat,IS*),(A,is));
656   PetscFunctionReturn(0);
657 }
658 
659 static PetscErrorCode MatHtoolGetPermutationTarget_Htool(Mat A,IS* is)
660 {
661   Mat_Htool             *a = (Mat_Htool*)A->data;
662   std::vector<PetscInt> target;
663 
664   PetscFunctionBegin;
665   target = a->hmatrix->get_target_cluster()->get_local_perm();
666   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),target.size(),target.data(),PETSC_COPY_VALUES,is));
667   PetscCall(ISSetPermutation(*is));
668   PetscFunctionReturn(0);
669 }
670 
671 /*@C
672      MatHtoolGetPermutationTarget - Gets the permutation associated to the target cluster.
673 
674    Input Parameter:
675 .     A - hierarchical matrix
676 
677    Output Parameter:
678 .     is - permutation
679 
680    Level: advanced
681 
682 .seealso: `MATHTOOL`, `MatHtoolGetPermutationSource()`, `MatHtoolUsePermutation()`
683 @*/
684 PETSC_EXTERN PetscErrorCode MatHtoolGetPermutationTarget(Mat A,IS* is)
685 {
686   PetscFunctionBegin;
687   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
688   if (!is) PetscValidPointer(is,2);
689   PetscTryMethod(A,"MatHtoolGetPermutationTarget_C",(Mat,IS*),(A,is));
690   PetscFunctionReturn(0);
691 }
692 
693 static PetscErrorCode MatHtoolUsePermutation_Htool(Mat A,PetscBool use)
694 {
695   Mat_Htool *a = (Mat_Htool*)A->data;
696 
697   PetscFunctionBegin;
698   a->hmatrix->set_use_permutation(use);
699   PetscFunctionReturn(0);
700 }
701 
702 /*@C
703      MatHtoolUsePermutation - Sets whether MATHTOOL should permute input (resp. output) vectors following its internal source (resp. target) permutation.
704 
705    Input Parameters:
706 +     A - hierarchical matrix
707 -     use - Boolean value
708 
709    Level: advanced
710 
711 .seealso: `MATHTOOL`, `MatHtoolGetPermutationSource()`, `MatHtoolGetPermutationTarget()`
712 @*/
713 PETSC_EXTERN PetscErrorCode MatHtoolUsePermutation(Mat A,PetscBool use)
714 {
715   PetscFunctionBegin;
716   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
717   PetscValidLogicalCollectiveBool(A,use,2);
718   PetscTryMethod(A,"MatHtoolUsePermutation_C",(Mat,PetscBool),(A,use));
719   PetscFunctionReturn(0);
720 }
721 
722 static PetscErrorCode MatConvert_Htool_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
723 {
724   Mat            C;
725   Mat_Htool      *a = (Mat_Htool*)A->data;
726   PetscInt       lda;
727   PetscScalar    *array;
728 
729   PetscFunctionBegin;
730   if (reuse == MAT_REUSE_MATRIX) {
731     C = *B;
732     PetscCheck(C->rmap->n == A->rmap->n && C->cmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible dimensions");
733     PetscCall(MatDenseGetLDA(C,&lda));
734     PetscCheck(lda == C->rmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")",lda,C->rmap->n);
735   } else {
736     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C));
737     PetscCall(MatSetSizes(C,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
738     PetscCall(MatSetType(C,MATDENSE));
739     PetscCall(MatSetUp(C));
740     PetscCall(MatSetOption(C,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
741   }
742   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
743   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
744   PetscCall(MatDenseGetArrayWrite(C,&array));
745   a->hmatrix->copy_local_dense_perm(array);
746   PetscCall(MatDenseRestoreArrayWrite(C,&array));
747   PetscCall(MatScale(C,a->s));
748   if (reuse == MAT_INPLACE_MATRIX) {
749     PetscCall(MatHeaderReplace(A,&C));
750   } else *B = C;
751   PetscFunctionReturn(0);
752 }
753 
754 static PetscErrorCode GenEntriesTranspose(PetscInt sdim,PetscInt M,PetscInt N,const PetscInt *rows,const PetscInt *cols,PetscScalar *ptr,void *ctx)
755 {
756   MatHtoolKernelTranspose *generator = (MatHtoolKernelTranspose*)ctx;
757   PetscScalar             *tmp;
758 
759   PetscFunctionBegin;
760   generator->kernel(sdim,N,M,cols,rows,ptr,generator->kernelctx);
761   PetscCall(PetscMalloc1(M*N,&tmp));
762   PetscCall(PetscArraycpy(tmp,ptr,M*N));
763   for (PetscInt i=0; i<M; ++i) {
764     for (PetscInt j=0; j<N; ++j) ptr[i+j*M] = tmp[j+i*N];
765   }
766   PetscCall(PetscFree(tmp));
767   PetscFunctionReturn(0);
768 }
769 
770 /* naive implementation which keeps a reference to the original Mat */
771 static PetscErrorCode MatTranspose_Htool(Mat A,MatReuse reuse,Mat *B)
772 {
773   Mat                     C;
774   Mat_Htool               *a = (Mat_Htool*)A->data,*c;
775   PetscInt                M = A->rmap->N,N = A->cmap->N,m = A->rmap->n,n = A->cmap->n;
776   PetscContainer          container;
777   MatHtoolKernelTranspose *kernelt;
778 
779   PetscFunctionBegin;
780   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*B));
781   PetscCheck(reuse != MAT_INPLACE_MATRIX,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatTranspose() with MAT_INPLACE_MATRIX not supported");
782   if (reuse == MAT_INITIAL_MATRIX) {
783     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C));
784     PetscCall(MatSetSizes(C,n,m,N,M));
785     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
786     PetscCall(MatSetUp(C));
787     PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)C),&container));
788     PetscCall(PetscNew(&kernelt));
789     PetscCall(PetscContainerSetPointer(container,kernelt));
790     PetscCall(PetscObjectCompose((PetscObject)C,"KernelTranspose",(PetscObject)container));
791   } else {
792     C = *B;
793     PetscCall(PetscObjectQuery((PetscObject)C,"KernelTranspose",(PetscObject*)&container));
794     PetscCheck(container,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatTranspose() with MAT_INITIAL_MATRIX first");
795     PetscCall(PetscContainerGetPointer(container,(void**)&kernelt));
796   }
797   c                  = (Mat_Htool*)C->data;
798   c->dim             = a->dim;
799   c->s               = a->s;
800   c->kernel          = GenEntriesTranspose;
801   if (kernelt->A != A) {
802     PetscCall(MatDestroy(&kernelt->A));
803     kernelt->A       = A;
804     PetscCall(PetscObjectReference((PetscObject)A));
805   }
806   kernelt->kernel    = a->kernel;
807   kernelt->kernelctx = a->kernelctx;
808   c->kernelctx       = kernelt;
809   if (reuse == MAT_INITIAL_MATRIX) {
810     PetscCall(PetscMalloc1(N*c->dim,&c->gcoords_target));
811     PetscCall(PetscArraycpy(c->gcoords_target,a->gcoords_source,N*c->dim));
812     if (a->gcoords_target != a->gcoords_source) {
813       PetscCall(PetscMalloc1(M*c->dim,&c->gcoords_source));
814       PetscCall(PetscArraycpy(c->gcoords_source,a->gcoords_target,M*c->dim));
815     } else c->gcoords_source = c->gcoords_target;
816     PetscCall(PetscCalloc2(M,&c->work_source,N,&c->work_target));
817   }
818   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
819   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
820   if (reuse == MAT_INITIAL_MATRIX) *B = C;
821   PetscFunctionReturn(0);
822 }
823 
824 /*@C
825      MatCreateHtoolFromKernel - Creates a MATHTOOL from a user-supplied kernel.
826 
827    Input Parameters:
828 +     comm - MPI communicator
829 .     m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
830 .     n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
831 .     M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
832 .     N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
833 .     spacedim - dimension of the space coordinates
834 .     coords_target - coordinates of the target
835 .     coords_source - coordinates of the source
836 .     kernel - computational kernel (or NULL)
837 -     kernelctx - kernel context (if kernel is NULL, the pointer must be of type htool::VirtualGenerator<PetscScalar>*)
838 
839    Output Parameter:
840 .     B - matrix
841 
842    Options Database Keys:
843 +     -mat_htool_min_cluster_size <PetscInt> - minimal leaf size in cluster tree
844 .     -mat_htool_max_block_size <PetscInt> - maximal number of coefficients in a dense block
845 .     -mat_htool_epsilon <PetscReal> - relative error in Frobenius norm when approximating a block
846 .     -mat_htool_eta <PetscReal> - admissibility condition tolerance
847 .     -mat_htool_min_target_depth <PetscInt> - minimal cluster tree depth associated with the rows
848 .     -mat_htool_min_source_depth <PetscInt> - minimal cluster tree depth associated with the columns
849 .     -mat_htool_compressor <sympartialACA, fullACA, SVD> - type of compression
850 -     -mat_htool_clustering <PCARegular, PCAGeometric, BounbingBox1Regular, BoundingBox1Geometric> - type of clustering
851 
852    Level: intermediate
853 
854 .seealso: `MatCreate()`, `MATHTOOL`, `PCSetCoordinates()`, `MatHtoolSetKernel()`, `MatHtoolCompressorType`, `MATH2OPUS`, `MatCreateH2OpusFromKernel()`
855 @*/
856 PetscErrorCode MatCreateHtoolFromKernel(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt spacedim,const PetscReal coords_target[],const PetscReal coords_source[],MatHtoolKernel kernel,void *kernelctx,Mat *B)
857 {
858   Mat            A;
859   Mat_Htool      *a;
860 
861   PetscFunctionBegin;
862   PetscCall(MatCreate(comm,&A));
863   PetscValidLogicalCollectiveInt(A,spacedim,6);
864   PetscValidRealPointer(coords_target,7);
865   PetscValidRealPointer(coords_source,8);
866   if (!kernelctx) PetscValidFunction(kernel,9);
867   if (!kernel)    PetscValidPointer(kernelctx,10);
868   PetscCall(MatSetSizes(A,m,n,M,N));
869   PetscCall(MatSetType(A,MATHTOOL));
870   PetscCall(MatSetUp(A));
871   a            = (Mat_Htool*)A->data;
872   a->dim       = spacedim;
873   a->s         = 1.0;
874   a->kernel    = kernel;
875   a->kernelctx = kernelctx;
876   PetscCall(PetscCalloc1(A->rmap->N*spacedim,&a->gcoords_target));
877   PetscCall(PetscArraycpy(a->gcoords_target+A->rmap->rstart*spacedim,coords_target,A->rmap->n*spacedim));
878   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,a->gcoords_target,A->rmap->N*spacedim,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)A))); /* global target coordinates */
879   if (coords_target != coords_source) {
880     PetscCall(PetscCalloc1(A->cmap->N*spacedim,&a->gcoords_source));
881     PetscCall(PetscArraycpy(a->gcoords_source+A->cmap->rstart*spacedim,coords_source,A->cmap->n*spacedim));
882     PetscCall(MPIU_Allreduce(MPI_IN_PLACE,a->gcoords_source,A->cmap->N*spacedim,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)A))); /* global source coordinates */
883   } else a->gcoords_source = a->gcoords_target;
884   PetscCall(PetscCalloc2(A->cmap->N,&a->work_source,A->rmap->N,&a->work_target));
885   *B = A;
886   PetscFunctionReturn(0);
887 }
888 
889 /*MC
890      MATHTOOL = "htool" - A matrix type for hierarchical matrices using the Htool package.
891 
892   Use ./configure --download-htool to install PETSc to use Htool.
893 
894    Options Database Keys:
895 .     -mat_type htool - matrix type to "htool" during a call to MatSetFromOptions()
896 
897    Level: beginner
898 
899 .seealso: `MATH2OPUS`, `MATDENSE`, `MatCreateHtoolFromKernel()`, `MatHtoolSetKernel()`
900 M*/
901 PETSC_EXTERN PetscErrorCode MatCreate_Htool(Mat A)
902 {
903   Mat_Htool *a;
904 
905   PetscFunctionBegin;
906   PetscCall(PetscNewLog(A,&a));
907   A->data = (void*)a;
908   PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATHTOOL));
909   PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps)));
910   A->ops->getdiagonal       = MatGetDiagonal_Htool;
911   A->ops->getdiagonalblock  = MatGetDiagonalBlock_Htool;
912   A->ops->mult              = MatMult_Htool;
913   A->ops->multadd           = MatMultAdd_Htool;
914   A->ops->multtranspose     = MatMultTranspose_Htool;
915   if (!PetscDefined(USE_COMPLEX)) A->ops->multhermitiantranspose = MatMultTranspose_Htool;
916   A->ops->increaseoverlap   = MatIncreaseOverlap_Htool;
917   A->ops->createsubmatrices = MatCreateSubMatrices_Htool;
918   A->ops->transpose         = MatTranspose_Htool;
919   A->ops->destroy           = MatDestroy_Htool;
920   A->ops->view              = MatView_Htool;
921   A->ops->setfromoptions    = MatSetFromOptions_Htool;
922   A->ops->scale             = MatScale_Htool;
923   A->ops->getrow            = MatGetRow_Htool;
924   A->ops->restorerow        = MatRestoreRow_Htool;
925   A->ops->assemblyend       = MatAssemblyEnd_Htool;
926   a->dim                    = 0;
927   a->gcoords_target         = NULL;
928   a->gcoords_source         = NULL;
929   a->s                      = 1.0;
930   a->bs[0]                  = 10;
931   a->bs[1]                  = 1000000;
932   a->epsilon                = PetscSqrtReal(PETSC_SMALL);
933   a->eta                    = 10.0;
934   a->depth[0]               = 0;
935   a->depth[1]               = 0;
936   a->compressor             = MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA;
937   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_seqdense_C",MatProductSetFromOptions_Htool));
938   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_mpidense_C",MatProductSetFromOptions_Htool));
939   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_seqdense_C",MatConvert_Htool_Dense));
940   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_mpidense_C",MatConvert_Htool_Dense));
941   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetHierarchicalMat_C",MatHtoolGetHierarchicalMat_Htool));
942   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatHtoolSetKernel_C",MatHtoolSetKernel_Htool));
943   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationSource_C",MatHtoolGetPermutationSource_Htool));
944   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationTarget_C",MatHtoolGetPermutationTarget_Htool));
945   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatHtoolUsePermutation_C",MatHtoolUsePermutation_Htool));
946   PetscFunctionReturn(0);
947 }
948