xref: /petsc/src/mat/impls/htool/htool.cxx (revision b122ec5aa1bd4469eb4e0673542fb7de3f411254)
1 #include <../src/mat/impls/htool/htool.hpp> /*I "petscmat.h" I*/
2 #include <petscblaslapack.h>
3 #include <set>
4 
5 #define ALEN(a) (sizeof(a)/sizeof((a)[0]))
6 const char *const MatHtoolCompressorTypes[] = { "sympartialACA", "fullACA", "SVD" };
7 const char *const MatHtoolClusteringTypes[] = { "PCARegular", "PCAGeometric", "BoundingBox1Regular", "BoundingBox1Geometric" };
8 const char HtoolCitation[] = "@article{marchand2020two,\n"
9 "  Author = {Marchand, Pierre and Claeys, Xavier and Jolivet, Pierre and Nataf, Fr\\'ed\\'eric and Tournier, Pierre-Henri},\n"
10 "  Title = {Two-level preconditioning for $h$-version boundary element approximation of hypersingular operator with {GenEO}},\n"
11 "  Year = {2020},\n"
12 "  Publisher = {Elsevier},\n"
13 "  Journal = {Numerische Mathematik},\n"
14 "  Volume = {146},\n"
15 "  Pages = {597--628},\n"
16 "  Url = {https://github.com/htool-ddm/htool}\n"
17 "}\n";
18 static PetscBool HtoolCite = PETSC_FALSE;
19 
20 static PetscErrorCode MatGetDiagonal_Htool(Mat A,Vec v)
21 {
22   Mat_Htool      *a = (Mat_Htool*)A->data;
23   PetscScalar    *x;
24   PetscBool      flg;
25 
26   PetscFunctionBegin;
27   CHKERRQ(MatHasCongruentLayouts(A,&flg));
28   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only congruent layouts supported");
29   CHKERRQ(VecGetArrayWrite(v,&x));
30   a->hmatrix->copy_local_diagonal(x);
31   CHKERRQ(VecRestoreArrayWrite(v,&x));
32   CHKERRQ(VecScale(v,a->s));
33   PetscFunctionReturn(0);
34 }
35 
36 static PetscErrorCode MatGetDiagonalBlock_Htool(Mat A,Mat *b)
37 {
38   Mat_Htool      *a = (Mat_Htool*)A->data;
39   Mat            B;
40   PetscScalar    *ptr;
41   PetscBool      flg;
42 
43   PetscFunctionBegin;
44   CHKERRQ(MatHasCongruentLayouts(A,&flg));
45   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only congruent layouts supported");
46   CHKERRQ(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B)); /* same logic as in MatGetDiagonalBlock_MPIDense() */
47   if (!B) {
48     CHKERRQ(MatCreateDense(PETSC_COMM_SELF,A->rmap->n,A->rmap->n,A->rmap->n,A->rmap->n,NULL,&B));
49     CHKERRQ(MatDenseGetArrayWrite(B,&ptr));
50     a->hmatrix->copy_local_diagonal_block(ptr);
51     CHKERRQ(MatDenseRestoreArrayWrite(B,&ptr));
52     CHKERRQ(MatPropagateSymmetryOptions(A,B));
53     CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
54     CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
55     CHKERRQ(MatScale(B,a->s));
56     CHKERRQ(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
57     *b   = B;
58     CHKERRQ(MatDestroy(&B));
59   } else *b = B;
60   PetscFunctionReturn(0);
61 }
62 
63 static PetscErrorCode MatMult_Htool(Mat A,Vec x,Vec y)
64 {
65   Mat_Htool         *a = (Mat_Htool*)A->data;
66   const PetscScalar *in;
67   PetscScalar       *out;
68 
69   PetscFunctionBegin;
70   CHKERRQ(VecGetArrayRead(x,&in));
71   CHKERRQ(VecGetArrayWrite(y,&out));
72   a->hmatrix->mvprod_local_to_local(in,out);
73   CHKERRQ(VecRestoreArrayRead(x,&in));
74   CHKERRQ(VecRestoreArrayWrite(y,&out));
75   CHKERRQ(VecScale(y,a->s));
76   PetscFunctionReturn(0);
77 }
78 
79 /* naive implementation of MatMultAdd() needed for FEM-BEM coupling via MATNEST */
80 static PetscErrorCode MatMultAdd_Htool(Mat A,Vec v1,Vec v2,Vec v3)
81 {
82   Mat_Htool         *a = (Mat_Htool*)A->data;
83   Vec               tmp;
84   const PetscScalar scale = a->s;
85 
86   PetscFunctionBegin;
87   CHKERRQ(VecDuplicate(v2,&tmp));
88   CHKERRQ(VecCopy(v2,v3)); /* no-op in MatMultAdd(bA->m[i][j],bx[j],by[i],by[i]) since VecCopy() checks for x == y */
89   a->s = 1.0; /* set s to 1.0 since VecAXPY() may be used to scale the MatMult() output Vec */
90   CHKERRQ(MatMult_Htool(A,v1,tmp));
91   CHKERRQ(VecAXPY(v3,scale,tmp));
92   CHKERRQ(VecDestroy(&tmp));
93   a->s = scale; /* set s back to its original value */
94   PetscFunctionReturn(0);
95 }
96 
97 static PetscErrorCode MatMultTranspose_Htool(Mat A,Vec x,Vec y)
98 {
99   Mat_Htool         *a = (Mat_Htool*)A->data;
100   const PetscScalar *in;
101   PetscScalar       *out;
102 
103   PetscFunctionBegin;
104   CHKERRQ(VecGetArrayRead(x,&in));
105   CHKERRQ(VecGetArrayWrite(y,&out));
106   a->hmatrix->mvprod_transp_local_to_local(in,out);
107   CHKERRQ(VecRestoreArrayRead(x,&in));
108   CHKERRQ(VecRestoreArrayWrite(y,&out));
109   CHKERRQ(VecScale(y,a->s));
110   PetscFunctionReturn(0);
111 }
112 
113 static PetscErrorCode MatIncreaseOverlap_Htool(Mat A,PetscInt is_max,IS is[],PetscInt ov)
114 {
115   std::set<PetscInt> set;
116   const PetscInt     *idx;
117   PetscInt           *oidx,size,bs[2];
118   PetscMPIInt        csize;
119 
120   PetscFunctionBegin;
121   CHKERRQ(MatGetBlockSizes(A,bs,bs+1));
122   if (bs[0] != bs[1]) bs[0] = 1;
123   for (PetscInt i=0; i<is_max; ++i) {
124     /* basic implementation that adds indices by shifting an IS by -ov, -ov+1..., -1, 1..., ov-1, ov */
125     /* needed to avoid subdomain matrices to replicate A since it is dense                           */
126     CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)is[i]),&csize));
127     PetscCheck(csize == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported parallel IS");
128     CHKERRQ(ISGetSize(is[i],&size));
129     CHKERRQ(ISGetIndices(is[i],&idx));
130     for (PetscInt j=0; j<size; ++j) {
131       set.insert(idx[j]);
132       for (PetscInt k=1; k<=ov; ++k) {               /* for each layer of overlap      */
133         if (idx[j] - k >= 0) set.insert(idx[j] - k); /* do not insert negative indices */
134         if (idx[j] + k < A->rmap->N && idx[j] + k < A->cmap->N) set.insert(idx[j] + k); /* do not insert indices greater than the dimension of A */
135       }
136     }
137     CHKERRQ(ISRestoreIndices(is[i],&idx));
138     CHKERRQ(ISDestroy(is+i));
139     if (bs[0] > 1) {
140       for (std::set<PetscInt>::iterator it=set.cbegin(); it!=set.cend(); it++) {
141         std::vector<PetscInt> block(bs[0]);
142         std::iota(block.begin(),block.end(),(*it/bs[0])*bs[0]);
143         set.insert(block.cbegin(),block.cend());
144       }
145     }
146     size = set.size(); /* size with overlap */
147     CHKERRQ(PetscMalloc1(size,&oidx));
148     for (const PetscInt j : set) *oidx++ = j;
149     oidx -= size;
150     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,size,oidx,PETSC_OWN_POINTER,is+i));
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 static PetscErrorCode MatCreateSubMatrices_Htool(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *submat[])
156 {
157   Mat_Htool         *a = (Mat_Htool*)A->data;
158   Mat               D,B,BT;
159   const PetscScalar *copy;
160   PetscScalar       *ptr;
161   const PetscInt    *idxr,*idxc,*it;
162   PetscInt          nrow,m,i;
163   PetscBool         flg;
164 
165   PetscFunctionBegin;
166   if (scall != MAT_REUSE_MATRIX) {
167     CHKERRQ(PetscCalloc1(n,submat));
168   }
169   for (i=0; i<n; ++i) {
170     CHKERRQ(ISGetLocalSize(irow[i],&nrow));
171     CHKERRQ(ISGetLocalSize(icol[i],&m));
172     CHKERRQ(ISGetIndices(irow[i],&idxr));
173     CHKERRQ(ISGetIndices(icol[i],&idxc));
174     if (scall != MAT_REUSE_MATRIX) {
175       CHKERRQ(MatCreateDense(PETSC_COMM_SELF,nrow,m,nrow,m,NULL,(*submat)+i));
176     }
177     CHKERRQ(MatDenseGetArrayWrite((*submat)[i],&ptr));
178     if (irow[i] == icol[i]) { /* same row and column IS? */
179       CHKERRQ(MatHasCongruentLayouts(A,&flg));
180       if (flg) {
181         CHKERRQ(ISSorted(irow[i],&flg));
182         if (flg) { /* sorted IS? */
183           it = std::lower_bound(idxr,idxr+nrow,A->rmap->rstart);
184           if (it != idxr+nrow && *it == A->rmap->rstart) { /* rmap->rstart in IS? */
185             if (std::distance(idxr,it) + A->rmap->n <= nrow) { /* long enough IS to store the local diagonal block? */
186               for (PetscInt j=0; j<A->rmap->n && flg; ++j) if (PetscUnlikely(it[j] != A->rmap->rstart+j)) flg = PETSC_FALSE;
187               if (flg) { /* complete local diagonal block in IS? */
188                 /* fast extraction when the local diagonal block is part of the submatrix, e.g., for PCASM or PCHPDDM
189                  *      [   B   C   E   ]
190                  *  A = [   B   D   E   ]
191                  *      [   B   F   E   ]
192                  */
193                 m = std::distance(idxr,it); /* shift of the coefficient (0,0) of block D from above */
194                 CHKERRQ(MatGetDiagonalBlock_Htool(A,&D));
195                 CHKERRQ(MatDenseGetArrayRead(D,&copy));
196                 for (PetscInt k=0; k<A->rmap->n; ++k) {
197                   CHKERRQ(PetscArraycpy(ptr+(m+k)*nrow+m,copy+k*A->rmap->n,A->rmap->n)); /* block D from above */
198                 }
199                 CHKERRQ(MatDenseRestoreArrayRead(D,&copy));
200                 if (m) {
201                   a->wrapper->copy_submatrix(nrow,m,idxr,idxc,ptr); /* vertical block B from above */
202                   /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
203                   if (A->symmetric || A->hermitian) {
204                     CHKERRQ(MatCreateDense(PETSC_COMM_SELF,A->rmap->n,m,A->rmap->n,m,ptr+m,&B));
205                     CHKERRQ(MatDenseSetLDA(B,nrow));
206                     CHKERRQ(MatCreateDense(PETSC_COMM_SELF,m,A->rmap->n,m,A->rmap->n,ptr+m*nrow,&BT));
207                     CHKERRQ(MatDenseSetLDA(BT,nrow));
208                     if (A->hermitian && PetscDefined(USE_COMPLEX)) {
209                       CHKERRQ(MatHermitianTranspose(B,MAT_REUSE_MATRIX,&BT));
210                     } else {
211                       CHKERRQ(MatTranspose(B,MAT_REUSE_MATRIX,&BT));
212                     }
213                     CHKERRQ(MatDestroy(&B));
214                     CHKERRQ(MatDestroy(&BT));
215                   } else {
216                     for (PetscInt k=0; k<A->rmap->n; ++k) { /* block C from above */
217                       a->wrapper->copy_submatrix(m,1,idxr,idxc+m+k,ptr+(m+k)*nrow);
218                     }
219                   }
220                 }
221                 if (m+A->rmap->n != nrow) {
222                   a->wrapper->copy_submatrix(nrow,std::distance(it+A->rmap->n,idxr+nrow),idxr,idxc+m+A->rmap->n,ptr+(m+A->rmap->n)*nrow); /* vertical block E from above */
223                   /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
224                   if (A->symmetric || A->hermitian) {
225                     CHKERRQ(MatCreateDense(PETSC_COMM_SELF,A->rmap->n,nrow-(m+A->rmap->n),A->rmap->n,nrow-(m+A->rmap->n),ptr+(m+A->rmap->n)*nrow+m,&B));
226                     CHKERRQ(MatDenseSetLDA(B,nrow));
227                     CHKERRQ(MatCreateDense(PETSC_COMM_SELF,nrow-(m+A->rmap->n),A->rmap->n,nrow-(m+A->rmap->n),A->rmap->n,ptr+m*nrow+m+A->rmap->n,&BT));
228                     CHKERRQ(MatDenseSetLDA(BT,nrow));
229                     if (A->hermitian && PetscDefined(USE_COMPLEX)) {
230                       CHKERRQ(MatHermitianTranspose(B,MAT_REUSE_MATRIX,&BT));
231                     } else {
232                       CHKERRQ(MatTranspose(B,MAT_REUSE_MATRIX,&BT));
233                     }
234                     CHKERRQ(MatDestroy(&B));
235                     CHKERRQ(MatDestroy(&BT));
236                   } else {
237                     for (PetscInt k=0; k<A->rmap->n; ++k) { /* block F from above */
238                       a->wrapper->copy_submatrix(std::distance(it+A->rmap->n,idxr+nrow),1,it+A->rmap->n,idxc+m+k,ptr+(m+k)*nrow+m+A->rmap->n);
239                     }
240                   }
241                 }
242               } /* complete local diagonal block not in IS */
243             } else flg = PETSC_FALSE; /* IS not long enough to store the local diagonal block */
244           } else flg = PETSC_FALSE; /* rmap->rstart not in IS */
245         } /* unsorted IS */
246       }
247     } else flg = PETSC_FALSE; /* different row and column IS */
248     if (!flg) a->wrapper->copy_submatrix(nrow,m,idxr,idxc,ptr); /* reassemble everything */
249     CHKERRQ(ISRestoreIndices(irow[i],&idxr));
250     CHKERRQ(ISRestoreIndices(icol[i],&idxc));
251     CHKERRQ(MatDenseRestoreArrayWrite((*submat)[i],&ptr));
252     CHKERRQ(MatAssemblyBegin((*submat)[i],MAT_FINAL_ASSEMBLY));
253     CHKERRQ(MatAssemblyEnd((*submat)[i],MAT_FINAL_ASSEMBLY));
254     CHKERRQ(MatScale((*submat)[i],a->s));
255   }
256   PetscFunctionReturn(0);
257 }
258 
259 static PetscErrorCode MatDestroy_Htool(Mat A)
260 {
261   Mat_Htool               *a = (Mat_Htool*)A->data;
262   PetscContainer          container;
263   MatHtoolKernelTranspose *kernelt;
264 
265   PetscFunctionBegin;
266   CHKERRQ(PetscObjectChangeTypeName((PetscObject)A,NULL));
267   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_seqdense_C",NULL));
268   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_mpidense_C",NULL));
269   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_seqdense_C",NULL));
270   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_mpidense_C",NULL));
271   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetHierarchicalMat_C",NULL));
272   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatHtoolSetKernel_C",NULL));
273   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationSource_C",NULL));
274   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationTarget_C",NULL));
275   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatHtoolUsePermutation_C",NULL));
276   CHKERRQ(PetscObjectQuery((PetscObject)A,"KernelTranspose",(PetscObject*)&container));
277   if (container) { /* created in MatTranspose_Htool() */
278     CHKERRQ(PetscContainerGetPointer(container,(void**)&kernelt));
279     CHKERRQ(MatDestroy(&kernelt->A));
280     CHKERRQ(PetscFree(kernelt));
281     CHKERRQ(PetscContainerDestroy(&container));
282     CHKERRQ(PetscObjectCompose((PetscObject)A,"KernelTranspose",NULL));
283   }
284   if (a->gcoords_source != a->gcoords_target) {
285     CHKERRQ(PetscFree(a->gcoords_source));
286   }
287   CHKERRQ(PetscFree(a->gcoords_target));
288   CHKERRQ(PetscFree2(a->work_source,a->work_target));
289   delete a->wrapper;
290   delete a->hmatrix;
291   CHKERRQ(PetscFree(A->data));
292   PetscFunctionReturn(0);
293 }
294 
295 static PetscErrorCode MatView_Htool(Mat A,PetscViewer pv)
296 {
297   Mat_Htool      *a = (Mat_Htool*)A->data;
298   PetscBool      flg;
299 
300   PetscFunctionBegin;
301   CHKERRQ(PetscObjectTypeCompare((PetscObject)pv,PETSCVIEWERASCII,&flg));
302   if (flg) {
303     CHKERRQ(PetscViewerASCIIPrintf(pv,"symmetry: %c\n",a->hmatrix->get_symmetry_type()));
304     if (PetscAbsScalar(a->s-1.0) > PETSC_MACHINE_EPSILON) {
305 #if defined(PETSC_USE_COMPLEX)
306       CHKERRQ(PetscViewerASCIIPrintf(pv,"scaling: %g+%gi\n",(double)PetscRealPart(a->s),(double)PetscImaginaryPart(a->s)));
307 #else
308       CHKERRQ(PetscViewerASCIIPrintf(pv,"scaling: %g\n",(double)a->s));
309 #endif
310     }
311     CHKERRQ(PetscViewerASCIIPrintf(pv,"minimum cluster size: %" PetscInt_FMT "\n",a->bs[0]));
312     CHKERRQ(PetscViewerASCIIPrintf(pv,"maximum block size: %" PetscInt_FMT "\n",a->bs[1]));
313     CHKERRQ(PetscViewerASCIIPrintf(pv,"epsilon: %g\n",(double)a->epsilon));
314     CHKERRQ(PetscViewerASCIIPrintf(pv,"eta: %g\n",(double)a->eta));
315     CHKERRQ(PetscViewerASCIIPrintf(pv,"minimum target depth: %" PetscInt_FMT "\n",a->depth[0]));
316     CHKERRQ(PetscViewerASCIIPrintf(pv,"minimum source depth: %" PetscInt_FMT "\n",a->depth[1]));
317     CHKERRQ(PetscViewerASCIIPrintf(pv,"compressor: %s\n",MatHtoolCompressorTypes[a->compressor]));
318     CHKERRQ(PetscViewerASCIIPrintf(pv,"clustering: %s\n",MatHtoolClusteringTypes[a->clustering]));
319     CHKERRQ(PetscViewerASCIIPrintf(pv,"compression ratio: %s\n",a->hmatrix->get_infos("Compression_ratio").c_str()));
320     CHKERRQ(PetscViewerASCIIPrintf(pv,"space saving: %s\n",a->hmatrix->get_infos("Space_saving").c_str()));
321     CHKERRQ(PetscViewerASCIIPrintf(pv,"number of dense (resp. low rank) matrices: %s (resp. %s)\n",a->hmatrix->get_infos("Number_of_dmat").c_str(),a->hmatrix->get_infos("Number_of_lrmat").c_str()));
322     CHKERRQ(PetscViewerASCIIPrintf(pv,"(minimum, mean, maximum) dense block sizes: (%s, %s, %s)\n",a->hmatrix->get_infos("Dense_block_size_min").c_str(),a->hmatrix->get_infos("Dense_block_size_mean").c_str(),a->hmatrix->get_infos("Dense_block_size_max").c_str()));
323     CHKERRQ(PetscViewerASCIIPrintf(pv,"(minimum, mean, maximum) low rank block sizes: (%s, %s, %s)\n",a->hmatrix->get_infos("Low_rank_block_size_min").c_str(),a->hmatrix->get_infos("Low_rank_block_size_mean").c_str(),a->hmatrix->get_infos("Low_rank_block_size_max").c_str()));
324     CHKERRQ(PetscViewerASCIIPrintf(pv,"(minimum, mean, maximum) ranks: (%s, %s, %s)\n",a->hmatrix->get_infos("Rank_min").c_str(),a->hmatrix->get_infos("Rank_mean").c_str(),a->hmatrix->get_infos("Rank_max").c_str()));
325   }
326   PetscFunctionReturn(0);
327 }
328 
329 static PetscErrorCode MatScale_Htool(Mat A,PetscScalar s)
330 {
331   Mat_Htool *a = (Mat_Htool*)A->data;
332 
333   PetscFunctionBegin;
334   a->s *= s;
335   PetscFunctionReturn(0);
336 }
337 
338 /* naive implementation of MatGetRow() needed for MatConvert_Nest_AIJ() */
339 static PetscErrorCode MatGetRow_Htool(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
340 {
341   Mat_Htool      *a = (Mat_Htool*)A->data;
342   PetscInt       *idxc;
343   PetscBLASInt   one = 1,bn;
344 
345   PetscFunctionBegin;
346   if (nz) *nz = A->cmap->N;
347   if (idx || v) { /* even if !idx, need to set idxc for htool::copy_submatrix() */
348     CHKERRQ(PetscMalloc1(A->cmap->N,&idxc));
349     for (PetscInt i=0; i<A->cmap->N; ++i) idxc[i] = i;
350   }
351   if (idx) *idx = idxc;
352   if (v) {
353     CHKERRQ(PetscMalloc1(A->cmap->N,v));
354     if (a->wrapper) a->wrapper->copy_submatrix(1,A->cmap->N,&row,idxc,*v);
355     else reinterpret_cast<htool::VirtualGenerator<PetscScalar>*>(a->kernelctx)->copy_submatrix(1,A->cmap->N,&row,idxc,*v);
356     CHKERRQ(PetscBLASIntCast(A->cmap->N,&bn));
357     PetscStackCallBLAS("BLASscal",BLASscal_(&bn,&a->s,*v,&one));
358   }
359   if (!idx) {
360     CHKERRQ(PetscFree(idxc));
361   }
362   PetscFunctionReturn(0);
363 }
364 
365 static PetscErrorCode MatRestoreRow_Htool(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
366 {
367   PetscFunctionBegin;
368   if (nz) *nz = 0;
369   if (idx) {
370     CHKERRQ(PetscFree(*idx));
371   }
372   if (v) {
373     CHKERRQ(PetscFree(*v));
374   }
375   PetscFunctionReturn(0);
376 }
377 
378 static PetscErrorCode MatSetFromOptions_Htool(PetscOptionItems *PetscOptionsObject,Mat A)
379 {
380   Mat_Htool      *a = (Mat_Htool*)A->data;
381   PetscInt       n;
382   PetscBool      flg;
383 
384   PetscFunctionBegin;
385   CHKERRQ(PetscOptionsHead(PetscOptionsObject,"Htool options"));
386   CHKERRQ(PetscOptionsInt("-mat_htool_min_cluster_size","Minimal leaf size in cluster tree",NULL,a->bs[0],a->bs,NULL));
387   CHKERRQ(PetscOptionsInt("-mat_htool_max_block_size","Maximal number of coefficients in a dense block",NULL,a->bs[1],a->bs + 1,NULL));
388   CHKERRQ(PetscOptionsReal("-mat_htool_epsilon","Relative error in Frobenius norm when approximating a block",NULL,a->epsilon,&a->epsilon,NULL));
389   CHKERRQ(PetscOptionsReal("-mat_htool_eta","Admissibility condition tolerance",NULL,a->eta,&a->eta,NULL));
390   CHKERRQ(PetscOptionsInt("-mat_htool_min_target_depth","Minimal cluster tree depth associated with the rows",NULL,a->depth[0],a->depth,NULL));
391   CHKERRQ(PetscOptionsInt("-mat_htool_min_source_depth","Minimal cluster tree depth associated with the columns",NULL,a->depth[1],a->depth + 1,NULL));
392   n = 0;
393   CHKERRQ(PetscOptionsEList("-mat_htool_compressor","Type of compression","MatHtoolCompressorType",MatHtoolCompressorTypes,ALEN(MatHtoolCompressorTypes),MatHtoolCompressorTypes[MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA],&n,&flg));
394   if (flg) a->compressor = MatHtoolCompressorType(n);
395   n = 0;
396   CHKERRQ(PetscOptionsEList("-mat_htool_clustering","Type of clustering","MatHtoolClusteringType",MatHtoolClusteringTypes,ALEN(MatHtoolClusteringTypes),MatHtoolClusteringTypes[MAT_HTOOL_CLUSTERING_PCA_REGULAR],&n,&flg));
397   if (flg) a->clustering = MatHtoolClusteringType(n);
398   CHKERRQ(PetscOptionsTail());
399   PetscFunctionReturn(0);
400 }
401 
402 static PetscErrorCode MatAssemblyEnd_Htool(Mat A,MatAssemblyType type)
403 {
404   Mat_Htool                                                    *a = (Mat_Htool*)A->data;
405   const PetscInt                                               *ranges;
406   PetscInt                                                     *offset;
407   PetscMPIInt                                                  size;
408   char                                                         S = PetscDefined(USE_COMPLEX) && A->hermitian ? 'H' : (A->symmetric ? 'S' : 'N'),uplo = S == 'N' ? 'N' : 'U';
409   htool::VirtualGenerator<PetscScalar>                         *generator = nullptr;
410   std::shared_ptr<htool::VirtualCluster>                       t,s = nullptr;
411   std::shared_ptr<htool::VirtualLowRankGenerator<PetscScalar>> compressor = nullptr;
412 
413   PetscFunctionBegin;
414   CHKERRQ(PetscCitationsRegister(HtoolCitation,&HtoolCite));
415   delete a->wrapper;
416   delete a->hmatrix;
417   CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
418   CHKERRQ(PetscMalloc1(2*size,&offset));
419   CHKERRQ(MatGetOwnershipRanges(A,&ranges));
420   for (PetscInt i=0; i<size; ++i) {
421     offset[2*i] = ranges[i];
422     offset[2*i+1] = ranges[i+1] - ranges[i];
423   }
424   switch (a->clustering) {
425   case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
426     t = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
427     break;
428   case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
429     t = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
430     break;
431   case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
432     t = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::RegularSplitting>>>(a->dim);
433     break;
434   default:
435     t = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::RegularSplitting>>>(a->dim);
436   }
437   t->set_minclustersize(a->bs[0]);
438   t->build(A->rmap->N,a->gcoords_target,offset);
439   if (a->kernel) a->wrapper = new WrapperHtool(A->rmap->N,A->cmap->N,a->dim,a->kernel,a->kernelctx);
440   else {
441     a->wrapper = NULL;
442     generator = reinterpret_cast<htool::VirtualGenerator<PetscScalar>*>(a->kernelctx);
443   }
444   if (a->gcoords_target != a->gcoords_source) {
445     CHKERRQ(MatGetOwnershipRangesColumn(A,&ranges));
446     for (PetscInt i=0; i<size; ++i) {
447       offset[2*i] = ranges[i];
448       offset[2*i+1] = ranges[i+1] - ranges[i];
449     }
450     switch (a->clustering) {
451     case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
452       s = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
453       break;
454     case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
455       s = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::GeometricSplitting>>>(a->dim);
456       break;
457     case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
458       s = std::make_shared<htool::Cluster<htool::BoundingBox1<htool::SplittingTypes::RegularSplitting>>>(a->dim);
459       break;
460     default:
461       s = std::make_shared<htool::Cluster<htool::PCA<htool::SplittingTypes::RegularSplitting>>>(a->dim);
462     }
463     s->set_minclustersize(a->bs[0]);
464     s->build(A->cmap->N,a->gcoords_source,offset);
465     S = uplo = 'N';
466   }
467   CHKERRQ(PetscFree(offset));
468   switch (a->compressor) {
469   case MAT_HTOOL_COMPRESSOR_FULL_ACA:
470     compressor = std::make_shared<htool::fullACA<PetscScalar>>();
471     break;
472   case MAT_HTOOL_COMPRESSOR_SVD:
473     compressor = std::make_shared<htool::SVD<PetscScalar>>();
474     break;
475   default:
476     compressor = std::make_shared<htool::sympartialACA<PetscScalar>>();
477   }
478   a->hmatrix = dynamic_cast<htool::VirtualHMatrix<PetscScalar>*>(new htool::HMatrix<PetscScalar>(t,s ? s : t,a->epsilon,a->eta,S,uplo));
479   a->hmatrix->set_compression(compressor);
480   a->hmatrix->set_maxblocksize(a->bs[1]);
481   a->hmatrix->set_mintargetdepth(a->depth[0]);
482   a->hmatrix->set_minsourcedepth(a->depth[1]);
483   if (s) a->hmatrix->build(a->wrapper ? *a->wrapper : *generator,a->gcoords_target,a->gcoords_source);
484   else   a->hmatrix->build(a->wrapper ? *a->wrapper : *generator,a->gcoords_target);
485   PetscFunctionReturn(0);
486 }
487 
488 static PetscErrorCode MatProductNumeric_Htool(Mat C)
489 {
490   Mat_Product       *product = C->product;
491   Mat_Htool         *a = (Mat_Htool*)product->A->data;
492   const PetscScalar *in;
493   PetscScalar       *out;
494   PetscInt          N,lda;
495 
496   PetscFunctionBegin;
497   MatCheckProduct(C,1);
498   CHKERRQ(MatGetSize(C,NULL,&N));
499   CHKERRQ(MatDenseGetLDA(C,&lda));
500   PetscCheck(lda == C->rmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")",lda,C->rmap->n);
501   CHKERRQ(MatDenseGetArrayRead(product->B,&in));
502   CHKERRQ(MatDenseGetArrayWrite(C,&out));
503   switch (product->type) {
504   case MATPRODUCT_AB:
505     a->hmatrix->mvprod_local_to_local(in,out,N);
506     break;
507   case MATPRODUCT_AtB:
508     a->hmatrix->mvprod_transp_local_to_local(in,out,N);
509     break;
510   default:
511     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatProductType %s is not supported",MatProductTypes[product->type]);
512   }
513   CHKERRQ(MatDenseRestoreArrayWrite(C,&out));
514   CHKERRQ(MatDenseRestoreArrayRead(product->B,&in));
515   CHKERRQ(MatScale(C,a->s));
516   PetscFunctionReturn(0);
517 }
518 
519 static PetscErrorCode MatProductSymbolic_Htool(Mat C)
520 {
521   Mat_Product    *product = C->product;
522   Mat            A,B;
523   PetscBool      flg;
524 
525   PetscFunctionBegin;
526   MatCheckProduct(C,1);
527   A = product->A;
528   B = product->B;
529   CHKERRQ(PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,""));
530   PetscCheck(flg,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MatProduct_AB not supported for %s",((PetscObject)product->B)->type_name);
531   switch (product->type) {
532   case MATPRODUCT_AB:
533     if (C->rmap->n == PETSC_DECIDE || C->cmap->n == PETSC_DECIDE || C->rmap->N == PETSC_DECIDE || C->cmap->N == PETSC_DECIDE) {
534       CHKERRQ(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
535     }
536     break;
537   case MATPRODUCT_AtB:
538     if (C->rmap->n == PETSC_DECIDE || C->cmap->n == PETSC_DECIDE || C->rmap->N == PETSC_DECIDE || C->cmap->N == PETSC_DECIDE) {
539       CHKERRQ(MatSetSizes(C,A->cmap->n,B->cmap->n,A->cmap->N,B->cmap->N));
540     }
541     break;
542   default:
543     SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"ProductType %s is not supported",MatProductTypes[product->type]);
544   }
545   CHKERRQ(MatSetType(C,MATDENSE));
546   CHKERRQ(MatSetUp(C));
547   CHKERRQ(MatSetOption(C,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
548   CHKERRQ(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
549   CHKERRQ(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
550   C->ops->productsymbolic = NULL;
551   C->ops->productnumeric = MatProductNumeric_Htool;
552   PetscFunctionReturn(0);
553 }
554 
555 static PetscErrorCode MatProductSetFromOptions_Htool(Mat C)
556 {
557   PetscFunctionBegin;
558   MatCheckProduct(C,1);
559   if (C->product->type == MATPRODUCT_AB || C->product->type == MATPRODUCT_AtB) C->ops->productsymbolic = MatProductSymbolic_Htool;
560   PetscFunctionReturn(0);
561 }
562 
563 static PetscErrorCode MatHtoolGetHierarchicalMat_Htool(Mat A,const htool::VirtualHMatrix<PetscScalar> **hmatrix)
564 {
565   Mat_Htool *a = (Mat_Htool*)A->data;
566 
567   PetscFunctionBegin;
568   *hmatrix = a->hmatrix;
569   PetscFunctionReturn(0);
570 }
571 
572 /*@C
573      MatHtoolGetHierarchicalMat - Retrieves the opaque pointer to a Htool virtual matrix stored in a MATHTOOL.
574 
575    Input Parameter:
576 .     A - hierarchical matrix
577 
578    Output Parameter:
579 .     hmatrix - opaque pointer to a Htool virtual matrix
580 
581    Level: advanced
582 
583 .seealso:  MATHTOOL
584 @*/
585 PETSC_EXTERN PetscErrorCode MatHtoolGetHierarchicalMat(Mat A,const htool::VirtualHMatrix<PetscScalar> **hmatrix)
586 {
587   PetscFunctionBegin;
588   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
589   PetscValidPointer(hmatrix,2);
590   CHKERRQ(PetscTryMethod(A,"MatHtoolGetHierarchicalMat_C",(Mat,const htool::VirtualHMatrix<PetscScalar>**),(A,hmatrix)));
591   PetscFunctionReturn(0);
592 }
593 
594 static PetscErrorCode MatHtoolSetKernel_Htool(Mat A,MatHtoolKernel kernel,void *kernelctx)
595 {
596   Mat_Htool *a = (Mat_Htool*)A->data;
597 
598   PetscFunctionBegin;
599   a->kernel    = kernel;
600   a->kernelctx = kernelctx;
601   delete a->wrapper;
602   if (a->kernel) a->wrapper = new WrapperHtool(A->rmap->N,A->cmap->N,a->dim,a->kernel,a->kernelctx);
603   PetscFunctionReturn(0);
604 }
605 
606 /*@C
607      MatHtoolSetKernel - Sets the kernel and context used for the assembly of a MATHTOOL.
608 
609    Input Parameters:
610 +     A - hierarchical matrix
611 .     kernel - computational kernel (or NULL)
612 -     kernelctx - kernel context (if kernel is NULL, the pointer must be of type htool::VirtualGenerator<PetscScalar>*)
613 
614    Level: advanced
615 
616 .seealso:  MATHTOOL, MatCreateHtoolFromKernel()
617 @*/
618 PETSC_EXTERN PetscErrorCode MatHtoolSetKernel(Mat A,MatHtoolKernel kernel,void *kernelctx)
619 {
620   PetscFunctionBegin;
621   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
622   if (!kernelctx) PetscValidFunction(kernel,2);
623   if (!kernel)    PetscValidPointer(kernelctx,3);
624   CHKERRQ(PetscTryMethod(A,"MatHtoolSetKernel_C",(Mat,MatHtoolKernel,void*),(A,kernel,kernelctx)));
625   PetscFunctionReturn(0);
626 }
627 
628 static PetscErrorCode MatHtoolGetPermutationSource_Htool(Mat A,IS* is)
629 {
630   Mat_Htool             *a = (Mat_Htool*)A->data;
631   std::vector<PetscInt> source;
632 
633   PetscFunctionBegin;
634   source = a->hmatrix->get_source_cluster()->get_local_perm();
635   CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)A),source.size(),source.data(),PETSC_COPY_VALUES,is));
636   CHKERRQ(ISSetPermutation(*is));
637   PetscFunctionReturn(0);
638 }
639 
640 /*@C
641      MatHtoolGetPermutationSource - Gets the permutation associated to the source cluster.
642 
643    Input Parameter:
644 .     A - hierarchical matrix
645 
646    Output Parameter:
647 .     is - permutation
648 
649    Level: advanced
650 
651 .seealso:  MATHTOOL, MatHtoolGetPermutationTarget(), MatHtoolUsePermutation()
652 @*/
653 PETSC_EXTERN PetscErrorCode MatHtoolGetPermutationSource(Mat A,IS* is)
654 {
655   PetscFunctionBegin;
656   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
657   if (!is) PetscValidPointer(is,2);
658   CHKERRQ(PetscTryMethod(A,"MatHtoolGetPermutationSource_C",(Mat,IS*),(A,is)));
659   PetscFunctionReturn(0);
660 }
661 
662 static PetscErrorCode MatHtoolGetPermutationTarget_Htool(Mat A,IS* is)
663 {
664   Mat_Htool             *a = (Mat_Htool*)A->data;
665   std::vector<PetscInt> target;
666 
667   PetscFunctionBegin;
668   target = a->hmatrix->get_target_cluster()->get_local_perm();
669   CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)A),target.size(),target.data(),PETSC_COPY_VALUES,is));
670   CHKERRQ(ISSetPermutation(*is));
671   PetscFunctionReturn(0);
672 }
673 
674 /*@C
675      MatHtoolGetPermutationTarget - Gets the permutation associated to the target cluster.
676 
677    Input Parameter:
678 .     A - hierarchical matrix
679 
680    Output Parameter:
681 .     is - permutation
682 
683    Level: advanced
684 
685 .seealso:  MATHTOOL, MatHtoolGetPermutationSource(), MatHtoolUsePermutation()
686 @*/
687 PETSC_EXTERN PetscErrorCode MatHtoolGetPermutationTarget(Mat A,IS* is)
688 {
689   PetscFunctionBegin;
690   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
691   if (!is) PetscValidPointer(is,2);
692   CHKERRQ(PetscTryMethod(A,"MatHtoolGetPermutationTarget_C",(Mat,IS*),(A,is)));
693   PetscFunctionReturn(0);
694 }
695 
696 static PetscErrorCode MatHtoolUsePermutation_Htool(Mat A,PetscBool use)
697 {
698   Mat_Htool *a = (Mat_Htool*)A->data;
699 
700   PetscFunctionBegin;
701   a->hmatrix->set_use_permutation(use);
702   PetscFunctionReturn(0);
703 }
704 
705 /*@C
706      MatHtoolUsePermutation - Sets whether MATHTOOL should permute input (resp. output) vectors following its internal source (resp. target) permutation.
707 
708    Input Parameters:
709 +     A - hierarchical matrix
710 -     use - Boolean value
711 
712    Level: advanced
713 
714 .seealso:  MATHTOOL, MatHtoolGetPermutationSource(), MatHtoolGetPermutationTarget()
715 @*/
716 PETSC_EXTERN PetscErrorCode MatHtoolUsePermutation(Mat A,PetscBool use)
717 {
718   PetscFunctionBegin;
719   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
720   PetscValidLogicalCollectiveBool(A,use,2);
721   CHKERRQ(PetscTryMethod(A,"MatHtoolUsePermutation_C",(Mat,PetscBool),(A,use)));
722   PetscFunctionReturn(0);
723 }
724 
725 static PetscErrorCode MatConvert_Htool_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
726 {
727   Mat            C;
728   Mat_Htool      *a = (Mat_Htool*)A->data;
729   PetscInt       lda;
730   PetscScalar    *array;
731 
732   PetscFunctionBegin;
733   if (reuse == MAT_REUSE_MATRIX) {
734     C = *B;
735     PetscCheck(C->rmap->n == A->rmap->n && C->cmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible dimensions");
736     CHKERRQ(MatDenseGetLDA(C,&lda));
737     PetscCheck(lda == C->rmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")",lda,C->rmap->n);
738   } else {
739     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&C));
740     CHKERRQ(MatSetSizes(C,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
741     CHKERRQ(MatSetType(C,MATDENSE));
742     CHKERRQ(MatSetUp(C));
743     CHKERRQ(MatSetOption(C,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
744   }
745   CHKERRQ(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
746   CHKERRQ(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
747   CHKERRQ(MatDenseGetArrayWrite(C,&array));
748   a->hmatrix->copy_local_dense_perm(array);
749   CHKERRQ(MatDenseRestoreArrayWrite(C,&array));
750   CHKERRQ(MatScale(C,a->s));
751   if (reuse == MAT_INPLACE_MATRIX) {
752     CHKERRQ(MatHeaderReplace(A,&C));
753   } else *B = C;
754   PetscFunctionReturn(0);
755 }
756 
757 static PetscErrorCode GenEntriesTranspose(PetscInt sdim,PetscInt M,PetscInt N,const PetscInt *rows,const PetscInt *cols,PetscScalar *ptr,void *ctx)
758 {
759   MatHtoolKernelTranspose *generator = (MatHtoolKernelTranspose*)ctx;
760   PetscScalar             *tmp;
761 
762   PetscFunctionBegin;
763   generator->kernel(sdim,N,M,cols,rows,ptr,generator->kernelctx);
764   CHKERRQ(PetscMalloc1(M*N,&tmp));
765   CHKERRQ(PetscArraycpy(tmp,ptr,M*N));
766   for (PetscInt i=0; i<M; ++i) {
767     for (PetscInt j=0; j<N; ++j) ptr[i+j*M] = tmp[j+i*N];
768   }
769   CHKERRQ(PetscFree(tmp));
770   PetscFunctionReturn(0);
771 }
772 
773 /* naive implementation which keeps a reference to the original Mat */
774 static PetscErrorCode MatTranspose_Htool(Mat A,MatReuse reuse,Mat *B)
775 {
776   Mat                     C;
777   Mat_Htool               *a = (Mat_Htool*)A->data,*c;
778   PetscInt                M = A->rmap->N,N = A->cmap->N,m = A->rmap->n,n = A->cmap->n;
779   PetscContainer          container;
780   MatHtoolKernelTranspose *kernelt;
781 
782   PetscFunctionBegin;
783   PetscCheck(reuse != MAT_INPLACE_MATRIX,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MatTranspose() with MAT_INPLACE_MATRIX not supported");
784   if (reuse == MAT_INITIAL_MATRIX) {
785     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&C));
786     CHKERRQ(MatSetSizes(C,n,m,N,M));
787     CHKERRQ(MatSetType(C,((PetscObject)A)->type_name));
788     CHKERRQ(MatSetUp(C));
789     CHKERRQ(PetscContainerCreate(PetscObjectComm((PetscObject)C),&container));
790     CHKERRQ(PetscNew(&kernelt));
791     CHKERRQ(PetscContainerSetPointer(container,kernelt));
792     CHKERRQ(PetscObjectCompose((PetscObject)C,"KernelTranspose",(PetscObject)container));
793   } else {
794     C = *B;
795     CHKERRQ(PetscObjectQuery((PetscObject)C,"KernelTranspose",(PetscObject*)&container));
796     PetscCheck(container,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatTranspose() with MAT_INITIAL_MATRIX first");
797     CHKERRQ(PetscContainerGetPointer(container,(void**)&kernelt));
798   }
799   c                  = (Mat_Htool*)C->data;
800   c->dim             = a->dim;
801   c->s               = a->s;
802   c->kernel          = GenEntriesTranspose;
803   if (kernelt->A != A) {
804     CHKERRQ(MatDestroy(&kernelt->A));
805     kernelt->A       = A;
806     CHKERRQ(PetscObjectReference((PetscObject)A));
807   }
808   kernelt->kernel    = a->kernel;
809   kernelt->kernelctx = a->kernelctx;
810   c->kernelctx       = kernelt;
811   if (reuse == MAT_INITIAL_MATRIX) {
812     CHKERRQ(PetscMalloc1(N*c->dim,&c->gcoords_target));
813     CHKERRQ(PetscArraycpy(c->gcoords_target,a->gcoords_source,N*c->dim));
814     if (a->gcoords_target != a->gcoords_source) {
815       CHKERRQ(PetscMalloc1(M*c->dim,&c->gcoords_source));
816       CHKERRQ(PetscArraycpy(c->gcoords_source,a->gcoords_target,M*c->dim));
817     } else c->gcoords_source = c->gcoords_target;
818     CHKERRQ(PetscCalloc2(M,&c->work_source,N,&c->work_target));
819   }
820   CHKERRQ(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
821   CHKERRQ(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
822   if (reuse == MAT_INITIAL_MATRIX) *B = C;
823   PetscFunctionReturn(0);
824 }
825 
826 /*@C
827      MatCreateHtoolFromKernel - Creates a MATHTOOL from a user-supplied kernel.
828 
829    Input Parameters:
830 +     comm - MPI communicator
831 .     m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
832 .     n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
833 .     M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
834 .     N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
835 .     spacedim - dimension of the space coordinates
836 .     coords_target - coordinates of the target
837 .     coords_source - coordinates of the source
838 .     kernel - computational kernel (or NULL)
839 -     kernelctx - kernel context (if kernel is NULL, the pointer must be of type htool::VirtualGenerator<PetscScalar>*)
840 
841    Output Parameter:
842 .     B - matrix
843 
844    Options Database Keys:
845 +     -mat_htool_min_cluster_size <PetscInt> - minimal leaf size in cluster tree
846 .     -mat_htool_max_block_size <PetscInt> - maximal number of coefficients in a dense block
847 .     -mat_htool_epsilon <PetscReal> - relative error in Frobenius norm when approximating a block
848 .     -mat_htool_eta <PetscReal> - admissibility condition tolerance
849 .     -mat_htool_min_target_depth <PetscInt> - minimal cluster tree depth associated with the rows
850 .     -mat_htool_min_source_depth <PetscInt> - minimal cluster tree depth associated with the columns
851 .     -mat_htool_compressor <sympartialACA, fullACA, SVD> - type of compression
852 -     -mat_htool_clustering <PCARegular, PCAGeometric, BounbingBox1Regular, BoundingBox1Geometric> - type of clustering
853 
854    Level: intermediate
855 
856 .seealso:  MatCreate(), MATHTOOL, PCSetCoordinates(), MatHtoolSetKernel(), MatHtoolCompressorType, MATH2OPUS, MatCreateH2OpusFromKernel()
857 @*/
858 PetscErrorCode MatCreateHtoolFromKernel(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt spacedim,const PetscReal coords_target[],const PetscReal coords_source[],MatHtoolKernel kernel,void *kernelctx,Mat *B)
859 {
860   Mat            A;
861   Mat_Htool      *a;
862 
863   PetscFunctionBegin;
864   CHKERRQ(MatCreate(comm,&A));
865   PetscValidLogicalCollectiveInt(A,spacedim,6);
866   PetscValidRealPointer(coords_target,7);
867   PetscValidRealPointer(coords_source,8);
868   if (!kernelctx) PetscValidFunction(kernel,9);
869   if (!kernel)    PetscValidPointer(kernelctx,10);
870   CHKERRQ(MatSetSizes(A,m,n,M,N));
871   CHKERRQ(MatSetType(A,MATHTOOL));
872   CHKERRQ(MatSetUp(A));
873   a            = (Mat_Htool*)A->data;
874   a->dim       = spacedim;
875   a->s         = 1.0;
876   a->kernel    = kernel;
877   a->kernelctx = kernelctx;
878   CHKERRQ(PetscCalloc1(A->rmap->N*spacedim,&a->gcoords_target));
879   CHKERRQ(PetscArraycpy(a->gcoords_target+A->rmap->rstart*spacedim,coords_target,A->rmap->n*spacedim));
880   CHKERRMPI(MPIU_Allreduce(MPI_IN_PLACE,a->gcoords_target,A->rmap->N*spacedim,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)A))); /* global target coordinates */
881   if (coords_target != coords_source) {
882     CHKERRQ(PetscCalloc1(A->cmap->N*spacedim,&a->gcoords_source));
883     CHKERRQ(PetscArraycpy(a->gcoords_source+A->cmap->rstart*spacedim,coords_source,A->cmap->n*spacedim));
884     CHKERRMPI(MPIU_Allreduce(MPI_IN_PLACE,a->gcoords_source,A->cmap->N*spacedim,MPIU_REAL,MPI_SUM,PetscObjectComm((PetscObject)A))); /* global source coordinates */
885   } else a->gcoords_source = a->gcoords_target;
886   CHKERRQ(PetscCalloc2(A->cmap->N,&a->work_source,A->rmap->N,&a->work_target));
887   *B = A;
888   PetscFunctionReturn(0);
889 }
890 
891 /*MC
892      MATHTOOL = "htool" - A matrix type for hierarchical matrices using the Htool package.
893 
894   Use ./configure --download-htool to install PETSc to use Htool.
895 
896    Options Database Keys:
897 .     -mat_type htool - matrix type to "htool" during a call to MatSetFromOptions()
898 
899    Level: beginner
900 
901 .seealso: MATH2OPUS, MATDENSE, MatCreateHtoolFromKernel(), MatHtoolSetKernel()
902 M*/
903 PETSC_EXTERN PetscErrorCode MatCreate_Htool(Mat A)
904 {
905   Mat_Htool *a;
906 
907   PetscFunctionBegin;
908   CHKERRQ(PetscNewLog(A,&a));
909   A->data = (void*)a;
910   CHKERRQ(PetscObjectChangeTypeName((PetscObject)A,MATHTOOL));
911   CHKERRQ(PetscMemzero(A->ops,sizeof(struct _MatOps)));
912   A->ops->getdiagonal       = MatGetDiagonal_Htool;
913   A->ops->getdiagonalblock  = MatGetDiagonalBlock_Htool;
914   A->ops->mult              = MatMult_Htool;
915   A->ops->multadd           = MatMultAdd_Htool;
916   A->ops->multtranspose     = MatMultTranspose_Htool;
917   if (!PetscDefined(USE_COMPLEX)) A->ops->multhermitiantranspose = MatMultTranspose_Htool;
918   A->ops->increaseoverlap   = MatIncreaseOverlap_Htool;
919   A->ops->createsubmatrices = MatCreateSubMatrices_Htool;
920   A->ops->transpose         = MatTranspose_Htool;
921   A->ops->destroy           = MatDestroy_Htool;
922   A->ops->view              = MatView_Htool;
923   A->ops->setfromoptions    = MatSetFromOptions_Htool;
924   A->ops->scale             = MatScale_Htool;
925   A->ops->getrow            = MatGetRow_Htool;
926   A->ops->restorerow        = MatRestoreRow_Htool;
927   A->ops->assemblyend       = MatAssemblyEnd_Htool;
928   a->dim                    = 0;
929   a->gcoords_target         = NULL;
930   a->gcoords_source         = NULL;
931   a->s                      = 1.0;
932   a->bs[0]                  = 10;
933   a->bs[1]                  = 1000000;
934   a->epsilon                = PetscSqrtReal(PETSC_SMALL);
935   a->eta                    = 10.0;
936   a->depth[0]               = 0;
937   a->depth[1]               = 0;
938   a->compressor             = MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA;
939   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_seqdense_C",MatProductSetFromOptions_Htool));
940   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_htool_mpidense_C",MatProductSetFromOptions_Htool));
941   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_seqdense_C",MatConvert_Htool_Dense));
942   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatConvert_htool_mpidense_C",MatConvert_Htool_Dense));
943   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetHierarchicalMat_C",MatHtoolGetHierarchicalMat_Htool));
944   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatHtoolSetKernel_C",MatHtoolSetKernel_Htool));
945   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationSource_C",MatHtoolGetPermutationSource_Htool));
946   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatHtoolGetPermutationTarget_C",MatHtoolGetPermutationTarget_Htool));
947   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatHtoolUsePermutation_C",MatHtoolUsePermutation_Htool));
948   PetscFunctionReturn(0);
949 }
950