xref: /petsc/src/mat/impls/htool/htool.cxx (revision 7530f0189ecda25d023fdcb28178244332f3efb5)
1 #include <../src/mat/impls/htool/htool.hpp> /*I "petscmat.h" I*/
2 #include <set>
3 
/* option strings; order must match the MatHtoolCompressorType and MatHtoolClusteringType enums */
const char *const MatHtoolCompressorTypes[] = {"sympartialACA", "fullACA", "SVD"};
const char *const MatHtoolClusteringTypes[] = {"PCARegular", "PCAGeometric", "BoundingBox1Regular", "BoundingBox1Geometric"};
// clang-format off
/* BibTeX entries passed to PetscCitationsRegister() during assembly; HtoolCite tracks which have already been registered */
const char       *HtoolCitations[2]         = {"@article{marchand2020two,\n"
                                               "  Author = {Marchand, Pierre and Claeys, Xavier and Jolivet, Pierre and Nataf, Fr\\'ed\\'eric and Tournier, Pierre-Henri},\n"
                                               "  Title = {Two-level preconditioning for $h$-version boundary element approximation of hypersingular operator with {GenEO}},\n"
                                               "  Year = {2020},\n"
                                               "  Publisher = {Elsevier},\n"
                                               "  Journal = {Numerische Mathematik},\n"
                                               "  Volume = {146},\n"
                                               "  Pages = {597--628},\n"
                                               "  Url = {https://github.com/htool-ddm/htool}\n"
                                               "}\n",
                                               "@article{Marchand2026,\n"
                                               "  Author = {Marchand, Pierre and Tournier, Pierre-Henri and Jolivet, Pierre},\n"
                                               "  Title = {{Htool-DDM}: A {C++} library for parallel solvers and compressed linear systems},\n"
                                               "  Year = {2026},\n"
                                               "  Publisher = {The Open Journal},\n"
                                               "  Journal = {Journal of Open Source Software},\n"
                                               "  Volume = {11},\n"
                                               "  Number = {118},\n"
                                               "  Pages = {9279},\n"
                                               "  Url = {https://doi.org/10.21105/joss.09279}\n"
                                               "}\n"};
static PetscBool  HtoolCite[2]              = {PETSC_FALSE, PETSC_FALSE};
// clang-format on
30 
MatGetDiagonal_Htool(Mat A,Vec v)31 static PetscErrorCode MatGetDiagonal_Htool(Mat A, Vec v)
32 {
33   Mat_Htool   *a;
34   PetscScalar *x;
35   PetscBool    flg;
36 
37   PetscFunctionBegin;
38   PetscCall(MatHasCongruentLayouts(A, &flg));
39   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only congruent layouts supported");
40   PetscCall(MatShellGetContext(A, &a));
41   PetscCall(VecGetArrayWrite(v, &x));
42   PetscStackCallExternalVoid("copy_diagonal_in_user_numbering", htool::copy_diagonal_in_user_numbering(a->distributed_operator_holder->hmatrix, x));
43   PetscCall(VecRestoreArrayWrite(v, &x));
44   PetscFunctionReturn(PETSC_SUCCESS);
45 }
46 
/* return the local (on-process) diagonal block of A as a sequential dense matrix;
   the block is built once, densified from the hierarchical matrix, and cached on A */
static PetscErrorCode MatGetDiagonalBlock_Htool(Mat A, Mat *b)
{
  Mat_Htool                 *a;
  Mat                        B;
  PetscScalar               *ptr, shift, scale;
  PetscBool                  flg;
  PetscMPIInt                rank;
  htool::Cluster<PetscReal> *source_cluster = nullptr;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only congruent layouts supported");
  PetscCall(MatShellGetContext(A, &a));
  PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); /* same logic as in MatGetDiagonalBlock_MPIDense() */
  if (!B) {
    /* shift/scale are folded into the cached block below, so they may be retrieved here */
    PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
    PetscCall(MatCreateDense(PETSC_COMM_SELF, A->rmap->n, A->rmap->n, A->rmap->n, A->rmap->n, nullptr, &B));
    PetscCall(MatDenseGetArrayWrite(B, &ptr));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
    /* square matrices may share a single cluster tree for rows and columns */
    source_cluster = a->source_cluster ? a->source_cluster.get() : a->target_cluster.get();
    PetscStackCallExternalVoid("copy_to_dense_in_user_numbering", htool::copy_to_dense_in_user_numbering(*a->distributed_operator_holder->hmatrix.get_sub_hmatrix(a->target_cluster->get_cluster_on_partition(rank), source_cluster->get_cluster_on_partition(rank)), ptr));
    PetscCall(MatDenseRestoreArrayWrite(B, &ptr));
    PetscCall(MatPropagateSymmetryOptions(A, B));
    PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B)); /* compose keeps a reference, so *b stays valid after the MatDestroy() below */
    *b = B;
    PetscCall(MatDestroy(&B));
    PetscCall(MatShift(*b, shift));
    PetscCall(MatScale(*b, scale));
  } else {
    /* cached path: all arguments are MAT_SHELL_NOT_ALLOWED, presumably to error out if any
       shift/scale was applied after caching (which would make the cached block stale) — confirm */
    PetscCall(MatShellGetScalingShifts(A, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
    *b = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
81 
MatMult_Htool(Mat A,Vec x,Vec y)82 static PetscErrorCode MatMult_Htool(Mat A, Vec x, Vec y)
83 {
84   Mat_Htool         *a;
85   const PetscScalar *in;
86   PetscScalar       *out;
87 
88   PetscFunctionBegin;
89   PetscCall(MatShellGetContext(A, &a));
90   PetscCall(VecGetArrayRead(x, &in));
91   PetscCall(VecGetArrayWrite(y, &out));
92   if (a->permutation == PETSC_TRUE) htool::add_distributed_operator_vector_product_local_to_local<PetscScalar>('N', 1.0, a->distributed_operator_holder->distributed_operator, in, 0.0, out, nullptr);
93   else htool::internal_add_distributed_operator_vector_product_local_to_local<PetscScalar>('N', 1.0, a->distributed_operator_holder->distributed_operator, in, 0.0, out, nullptr);
94   PetscCall(VecRestoreArrayRead(x, &in));
95   PetscCall(VecRestoreArrayWrite(y, &out));
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
98 
MatMultTranspose_Htool(Mat A,Vec x,Vec y)99 static PetscErrorCode MatMultTranspose_Htool(Mat A, Vec x, Vec y)
100 {
101   Mat_Htool         *a;
102   const PetscScalar *in;
103   PetscScalar       *out;
104 
105   PetscFunctionBegin;
106   PetscCall(MatShellGetContext(A, &a));
107   PetscCall(VecGetArrayRead(x, &in));
108   PetscCall(VecGetArrayWrite(y, &out));
109   if (a->permutation == PETSC_TRUE) htool::add_distributed_operator_vector_product_local_to_local<PetscScalar>('T', 1.0, a->distributed_operator_holder->distributed_operator, in, 0.0, out, nullptr);
110   else htool::internal_add_distributed_operator_vector_product_local_to_local<PetscScalar>('T', 1.0, a->distributed_operator_holder->distributed_operator, in, 0.0, out, nullptr);
111   PetscCall(VecRestoreArrayRead(x, &in));
112   PetscCall(VecRestoreArrayWrite(y, &out));
113   PetscFunctionReturn(PETSC_SUCCESS);
114 }
115 
MatIncreaseOverlap_Htool(Mat A,PetscInt is_max,IS is[],PetscInt ov)116 static PetscErrorCode MatIncreaseOverlap_Htool(Mat A, PetscInt is_max, IS is[], PetscInt ov)
117 {
118   std::set<PetscInt> set;
119   const PetscInt    *idx;
120   PetscInt          *oidx, size, bs[2];
121   PetscMPIInt        csize;
122 
123   PetscFunctionBegin;
124   PetscCall(MatGetBlockSizes(A, bs, bs + 1));
125   if (bs[0] != bs[1]) bs[0] = 1;
126   for (PetscInt i = 0; i < is_max; ++i) {
127     /* basic implementation that adds indices by shifting an IS by -ov, -ov+1..., -1, 1..., ov-1, ov */
128     /* needed to avoid subdomain matrices to replicate A since it is dense                           */
129     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)is[i]), &csize));
130     PetscCheck(csize == 1, PETSC_COMM_SELF, PETSC_ERR_WRONG_MPI_SIZE, "Unsupported parallel IS");
131     PetscCall(ISGetSize(is[i], &size));
132     PetscCall(ISGetIndices(is[i], &idx));
133     for (PetscInt j = 0; j < size; ++j) {
134       set.insert(idx[j]);
135       for (PetscInt k = 1; k <= ov; ++k) {                                              /* for each layer of overlap      */
136         if (idx[j] - k >= 0) set.insert(idx[j] - k);                                    /* do not insert negative indices */
137         if (idx[j] + k < A->rmap->N && idx[j] + k < A->cmap->N) set.insert(idx[j] + k); /* do not insert indices greater than the dimension of A */
138       }
139     }
140     PetscCall(ISRestoreIndices(is[i], &idx));
141     PetscCall(ISDestroy(is + i));
142     if (bs[0] > 1) {
143       for (std::set<PetscInt>::iterator it = set.cbegin(); it != set.cend(); it++) {
144         std::vector<PetscInt> block(bs[0]);
145         std::iota(block.begin(), block.end(), (*it / bs[0]) * bs[0]);
146         set.insert(block.cbegin(), block.cend());
147       }
148     }
149     size = set.size(); /* size with overlap */
150     PetscCall(PetscMalloc1(size, &oidx));
151     for (const PetscInt j : set) *oidx++ = j;
152     oidx -= size;
153     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, oidx, PETSC_OWN_POINTER, is + i));
154   }
155   PetscFunctionReturn(PETSC_SUCCESS);
156 }
157 
/* extract n sequential dense submatrices A(irow[i], icol[i]); when the complete local
   diagonal block of A is contained in a sorted irow[i] == icol[i] (the common PCASM/PCHPDDM
   case), reuse the cached MatGetDiagonalBlock() instead of reassembling those entries */
static PetscErrorCode MatCreateSubMatrices_Htool(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
{
  Mat_Htool         *a;
  Mat                D, B, BT;
  const PetscScalar *copy;
  PetscScalar       *ptr, shift, scale;
  const PetscInt    *idxr, *idxc, *it;
  PetscInt           nrow, m, i;
  PetscBool          flg;

  PetscFunctionBegin;
  PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
  PetscCall(MatShellGetContext(A, &a));
  if (scall != MAT_REUSE_MATRIX) PetscCall(PetscCalloc1(n, submat));
  for (i = 0; i < n; ++i) {
    PetscCall(ISGetLocalSize(irow[i], &nrow));
    PetscCall(ISGetLocalSize(icol[i], &m));
    PetscCall(ISGetIndices(irow[i], &idxr));
    PetscCall(ISGetIndices(icol[i], &idxc));
    if (scall != MAT_REUSE_MATRIX) PetscCall(MatCreateDense(PETSC_COMM_SELF, nrow, m, nrow, m, nullptr, (*submat) + i));
    PetscCall(MatDenseGetArrayWrite((*submat)[i], &ptr));
    /* flg tracks whether the fast path below fully populated ptr; any failed precondition
       falls through to the full copy_submatrix() reassembly at the bottom */
    if (irow[i] == icol[i]) { /* same row and column IS? */
      PetscCall(MatHasCongruentLayouts(A, &flg));
      if (flg) {
        PetscCall(ISSorted(irow[i], &flg));
        if (flg) { /* sorted IS? */
          it = std::lower_bound(idxr, idxr + nrow, A->rmap->rstart);
          if (it != idxr + nrow && *it == A->rmap->rstart) {    /* rmap->rstart in IS? */
            if (std::distance(idxr, it) + A->rmap->n <= nrow) { /* long enough IS to store the local diagonal block? */
              for (PetscInt j = 0; j < A->rmap->n && flg; ++j)
                if (PetscUnlikely(it[j] != A->rmap->rstart + j)) flg = PETSC_FALSE;
              if (flg) { /* complete local diagonal block in IS? */
                /* fast extraction when the local diagonal block is part of the submatrix, e.g., for PCASM or PCHPDDM
                 *      [   B   C   E   ]
                 *  A = [   B   D   E   ]
                 *      [   B   F   E   ]
                 */
                m = std::distance(idxr, it); /* shift of the coefficient (0,0) of block D from above */
                PetscCall(MatGetDiagonalBlock(A, &D));
                PetscCall(MatDenseGetArrayRead(D, &copy));
                for (PetscInt k = 0; k < A->rmap->n; ++k) PetscCall(PetscArraycpy(ptr + (m + k) * nrow + m, copy + k * A->rmap->n, A->rmap->n)); /* block D from above */
                PetscCall(MatDenseRestoreArrayRead(D, &copy));
                if (m) {
                  a->wrapper->copy_submatrix(nrow, m, idxr, idxc, ptr); /* vertical block B from above */
                  /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
                  if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) {
                    /* B and BT are views into ptr (no copy); nrow is the true leading dimension */
                    PetscCall(MatCreateDense(PETSC_COMM_SELF, A->rmap->n, m, A->rmap->n, m, ptr + m, &B));
                    PetscCall(MatDenseSetLDA(B, nrow));
                    PetscCall(MatCreateDense(PETSC_COMM_SELF, m, A->rmap->n, m, A->rmap->n, ptr + m * nrow, &BT));
                    PetscCall(MatDenseSetLDA(BT, nrow));
                    if (A->hermitian == PETSC_BOOL3_TRUE && PetscDefined(USE_COMPLEX)) {
                      PetscCall(MatHermitianTranspose(B, MAT_REUSE_MATRIX, &BT));
                    } else {
                      PetscCall(MatTransposeSetPrecursor(B, BT));
                      PetscCall(MatTranspose(B, MAT_REUSE_MATRIX, &BT));
                    }
                    PetscCall(MatDestroy(&B));
                    PetscCall(MatDestroy(&BT));
                  } else {
                    for (PetscInt k = 0; k < A->rmap->n; ++k) { /* block C from above */
                      a->wrapper->copy_submatrix(m, 1, idxr, idxc + m + k, ptr + (m + k) * nrow);
                    }
                  }
                }
                if (m + A->rmap->n != nrow) {
                  a->wrapper->copy_submatrix(nrow, std::distance(it + A->rmap->n, idxr + nrow), idxr, idxc + m + A->rmap->n, ptr + (m + A->rmap->n) * nrow); /* vertical block E from above */
                  /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
                  if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) {
                    PetscCall(MatCreateDense(PETSC_COMM_SELF, A->rmap->n, nrow - (m + A->rmap->n), A->rmap->n, nrow - (m + A->rmap->n), ptr + (m + A->rmap->n) * nrow + m, &B));
                    PetscCall(MatDenseSetLDA(B, nrow));
                    PetscCall(MatCreateDense(PETSC_COMM_SELF, nrow - (m + A->rmap->n), A->rmap->n, nrow - (m + A->rmap->n), A->rmap->n, ptr + m * nrow + m + A->rmap->n, &BT));
                    PetscCall(MatDenseSetLDA(BT, nrow));
                    if (A->hermitian == PETSC_BOOL3_TRUE && PetscDefined(USE_COMPLEX)) {
                      PetscCall(MatHermitianTranspose(B, MAT_REUSE_MATRIX, &BT));
                    } else {
                      PetscCall(MatTransposeSetPrecursor(B, BT));
                      PetscCall(MatTranspose(B, MAT_REUSE_MATRIX, &BT));
                    }
                    PetscCall(MatDestroy(&B));
                    PetscCall(MatDestroy(&BT));
                  } else {
                    for (PetscInt k = 0; k < A->rmap->n; ++k) { /* block F from above */
                      a->wrapper->copy_submatrix(std::distance(it + A->rmap->n, idxr + nrow), 1, it + A->rmap->n, idxc + m + k, ptr + (m + k) * nrow + m + A->rmap->n);
                    }
                  }
                }
              } /* complete local diagonal block not in IS */
            } else flg = PETSC_FALSE; /* IS not long enough to store the local diagonal block */
          } else flg = PETSC_FALSE;   /* rmap->rstart not in IS */
        } /* unsorted IS */
      }
    } else flg = PETSC_FALSE;                                       /* different row and column IS */
    if (!flg) a->wrapper->copy_submatrix(nrow, m, idxr, idxc, ptr); /* reassemble everything */
    PetscCall(ISRestoreIndices(irow[i], &idxr));
    PetscCall(ISRestoreIndices(icol[i], &idxc));
    PetscCall(MatDenseRestoreArrayWrite((*submat)[i], &ptr));
    PetscCall(MatShift((*submat)[i], shift));
    PetscCall(MatScale((*submat)[i], scale));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
259 
/* release everything owned by the MATHTOOL context and detach the composed methods */
static PetscErrorCode MatDestroy_Htool(Mat A)
{
  Mat_Htool               *a;
  PetscContainer           container;
  MatHtoolKernelTranspose *kernelt;

  PetscFunctionBegin;
  PetscCall(MatShellGetContext(A, &a));
  /* detach the type-specific methods composed on A (presumably at creation) so they can no longer be dispatched */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_seqdense_C", nullptr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_mpidense_C", nullptr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_seqdense_C", nullptr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_mpidense_C", nullptr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetHierarchicalMat_C", nullptr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetKernel_C", nullptr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationSource_C", nullptr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationTarget_C", nullptr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUsePermutation_C", nullptr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUseRecompression_C", nullptr));
  PetscCall(PetscObjectQuery((PetscObject)A, "KernelTranspose", (PetscObject *)&container));
  if (container) { /* created in MatTranspose_Htool() */
    PetscCall(PetscContainerGetPointer(container, &kernelt));
    PetscCall(MatDestroy(&kernelt->A));
    PetscCall(PetscObjectCompose((PetscObject)A, "KernelTranspose", nullptr)); /* dropping the container releases it */
  }
  /* source and target coordinates may alias (square matrix with shared geometry), hence the guard */
  if (a->gcoords_source != a->gcoords_target) PetscCall(PetscFree(a->gcoords_source));
  PetscCall(PetscFree(a->gcoords_target));
  delete a->wrapper;
  a->target_cluster.reset();
  a->source_cluster.reset();
  a->distributed_operator_holder.reset();
  PetscCall(PetscFree(a));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContext_C", nullptr)); // needed to avoid a call to MatShellSetContext_Immutable()
  PetscFunctionReturn(PETSC_SUCCESS);
}
294 
/* print compression statistics and assembly parameters of the hierarchical matrix (ASCII viewers only) */
static PetscErrorCode MatView_Htool(Mat A, PetscViewer pv)
{
  Mat_Htool                         *a;
  PetscScalar                        shift, scale;
  PetscBool                          flg;
  std::map<std::string, std::string> hmatrix_information;

  PetscFunctionBegin;
  PetscCall(MatShellGetContext(A, &a));
  /* statistics (ratios, block counts, ranks) gathered across the communicator as preformatted strings */
  hmatrix_information = htool::get_distributed_hmatrix_information(a->distributed_operator_holder->hmatrix, PetscObjectComm((PetscObject)A));
  PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERASCII, &flg));
  if (flg) {
    PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
    PetscCall(PetscViewerASCIIPrintf(pv, "symmetry: %c\n", a->distributed_operator_holder->block_diagonal_hmatrix->get_symmetry()));
    /* only report shift/scale when they differ from the identity */
    if (PetscAbsScalar(scale - 1.0) > PETSC_MACHINE_EPSILON) {
#if defined(PETSC_USE_COMPLEX)
      PetscCall(PetscViewerASCIIPrintf(pv, "scaling: %g+%gi\n", (double)PetscRealPart(scale), (double)PetscImaginaryPart(scale)));
#else
      PetscCall(PetscViewerASCIIPrintf(pv, "scaling: %g\n", (double)scale));
#endif
    }
    if (PetscAbsScalar(shift) > PETSC_MACHINE_EPSILON) {
#if defined(PETSC_USE_COMPLEX)
      PetscCall(PetscViewerASCIIPrintf(pv, "shift: %g+%gi\n", (double)PetscRealPart(shift), (double)PetscImaginaryPart(shift)));
#else
      PetscCall(PetscViewerASCIIPrintf(pv, "shift: %g\n", (double)shift));
#endif
    }
    PetscCall(PetscViewerASCIIPrintf(pv, "maximal cluster leaf size: %" PetscInt_FMT "\n", a->max_cluster_leaf_size));
    PetscCall(PetscViewerASCIIPrintf(pv, "epsilon: %g\n", (double)a->epsilon));
    PetscCall(PetscViewerASCIIPrintf(pv, "eta: %g\n", (double)a->eta));
    PetscCall(PetscViewerASCIIPrintf(pv, "minimum target depth: %" PetscInt_FMT "\n", a->depth[0]));
    PetscCall(PetscViewerASCIIPrintf(pv, "minimum source depth: %" PetscInt_FMT "\n", a->depth[1]));
    PetscCall(PetscViewerASCIIPrintf(pv, "compressor: %s\n", MatHtoolCompressorTypes[a->compressor]));
    PetscCall(PetscViewerASCIIPrintf(pv, "clustering: %s\n", MatHtoolClusteringTypes[a->clustering]));
    PetscCall(PetscViewerASCIIPrintf(pv, "compression ratio: %s\n", hmatrix_information["Compression_ratio"].c_str()));
    PetscCall(PetscViewerASCIIPrintf(pv, "space saving: %s\n", hmatrix_information["Space_saving"].c_str()));
    PetscCall(PetscViewerASCIIPrintf(pv, "block tree consistency: %s\n", PetscBools[a->distributed_operator_holder->hmatrix.is_block_tree_consistent()]));
    PetscCall(PetscViewerASCIIPrintf(pv, "recompression: %s\n", PetscBools[a->recompression]));
    PetscCall(PetscViewerASCIIPrintf(pv, "number of dense (resp. low rank) matrices: %s (resp. %s)\n", hmatrix_information["Number_of_dense_blocks"].c_str(), hmatrix_information["Number_of_low_rank_blocks"].c_str()));
    PetscCall(
      PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) dense block sizes: (%s, %s, %s)\n", hmatrix_information["Dense_block_size_min"].c_str(), hmatrix_information["Dense_block_size_mean"].c_str(), hmatrix_information["Dense_block_size_max"].c_str()));
    PetscCall(PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) low rank block sizes: (%s, %s, %s)\n", hmatrix_information["Low_rank_block_size_min"].c_str(), hmatrix_information["Low_rank_block_size_mean"].c_str(),
                                     hmatrix_information["Low_rank_block_size_max"].c_str()));
    PetscCall(PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) ranks: (%s, %s, %s)\n", hmatrix_information["Rank_min"].c_str(), hmatrix_information["Rank_mean"].c_str(), hmatrix_information["Rank_max"].c_str()));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
343 
344 /* naive implementation of MatGetRow() needed for MatConvert_Nest_AIJ() */
MatGetRow_Htool(Mat A,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)345 static PetscErrorCode MatGetRow_Htool(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
346 {
347   Mat_Htool   *a;
348   PetscScalar  shift, scale;
349   PetscInt    *idxc;
350   PetscBLASInt one = 1, bn;
351 
352   PetscFunctionBegin;
353   PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
354   PetscCall(MatShellGetContext(A, &a));
355   if (nz) *nz = A->cmap->N;
356   if (idx || v) { /* even if !idx, need to set idxc for htool::copy_submatrix() */
357     PetscCall(PetscMalloc1(A->cmap->N, &idxc));
358     for (PetscInt i = 0; i < A->cmap->N; ++i) idxc[i] = i;
359   }
360   if (idx) *idx = idxc;
361   if (v) {
362     PetscCall(PetscMalloc1(A->cmap->N, v));
363     if (a->wrapper) a->wrapper->copy_submatrix(1, A->cmap->N, &row, idxc, *v);
364     else reinterpret_cast<htool::VirtualGenerator<PetscScalar> *>(a->kernelctx)->copy_submatrix(1, A->cmap->N, &row, idxc, *v);
365     PetscCall(PetscBLASIntCast(A->cmap->N, &bn));
366     PetscCallCXX(htool::Blas<PetscScalar>::scal(&bn, &scale, *v, &one));
367     if (row < A->cmap->N) (*v)[row] += shift;
368   }
369   if (!idx) PetscCall(PetscFree(idxc));
370   PetscFunctionReturn(PETSC_SUCCESS);
371 }
372 
MatRestoreRow_Htool(Mat,PetscInt,PetscInt *,PetscInt ** idx,PetscScalar ** v)373 static PetscErrorCode MatRestoreRow_Htool(Mat, PetscInt, PetscInt *, PetscInt **idx, PetscScalar **v)
374 {
375   PetscFunctionBegin;
376   if (idx) PetscCall(PetscFree(*idx));
377   if (v) PetscCall(PetscFree(*v));
378   PetscFunctionReturn(PETSC_SUCCESS);
379 }
380 
/* process the -mat_htool_* command-line options; values take effect at the next MatAssemblyEnd() */
static PetscErrorCode MatSetFromOptions_Htool(Mat A, PetscOptionItems PetscOptionsObject)
{
  Mat_Htool *a;
  PetscInt   n;
  PetscBool  flg;

  PetscFunctionBegin;
  PetscCall(MatShellGetContext(A, &a));
  PetscOptionsHeadBegin(PetscOptionsObject, "Htool options");
  /* current context values serve as defaults; the trailing argument is the lower bound */
  PetscCall(PetscOptionsBoundedInt("-mat_htool_max_cluster_leaf_size", "Maximal leaf size in cluster tree", nullptr, a->max_cluster_leaf_size, &a->max_cluster_leaf_size, nullptr, 0));
  PetscCall(PetscOptionsBoundedReal("-mat_htool_epsilon", "Relative error in Frobenius norm when approximating a block", nullptr, a->epsilon, &a->epsilon, nullptr, 0.0));
  PetscCall(PetscOptionsReal("-mat_htool_eta", "Admissibility condition tolerance", nullptr, a->eta, &a->eta, nullptr));
  PetscCall(PetscOptionsBoundedInt("-mat_htool_min_target_depth", "Minimal cluster tree depth associated with the rows", nullptr, a->depth[0], a->depth, nullptr, 0));
  PetscCall(PetscOptionsBoundedInt("-mat_htool_min_source_depth", "Minimal cluster tree depth associated with the columns", nullptr, a->depth[1], a->depth + 1, nullptr, 0));
  PetscCall(PetscOptionsBool("-mat_htool_block_tree_consistency", "Block tree consistency", nullptr, a->block_tree_consistency, &a->block_tree_consistency, nullptr));
  PetscCall(PetscOptionsBool("-mat_htool_recompression", "Use recompression", nullptr, a->recompression, &a->recompression, nullptr));

  /* enum options: the context member is only overwritten when the option was actually given */
  n = 0;
  PetscCall(PetscOptionsEList("-mat_htool_compressor", "Type of compression", "MatHtoolCompressorType", MatHtoolCompressorTypes, PETSC_STATIC_ARRAY_LENGTH(MatHtoolCompressorTypes), MatHtoolCompressorTypes[MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA], &n, &flg));
  if (flg) a->compressor = MatHtoolCompressorType(n);
  n = 0;
  PetscCall(PetscOptionsEList("-mat_htool_clustering", "Type of clustering", "MatHtoolClusteringType", MatHtoolClusteringTypes, PETSC_STATIC_ARRAY_LENGTH(MatHtoolClusteringTypes), MatHtoolClusteringTypes[MAT_HTOOL_CLUSTERING_PCA_REGULAR], &n, &flg));
  if (flg) a->clustering = MatHtoolClusteringType(n);
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
407 
MatAssemblyEnd_Htool(Mat A,MatAssemblyType)408 static PetscErrorCode MatAssemblyEnd_Htool(Mat A, MatAssemblyType)
409 {
410   Mat_Htool                                                           *a;
411   const PetscInt                                                      *ranges;
412   PetscInt                                                            *offset;
413   PetscMPIInt                                                          size, rank;
414   char                                                                 S = PetscDefined(USE_COMPLEX) && A->hermitian == PETSC_BOOL3_TRUE ? 'H' : (A->symmetric == PETSC_BOOL3_TRUE ? 'S' : 'N'), uplo = S == 'N' ? 'N' : 'U';
415   htool::VirtualGenerator<PetscScalar>                                *generator = nullptr;
416   htool::ClusterTreeBuilder<PetscReal>                                 recursive_build_strategy;
417   htool::Cluster<PetscReal>                                           *source_cluster;
418   std::shared_ptr<htool::VirtualInternalLowRankGenerator<PetscScalar>> compressor;
419 
420   PetscFunctionBegin;
421   for (size_t i = 0; i < PETSC_STATIC_ARRAY_LENGTH(HtoolCite); ++i) PetscCall(PetscCitationsRegister(HtoolCitations[i], HtoolCite + i));
422   PetscCall(MatShellGetContext(A, &a));
423   delete a->wrapper;
424   a->target_cluster.reset();
425   a->source_cluster.reset();
426   a->distributed_operator_holder.reset();
427   // clustering
428   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
429   PetscCall(PetscMalloc1(2 * size, &offset));
430   PetscCall(MatGetOwnershipRanges(A, &ranges));
431   for (PetscInt i = 0; i < size; ++i) {
432     offset[2 * i]     = ranges[i];
433     offset[2 * i + 1] = ranges[i + 1] - ranges[i];
434   }
435   switch (a->clustering) {
436   case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
437     recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
438     break;
439   case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
440     recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
441     break;
442   case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
443     recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::RegularSplitting<PetscReal>>>());
444     break;
445   default:
446     recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::RegularSplitting<PetscReal>>>());
447   }
448   recursive_build_strategy.set_maximal_leaf_size(a->max_cluster_leaf_size);
449   a->target_cluster = std::make_unique<htool::Cluster<PetscReal>>(recursive_build_strategy.create_cluster_tree_from_local_partition(A->rmap->N, a->dim, a->gcoords_target, 2, size, offset));
450   if (a->gcoords_target != a->gcoords_source) {
451     PetscCall(MatGetOwnershipRangesColumn(A, &ranges));
452     for (PetscInt i = 0; i < size; ++i) {
453       offset[2 * i]     = ranges[i];
454       offset[2 * i + 1] = ranges[i + 1] - ranges[i];
455     }
456     switch (a->clustering) {
457     case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
458       recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
459       break;
460     case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
461       recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::GeometricSplitting<PetscReal>>>());
462       break;
463     case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
464       recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeBoundingBox<PetscReal>, htool::RegularSplitting<PetscReal>>>());
465       break;
466     default:
467       recursive_build_strategy.set_partitioning_strategy(std::make_shared<htool::Partitioning<PetscReal, htool::ComputeLargestExtent<PetscReal>, htool::RegularSplitting<PetscReal>>>());
468     }
469     recursive_build_strategy.set_maximal_leaf_size(a->max_cluster_leaf_size);
470     a->source_cluster = std::make_unique<htool::Cluster<PetscReal>>(recursive_build_strategy.create_cluster_tree_from_local_partition(A->cmap->N, a->dim, a->gcoords_source, 2, size, offset));
471     S = uplo       = 'N';
472     source_cluster = a->source_cluster.get();
473   } else source_cluster = a->target_cluster.get();
474   PetscCall(PetscFree(offset));
475   // generator
476   if (a->kernel) a->wrapper = new WrapperHtool(a->dim, a->kernel, a->kernelctx);
477   else {
478     a->wrapper = nullptr;
479     generator  = reinterpret_cast<htool::VirtualGenerator<PetscScalar> *>(a->kernelctx);
480   }
481   // compressor
482   switch (a->compressor) {
483   case MAT_HTOOL_COMPRESSOR_FULL_ACA:
484     compressor = std::make_shared<htool::fullACA<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, a->target_cluster->get_permutation().data(), source_cluster->get_permutation().data());
485     break;
486   case MAT_HTOOL_COMPRESSOR_SVD:
487     compressor = std::make_shared<htool::SVD<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, a->target_cluster->get_permutation().data(), source_cluster->get_permutation().data());
488     break;
489   default:
490     compressor = std::make_shared<htool::sympartialACA<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, a->target_cluster->get_permutation().data(), source_cluster->get_permutation().data());
491   }
492   // local hierarchical matrix
493   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
494   auto hmatrix_builder = htool::HMatrixTreeBuilder<PetscScalar>(a->epsilon, a->eta, S, uplo);
495   if (a->recompression) {
496     std::shared_ptr<htool::VirtualInternalLowRankGenerator<PetscScalar>> RecompressedLowRankGenerator = std::make_shared<htool::RecompressedLowRankGenerator<PetscScalar>>(*compressor, std::function<void(htool::LowRankMatrix<PetscScalar> &)>(htool::SVD_recompression<PetscScalar>));
497     hmatrix_builder.set_low_rank_generator(RecompressedLowRankGenerator);
498   } else {
499     hmatrix_builder.set_low_rank_generator(compressor);
500   }
501   hmatrix_builder.set_minimal_target_depth(a->depth[0]);
502   hmatrix_builder.set_minimal_source_depth(a->depth[1]);
503   PetscCheck(a->block_tree_consistency || (!a->block_tree_consistency && !(A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE)), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot have a MatHtool with inconsistent block tree which is either symmetric or Hermitian");
504   hmatrix_builder.set_block_tree_consistency(a->block_tree_consistency);
505   a->distributed_operator_holder = std::make_unique<htool::DefaultApproximationBuilder<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, *a->target_cluster, *source_cluster, hmatrix_builder, PetscObjectComm((PetscObject)A));
506   PetscFunctionReturn(PETSC_SUCCESS);
507 }
508 
/* Numeric phase of C = op(A)*B where A is a MATHTOOL, B is dense, and C is a
   preallocated dense matrix with contiguous column-major storage (lda == local rows) */
static PetscErrorCode MatProductNumeric_Htool(Mat C)
{
  Mat_Product       *product = C->product;
  Mat_Htool         *a;
  const PetscScalar *in;  /* read-only values of the dense right-hand side B */
  PetscScalar       *out; /* values of the dense result C */
  PetscInt           N, lda;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCall(MatGetSize(C, nullptr, &N)); /* global number of columns of C, i.e., number of right-hand sides */
  PetscCall(MatDenseGetLDA(C, &lda));
  /* the Htool product routines write a packed local block, so padding in C is not supported */
  PetscCheck(lda == C->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")", lda, C->rmap->n);
  PetscCall(MatDenseGetArrayRead(product->B, &in));
  PetscCall(MatDenseGetArrayWrite(C, &out));
  PetscCall(MatShellGetContext(product->A, &a));
  switch (product->type) {
  case MATPRODUCT_AB: /* C = 1.0*A*B + 0.0*C */
    /* with permutation enabled Htool maps between user numbering and its internal cluster numbering itself */
    if (a->permutation == PETSC_TRUE) htool::add_distributed_operator_matrix_product_local_to_local<PetscScalar>('N', 1.0, a->distributed_operator_holder->distributed_operator, in, 0.0, out, N, nullptr);
    else htool::internal_add_distributed_operator_matrix_product_local_to_local<PetscScalar>('N', 1.0, a->distributed_operator_holder->distributed_operator, in, 0.0, out, N, nullptr);
    break;
  case MATPRODUCT_AtB: /* C = 1.0*A^T*B + 0.0*C */
    if (a->permutation == PETSC_TRUE) htool::add_distributed_operator_matrix_product_local_to_local<PetscScalar>('T', 1.0, a->distributed_operator_holder->distributed_operator, in, 0.0, out, N, nullptr);
    else htool::internal_add_distributed_operator_matrix_product_local_to_local<PetscScalar>('T', 1.0, a->distributed_operator_holder->distributed_operator, in, 0.0, out, N, nullptr);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductType %s is not supported", MatProductTypes[product->type]);
  }
  PetscCall(MatDenseRestoreArrayWrite(C, &out));
  PetscCall(MatDenseRestoreArrayRead(product->B, &in));
  PetscFunctionReturn(PETSC_SUCCESS);
}
541 
/* Symbolic phase of C = op(A)*B: validates the operand types and product kind,
   sizes and sets up C as dense, and installs the numeric-phase callback */
static PetscErrorCode MatProductSymbolic_Htool(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A, B;
  PetscBool    flg;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  A = product->A;
  B = product->B;
  /* only dense right-hand sides and the AB/AtB products are handled */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, ""));
  PetscCheck(flg && (product->type == MATPRODUCT_AB || product->type == MATPRODUCT_AtB), PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "ProductType %s not supported for %s", MatProductTypes[product->type], ((PetscObject)product->B)->type_name);
  if (C->rmap->n == PETSC_DECIDE || C->cmap->n == PETSC_DECIDE || C->rmap->N == PETSC_DECIDE || C->cmap->N == PETSC_DECIDE) { /* sizes not fixed by the caller */
    if (product->type == MATPRODUCT_AB) PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
    else PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, A->cmap->N, B->cmap->N)); /* AtB: rows of C come from columns of A */
  }
  PetscCall(MatSetType(C, MATDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  C->ops->productsymbolic = nullptr; /* symbolic phase only runs once */
  C->ops->productnumeric  = MatProductNumeric_Htool;
  PetscFunctionReturn(PETSC_SUCCESS);
}
567 
MatProductSetFromOptions_Htool(Mat C)568 static PetscErrorCode MatProductSetFromOptions_Htool(Mat C)
569 {
570   PetscFunctionBegin;
571   MatCheckProduct(C, 1);
572   if (C->product->type == MATPRODUCT_AB || C->product->type == MATPRODUCT_AtB) C->ops->productsymbolic = MatProductSymbolic_Htool;
573   PetscFunctionReturn(PETSC_SUCCESS);
574 }
575 
MatHtoolGetHierarchicalMat_Htool(Mat A,const htool::DistributedOperator<PetscScalar> ** distributed_operator)576 static PetscErrorCode MatHtoolGetHierarchicalMat_Htool(Mat A, const htool::DistributedOperator<PetscScalar> **distributed_operator)
577 {
578   Mat_Htool *a;
579 
580   PetscFunctionBegin;
581   PetscCall(MatShellGetContext(A, &a));
582   *distributed_operator = &a->distributed_operator_holder->distributed_operator;
583   PetscFunctionReturn(PETSC_SUCCESS);
584 }
585 
586 /*@C
587   MatHtoolGetHierarchicalMat - Retrieves the opaque pointer to a Htool virtual matrix stored in a `MATHTOOL`.
588 
589   No Fortran Support, No C Support
590 
591   Input Parameter:
592 . A - hierarchical matrix
593 
594   Output Parameter:
595 . distributed_operator - opaque pointer to a Htool virtual matrix
596 
597   Level: advanced
598 
599 .seealso: [](ch_matrices), `Mat`, `MATHTOOL`
600 @*/
PETSC_EXTERN PetscErrorCode MatHtoolGetHierarchicalMat(Mat A, const htool::DistributedOperator<PetscScalar> **distributed_operator)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscAssertPointer(distributed_operator, 2);
  /* dispatch to the implementation composed on the matrix; a no-op for non-MATHTOOL matrices */
  PetscTryMethod(A, "MatHtoolGetHierarchicalMat_C", (Mat, const htool::DistributedOperator<PetscScalar> **), (A, distributed_operator));
  PetscFunctionReturn(PETSC_SUCCESS);
}
609 
MatHtoolSetKernel_Htool(Mat A,MatHtoolKernelFn * kernel,void * kernelctx)610 static PetscErrorCode MatHtoolSetKernel_Htool(Mat A, MatHtoolKernelFn *kernel, void *kernelctx)
611 {
612   Mat_Htool *a;
613 
614   PetscFunctionBegin;
615   PetscCall(MatShellGetContext(A, &a));
616   a->kernel    = kernel;
617   a->kernelctx = kernelctx;
618   delete a->wrapper;
619   if (a->kernel) a->wrapper = new WrapperHtool(a->dim, a->kernel, a->kernelctx);
620   PetscFunctionReturn(PETSC_SUCCESS);
621 }
622 
623 /*@C
624   MatHtoolSetKernel - Sets the kernel and context used for the assembly of a `MATHTOOL`.
625 
626   Collective, No Fortran Support
627 
628   Input Parameters:
629 + A         - hierarchical matrix
630 . kernel    - computational kernel (or `NULL`)
631 - kernelctx - kernel context (if kernel is `NULL`, the pointer must be of type htool::VirtualGenerator<PetscScalar>*)
632 
633   Level: advanced
634 
635 .seealso: [](ch_matrices), `Mat`, `MATHTOOL`, `MatCreateHtoolFromKernel()`
636 @*/
PetscErrorCode MatHtoolSetKernel(Mat A, MatHtoolKernelFn *kernel, void *kernelctx)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  /* either a kernel function or a generator context must be supplied */
  if (!kernelctx) PetscValidFunction(kernel, 2);
  if (!kernel) PetscAssertPointer(kernelctx, 3);
  PetscTryMethod(A, "MatHtoolSetKernel_C", (Mat, MatHtoolKernelFn *, void *), (A, kernel, kernelctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
646 
/* Implementation behind MatHtoolGetPermutationSource(): builds an IS holding
   this rank's slice of the source-cluster permutation */
static PetscErrorCode MatHtoolGetPermutationSource_Htool(Mat A, IS *is)
{
  Mat_Htool                       *a;
  PetscMPIInt                      rank;
  const std::vector<PetscInt>     *source;
  const htool::Cluster<PetscReal> *local_source_cluster;

  PetscFunctionBegin;
  PetscCall(MatShellGetContext(A, &a));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
  /* when no separate source cluster was built, the target cluster doubles as the source cluster */
  local_source_cluster = a->source_cluster ? &a->source_cluster->get_cluster_on_partition(rank) : &a->target_cluster->get_cluster_on_partition(rank);
  source               = &local_source_cluster->get_permutation();
  /* copy out only the contiguous chunk of the permutation owned by this rank */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), local_source_cluster->get_size(), source->data() + local_source_cluster->get_offset(), PETSC_COPY_VALUES, is));
  PetscCall(ISSetPermutation(*is));
  PetscFunctionReturn(PETSC_SUCCESS);
}
663 
664 /*@
665   MatHtoolGetPermutationSource - Gets the permutation associated to the source cluster for a `MATHTOOL` matrix.
666 
667   Input Parameter:
668 . A - hierarchical matrix
669 
670   Output Parameter:
671 . is - permutation
672 
673   Level: advanced
674 
675 .seealso: [](ch_matrices), `Mat`, `MATHTOOL`, `MatHtoolGetPermutationTarget()`, `MatHtoolUsePermutation()`
676 @*/
MatHtoolGetPermutationSource(Mat A,IS * is)677 PetscErrorCode MatHtoolGetPermutationSource(Mat A, IS *is)
678 {
679   PetscFunctionBegin;
680   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
681   if (!is) PetscAssertPointer(is, 2);
682   PetscTryMethod(A, "MatHtoolGetPermutationSource_C", (Mat, IS *), (A, is));
683   PetscFunctionReturn(PETSC_SUCCESS);
684 }
685 
/* Implementation behind MatHtoolGetPermutationTarget(): builds an IS holding
   this rank's slice of the target-cluster permutation */
static PetscErrorCode MatHtoolGetPermutationTarget_Htool(Mat A, IS *is)
{
  Mat_Htool                   *a;
  const std::vector<PetscInt> *target;
  PetscMPIInt                  rank;

  PetscFunctionBegin;
  PetscCall(MatShellGetContext(A, &a));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
  target = &a->target_cluster->get_permutation();
  /* copy out only the contiguous chunk of the permutation owned by this rank */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), a->target_cluster->get_cluster_on_partition(rank).get_size(), target->data() + a->target_cluster->get_cluster_on_partition(rank).get_offset(), PETSC_COPY_VALUES, is));
  PetscCall(ISSetPermutation(*is));
  PetscFunctionReturn(PETSC_SUCCESS);
}
700 
701 /*@
702   MatHtoolGetPermutationTarget - Gets the permutation associated to the target cluster for a `MATHTOOL` matrix.
703 
704   Input Parameter:
705 . A - hierarchical matrix
706 
707   Output Parameter:
708 . is - permutation
709 
710   Level: advanced
711 
712 .seealso: [](ch_matrices), `Mat`, `MATHTOOL`, `MatHtoolGetPermutationSource()`, `MatHtoolUsePermutation()`
713 @*/
MatHtoolGetPermutationTarget(Mat A,IS * is)714 PetscErrorCode MatHtoolGetPermutationTarget(Mat A, IS *is)
715 {
716   PetscFunctionBegin;
717   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
718   if (!is) PetscAssertPointer(is, 2);
719   PetscTryMethod(A, "MatHtoolGetPermutationTarget_C", (Mat, IS *), (A, is));
720   PetscFunctionReturn(PETSC_SUCCESS);
721 }
722 
MatHtoolUsePermutation_Htool(Mat A,PetscBool use)723 static PetscErrorCode MatHtoolUsePermutation_Htool(Mat A, PetscBool use)
724 {
725   Mat_Htool *a;
726 
727   PetscFunctionBegin;
728   PetscCall(MatShellGetContext(A, &a));
729   a->permutation = use;
730   PetscFunctionReturn(PETSC_SUCCESS);
731 }
732 
733 /*@
734   MatHtoolUsePermutation - Sets whether a `MATHTOOL` matrix should permute input (resp. output) vectors following its internal source (resp. target) permutation.
735 
736   Input Parameters:
737 + A   - hierarchical matrix
738 - use - Boolean value
739 
740   Level: advanced
741 
742 .seealso: [](ch_matrices), `Mat`, `MATHTOOL`, `MatHtoolGetPermutationSource()`, `MatHtoolGetPermutationTarget()`
743 @*/
PetscErrorCode MatHtoolUsePermutation(Mat A, PetscBool use)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveBool(A, use, 2); /* all ranks must pass the same flag */
  PetscTryMethod(A, "MatHtoolUsePermutation_C", (Mat, PetscBool), (A, use));
  PetscFunctionReturn(PETSC_SUCCESS);
}
752 
MatHtoolUseRecompression_Htool(Mat A,PetscBool use)753 static PetscErrorCode MatHtoolUseRecompression_Htool(Mat A, PetscBool use)
754 {
755   Mat_Htool *a;
756 
757   PetscFunctionBegin;
758   PetscCall(MatShellGetContext(A, &a));
759   a->recompression = use;
760   PetscFunctionReturn(PETSC_SUCCESS);
761 }
762 
763 /*@
764   MatHtoolUseRecompression - Sets whether a `MATHTOOL` matrix should use recompression.
765 
766   Input Parameters:
767 + A   - hierarchical matrix
768 - use - Boolean value
769 
770   Level: advanced
771 
772 .seealso: [](ch_matrices), `Mat`, `MATHTOOL`
773 @*/
PetscErrorCode MatHtoolUseRecompression(Mat A, PetscBool use)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveBool(A, use, 2); /* all ranks must pass the same flag */
  PetscTryMethod(A, "MatHtoolUseRecompression_C", (Mat, PetscBool), (A, use));
  PetscFunctionReturn(PETSC_SUCCESS);
}
782 
/* Converts a MATHTOOL matrix to dense: expands the hierarchical matrix entry
   by entry into a dense matrix (user numbering), then applies the shell's
   accumulated shift and scale */
static PetscErrorCode MatConvert_Htool_Dense(Mat A, MatType, MatReuse reuse, Mat *B)
{
  Mat          C;
  Mat_Htool   *a;
  PetscScalar *array, shift, scale;
  PetscInt     lda;

  PetscFunctionBegin;
  /* only plain shift/scale are representable in the dense copy; any other shell modification is disallowed */
  PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
  PetscCall(MatShellGetContext(A, &a));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *B;
    PetscCheck(C->rmap->n == A->rmap->n && C->cmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible dimensions");
    PetscCall(MatDenseGetLDA(C, &lda));
    /* the Htool copy routine writes a packed block, so padding is not supported */
    PetscCheck(lda == C->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")", lda, C->rmap->n);
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
    PetscCall(MatSetSizes(C, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(C, MATDENSE));
    PetscCall(MatSetUp(C));
    PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseGetArrayWrite(C, &array));
  htool::copy_to_dense_in_user_numbering(a->distributed_operator_holder->hmatrix, array);
  PetscCall(MatDenseRestoreArrayWrite(C, &array));
  /* replay the shell's shift/scale so the dense matrix matches the operator's action */
  PetscCall(MatShift(C, shift));
  PetscCall(MatScale(C, scale));
  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &C));
  else *B = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
816 
GenEntriesTranspose(PetscInt sdim,PetscInt M,PetscInt N,const PetscInt * rows,const PetscInt * cols,PetscScalar * ptr,PetscCtx ctx)817 static PetscErrorCode GenEntriesTranspose(PetscInt sdim, PetscInt M, PetscInt N, const PetscInt *rows, const PetscInt *cols, PetscScalar *ptr, PetscCtx ctx)
818 {
819   MatHtoolKernelTranspose *generator = (MatHtoolKernelTranspose *)ctx;
820   PetscScalar             *tmp;
821 
822   PetscFunctionBegin;
823   PetscCall(generator->kernel(sdim, N, M, cols, rows, ptr, generator->kernelctx));
824   PetscCall(PetscMalloc1(M * N, &tmp));
825   PetscCall(PetscArraycpy(tmp, ptr, M * N));
826   for (PetscInt i = 0; i < M; ++i) {
827     for (PetscInt j = 0; j < N; ++j) ptr[i + j * M] = tmp[j + i * N];
828   }
829   PetscCall(PetscFree(tmp));
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 /* naive implementation which keeps a reference to the original Mat */
/* naive implementation which keeps a reference to the original Mat */
static PetscErrorCode MatTranspose_Htool(Mat A, MatReuse reuse, Mat *B)
{
  Mat                      C;
  Mat_Htool               *a, *c;
  PetscScalar              shift, scale;
  PetscInt                 M = A->rmap->N, N = A->cmap->N, m = A->rmap->n, n = A->cmap->n;
  PetscContainer           container;
  MatHtoolKernelTranspose *kernelt; /* wraps A's kernel so entries are generated transposed */

  PetscFunctionBegin;
  /* only plain shift/scale are representable; any other shell modification is disallowed */
  PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
  PetscCall(MatShellGetContext(A, &a));
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  PetscCheck(reuse != MAT_INPLACE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatTranspose() with MAT_INPLACE_MATRIX not supported");
  if (reuse == MAT_INITIAL_MATRIX) {
    /* build a fresh MATHTOOL with swapped dimensions and attach the transpose-kernel context to it */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
    PetscCall(MatSetSizes(C, n, m, N, M));
    PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
    PetscCall(MatSetUp(C));
    PetscCall(PetscNew(&kernelt));
    PetscCall(PetscObjectContainerCompose((PetscObject)C, "KernelTranspose", kernelt, PetscCtxDestroyDefault));
  } else {
    C = *B;
    PetscCall(PetscObjectQuery((PetscObject)C, "KernelTranspose", (PetscObject *)&container));
    PetscCheck(container, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatTranspose() with MAT_INITIAL_MATRIX first");
    PetscCall(PetscContainerGetPointer(container, &kernelt));
  }
  PetscCall(MatShellGetContext(C, &c));
  c->dim = a->dim;
  /* replay A's shift/scale on the transpose */
  PetscCall(MatShift(C, shift));
  PetscCall(MatScale(C, scale));
  /* C's entries are generated by evaluating A's kernel with rows/columns swapped */
  c->kernel = GenEntriesTranspose;
  if (kernelt->A != A) {
    /* keep a reference to A alive as long as the transpose exists */
    PetscCall(MatDestroy(&kernelt->A));
    kernelt->A = A;
    PetscCall(PetscObjectReference((PetscObject)A));
  }
  kernelt->kernel           = a->kernel;
  kernelt->kernelctx        = a->kernelctx;
  c->kernelctx              = kernelt;
  /* mirror all assembly parameters of A */
  c->max_cluster_leaf_size  = a->max_cluster_leaf_size;
  c->epsilon                = a->epsilon;
  c->eta                    = a->eta;
  c->block_tree_consistency = a->block_tree_consistency;
  c->permutation            = a->permutation;
  c->recompression          = a->recompression;
  c->compressor             = a->compressor;
  c->clustering             = a->clustering;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* swap the roles of the coordinate sets: A's source becomes C's target and vice versa */
    PetscCall(PetscMalloc1(N * c->dim, &c->gcoords_target));
    PetscCall(PetscArraycpy(c->gcoords_target, a->gcoords_source, N * c->dim));
    if (a->gcoords_target != a->gcoords_source) {
      PetscCall(PetscMalloc1(M * c->dim, &c->gcoords_source));
      PetscCall(PetscArraycpy(c->gcoords_source, a->gcoords_target, M * c->dim));
    } else c->gcoords_source = c->gcoords_target; /* shared coordinates stay shared */
  }
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX) *B = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}
895 
MatDestroy_Factor(Mat F)896 static PetscErrorCode MatDestroy_Factor(Mat F)
897 {
898   PetscContainer               container;
899   htool::HMatrix<PetscScalar> *A;
900 
901   PetscFunctionBegin;
902   PetscCall(PetscObjectQuery((PetscObject)F, "HMatrix", (PetscObject *)&container));
903   if (container) {
904     PetscCall(PetscContainerGetPointer(container, &A));
905     delete A;
906     PetscCall(PetscObjectCompose((PetscObject)F, "HMatrix", nullptr));
907   }
908   PetscCall(PetscObjectComposeFunction((PetscObject)F, "MatFactorGetSolverType_C", nullptr));
909   PetscFunctionReturn(PETSC_SUCCESS);
910 }
911 
/* Reports MATSOLVERHTOOL as the solver package of a factor matrix */
static PetscErrorCode MatFactorGetSolverType_Htool(Mat, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERHTOOL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
918 
/* Shared solve driver for the Vec and Mat callbacks: overwrites X with the
   solution using the factored HMatrix stashed on the factor matrix A */
template <char trans>
static inline PetscErrorCode MatSolve_Private(Mat A, htool::Matrix<PetscScalar> &X)
{
  PetscContainer               container;
  htool::HMatrix<PetscScalar> *B;

  PetscFunctionBegin;
  PetscCheck(A->factortype == MAT_FACTOR_LU || A->factortype == MAT_FACTOR_CHOLESKY, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_UNKNOWN_TYPE, "Only MAT_LU_FACTOR and MAT_CHOLESKY_FACTOR are supported");
  /* MatFactorNumeric_Htool composes the factored HMatrix on A under this key */
  PetscCall(PetscObjectQuery((PetscObject)A, "HMatrix", (PetscObject *)&container));
  PetscCheck(container, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call Mat%sFactorNumeric() before Mat%sSolve%s()", A->factortype == MAT_FACTOR_LU ? "LU" : "Cholesky", X.nb_cols() == 1 ? "" : "Mat", trans == 'N' ? "" : "Transpose");
  PetscCall(PetscContainerGetPointer(container, &B));
  if (A->factortype == MAT_FACTOR_LU) htool::lu_solve(trans, *B, X);
  else htool::cholesky_solve('L', *B, X); /* factors were computed with the lower triangle */
  PetscFunctionReturn(PETSC_SUCCESS);
}
934 
/* Vec overload (selected via enable_if): solves op(A) x = b in place on x */
template <char trans, class Type, typename std::enable_if<std::is_same<Type, Vec>::value>::type * = nullptr>
static PetscErrorCode MatSolve_Htool(Mat A, Type b, Type x)
{
  PetscInt                   n;
  htool::Matrix<PetscScalar> v;
  PetscScalar               *array;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(b, &n));
  PetscCall(VecCopy(b, x)); /* the solve overwrites its input, so work on x */
  PetscCall(VecGetArrayWrite(x, &array));
  v.assign(n, 1, array, false); /* non-owning n x 1 view of the Vec buffer */
  PetscCall(VecRestoreArrayWrite(x, &array));
  /* NOTE(review): v still aliases the buffer of x after the restore — assumes the
     restored pointer stays valid for this Vec type; confirm for non-host Vec backends */
  PetscCall(MatSolve_Private<trans>(A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
951 
952 template <char trans, class Type, typename std::enable_if<std::is_same<Type, Mat>::value>::type * = nullptr>
MatSolve_Htool(Mat A,Type B,Type X)953 static PetscErrorCode MatSolve_Htool(Mat A, Type B, Type X)
954 {
955   PetscInt                   m, N;
956   htool::Matrix<PetscScalar> v;
957   PetscScalar               *array;
958 
959   PetscFunctionBegin;
960   PetscCall(MatGetLocalSize(B, &m, nullptr));
961   PetscCall(MatGetLocalSize(B, nullptr, &N));
962   PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
963   PetscCall(MatDenseGetArrayWrite(X, &array));
964   v.assign(m, N, array, false);
965   PetscCall(MatDenseRestoreArrayWrite(X, &array));
966   PetscCall(MatSolve_Private<trans>(A, v));
967   PetscFunctionReturn(PETSC_SUCCESS);
968 }
969 
/* Numeric LU/Cholesky factorization: copies the assembled hierarchical matrix
   and factors the copy in place, leaving the original operator untouched */
template <MatFactorType ftype>
static PetscErrorCode MatFactorNumeric_Htool(Mat F, Mat A, const MatFactorInfo *)
{
  Mat_Htool                   *a;
  htool::HMatrix<PetscScalar> *B;

  PetscFunctionBegin;
  PetscCall(MatShellGetContext(A, &a));
  B = new htool::HMatrix<PetscScalar>(a->distributed_operator_holder->hmatrix); /* copy; deleted in MatDestroy_Factor */
  if (ftype == MAT_FACTOR_LU) htool::sequential_lu_factorization(*B);
  else htool::sequential_cholesky_factorization('L', *B); /* factor the lower triangle */
  /* stash the factors on F for MatSolve_Private(); no container destructor, MatDestroy_Factor frees B */
  PetscCall(PetscObjectContainerCompose((PetscObject)F, "HMatrix", B, nullptr));
  PetscFunctionReturn(PETSC_SUCCESS);
}
984 
/* Common symbolic phase for LU and Cholesky: nothing to compute up front,
   just install the solve and numeric-factorization callbacks on F */
template <MatFactorType ftype>
PetscErrorCode MatFactorSymbolic_Htool(Mat F, Mat)
{
  PetscFunctionBegin;
  F->preallocated  = PETSC_TRUE;
  F->assembled     = PETSC_TRUE;
  F->ops->solve    = MatSolve_Htool<'N', Vec>;
  F->ops->matsolve = MatSolve_Htool<'N', Mat>;
  /* transpose solves are not hooked up for a complex Cholesky factorization */
  if (!PetscDefined(USE_COMPLEX) || ftype == MAT_FACTOR_LU) {
    F->ops->solvetranspose    = MatSolve_Htool<'T', Vec>;
    F->ops->matsolvetranspose = MatSolve_Htool<'T', Mat>;
  }
  F->ops->destroy = MatDestroy_Factor;
  if (ftype == MAT_FACTOR_LU) F->ops->lufactornumeric = MatFactorNumeric_Htool<MAT_FACTOR_LU>;
  else F->ops->choleskyfactornumeric = MatFactorNumeric_Htool<MAT_FACTOR_CHOLESKY>;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1002 
/* LU symbolic callback: row/column orderings and options are ignored */
static PetscErrorCode MatLUFactorSymbolic_Htool(Mat F, Mat A, IS, IS, const MatFactorInfo *)
{
  PetscFunctionBegin;
  PetscCall(MatFactorSymbolic_Htool<MAT_FACTOR_LU>(F, A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1009 
/* Cholesky symbolic callback: ordering and options are ignored */
static PetscErrorCode MatCholeskyFactorSymbolic_Htool(Mat F, Mat A, IS, const MatFactorInfo *)
{
  PetscFunctionBegin;
  PetscCall(MatFactorSymbolic_Htool<MAT_FACTOR_CHOLESKY>(F, A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1016 
/* Factory for the MATSOLVERHTOOL factorization of a MATHTOOL matrix:
   creates the factor matrix F and installs the symbolic callbacks */
static PetscErrorCode MatGetFactor_htool_htool(Mat A, MatFactorType ftype, Mat *F)
{
  Mat         B;
  Mat_Htool  *a;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCall(MatShellGetContext(A, &a));
  /* the factorization routines are sequential only */
  PetscCheck(size == 1, PetscObjectComm((PetscObject)A), PETSC_ERR_WRONG_MPI_SIZE, "Unsupported parallel MatGetFactor()");
  PetscCheck(a->block_tree_consistency, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot factor a MatHtool with inconsistent block tree");
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(PetscStrallocpy(MATSOLVERHTOOL, &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));

  B->ops->getinfo    = MatGetInfo_External;
  B->factortype      = ftype;
  B->trivialsymbolic = PETSC_TRUE; /* the symbolic phase does no real work */

  if (ftype == MAT_FACTOR_LU) B->ops->lufactorsymbolic = MatLUFactorSymbolic_Htool;
  else if (ftype == MAT_FACTOR_CHOLESKY) B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_Htool;

  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERHTOOL, &B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_Htool));
  *F = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1047 
/* Registers the Htool solver package for LU and Cholesky factorizations of MATHTOOL matrices */
PETSC_INTERN PetscErrorCode MatSolverTypeRegister_Htool(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERHTOOL, MATHTOOL, MAT_FACTOR_LU, MatGetFactor_htool_htool));
  PetscCall(MatSolverTypeRegister(MATSOLVERHTOOL, MATHTOOL, MAT_FACTOR_CHOLESKY, MatGetFactor_htool_htool));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1055 
1056 /*@C
1057   MatCreateHtoolFromKernel - Creates a `MATHTOOL` from a user-supplied kernel.
1058 
1059   Collective, No Fortran Support
1060 
1061   Input Parameters:
1062 + comm          - MPI communicator
1063 . m             - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1064 . n             - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1065 . M             - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
1066 . N             - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
1067 . spacedim      - dimension of the space coordinates
1068 . coords_target - coordinates of the target
1069 . coords_source - coordinates of the source
1070 . kernel        - computational kernel (or `NULL`)
1071 - kernelctx     - kernel context (if kernel is `NULL`, the pointer must be of type htool::VirtualGenerator<PetscScalar>*)
1072 
1073   Output Parameter:
1074 . B - matrix
1075 
1076   Options Database Keys:
1077 + -mat_htool_max_cluster_leaf_size <`PetscInt`>                                                - maximal leaf size in cluster tree
1078 . -mat_htool_epsilon <`PetscReal`>                                                             - relative error in Frobenius norm when approximating a block
1079 . -mat_htool_eta <`PetscReal`>                                                                 - admissibility condition tolerance
1080 . -mat_htool_min_target_depth <`PetscInt`>                                                     - minimal cluster tree depth associated with the rows
1081 . -mat_htool_min_source_depth <`PetscInt`>                                                     - minimal cluster tree depth associated with the columns
1082 . -mat_htool_block_tree_consistency <`PetscBool`>                                              - block tree consistency
1083 . -mat_htool_recompression <`PetscBool`>                                                       - use recompression
1084 . -mat_htool_compressor <sympartialACA, fullACA, SVD>                                          - type of compression
- -mat_htool_clustering <PCARegular, PCAGeometric, BoundingBox1Regular, BoundingBox1Geometric> - type of clustering
1086 
1087   Level: intermediate
1088 
1089 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MATHTOOL`, `PCSetCoordinates()`, `MatHtoolSetKernel()`, `MatHtoolCompressorType`, `MATH2OPUS`, `MatCreateH2OpusFromKernel()`
1090 @*/
PetscErrorCode MatCreateHtoolFromKernel(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt spacedim, const PetscReal coords_target[], const PetscReal coords_source[], MatHtoolKernelFn *kernel, void *kernelctx, Mat *B)
{
  Mat        A;
  Mat_Htool *a;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, &A));
  PetscValidLogicalCollectiveInt(A, spacedim, 6);
  PetscAssertPointer(coords_target, 7);
  PetscAssertPointer(coords_source, 8);
  /* either a kernel function or a generator context must be supplied */
  if (!kernelctx) PetscValidFunction(kernel, 9);
  if (!kernel) PetscAssertPointer(kernelctx, 10);
  PetscCall(MatSetSizes(A, m, n, M, N));
  PetscCall(MatSetType(A, MATHTOOL));
  PetscCall(MatSetUp(A));
  PetscCall(MatShellGetContext(A, &a));
  a->dim       = spacedim;
  a->kernel    = kernel;
  a->kernelctx = kernelctx;
  /* gather the full target coordinate set on every rank: zero-initialize, copy
     the locally owned rows, then sum across the communicator */
  PetscCall(PetscCalloc1(A->rmap->N * spacedim, &a->gcoords_target));
  PetscCall(PetscArraycpy(a->gcoords_target + A->rmap->rstart * spacedim, coords_target, A->rmap->n * spacedim));
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, a->gcoords_target, A->rmap->N * spacedim, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)A))); /* global target coordinates */
  if (coords_target != coords_source) {
    PetscCall(PetscCalloc1(A->cmap->N * spacedim, &a->gcoords_source));
    PetscCall(PetscArraycpy(a->gcoords_source + A->cmap->rstart * spacedim, coords_source, A->cmap->n * spacedim));
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, a->gcoords_source, A->cmap->N * spacedim, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)A))); /* global source coordinates */
  } else a->gcoords_source = a->gcoords_target; /* shared coordinates: target and source alias the same array */
  *B = A;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1121 
/*MC
     MATHTOOL = "htool" - A matrix type for hierarchical matrices using the Htool package.

  Use `./configure --download-htool` to install PETSc to use Htool.

   Options Database Key:
.     -mat_type htool - sets the matrix type to `MATHTOOL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATH2OPUS`, `MATDENSE`, `MatCreateHtoolFromKernel()`, `MatHtoolSetKernel()`
M*/
MatCreate_Htool(Mat A)1134 PETSC_EXTERN PetscErrorCode MatCreate_Htool(Mat A)
1135 {
1136   Mat_Htool *a;
1137 
1138   PetscFunctionBegin;
1139   PetscCall(MatSetType(A, MATSHELL));
1140   PetscCall(PetscNew(&a));
1141   PetscCall(MatShellSetContext(A, a));
1142   PetscCall(MatShellSetOperation(A, MATOP_GET_DIAGONAL, (PetscErrorCodeFn *)MatGetDiagonal_Htool));
1143   PetscCall(MatShellSetOperation(A, MATOP_GET_DIAGONAL_BLOCK, (PetscErrorCodeFn *)MatGetDiagonalBlock_Htool));
1144   PetscCall(MatShellSetOperation(A, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Htool));
1145   PetscCall(MatShellSetOperation(A, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Htool));
1146   if (!PetscDefined(USE_COMPLEX)) PetscCall(MatShellSetOperation(A, MATOP_MULT_HERMITIAN_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Htool));
1147   A->ops->increaseoverlap   = MatIncreaseOverlap_Htool;
1148   A->ops->createsubmatrices = MatCreateSubMatrices_Htool;
1149   PetscCall(MatShellSetOperation(A, MATOP_VIEW, (PetscErrorCodeFn *)MatView_Htool));
1150   PetscCall(MatShellSetOperation(A, MATOP_SET_FROM_OPTIONS, (PetscErrorCodeFn *)MatSetFromOptions_Htool));
1151   PetscCall(MatShellSetOperation(A, MATOP_GET_ROW, (PetscErrorCodeFn *)MatGetRow_Htool));
1152   PetscCall(MatShellSetOperation(A, MATOP_RESTORE_ROW, (PetscErrorCodeFn *)MatRestoreRow_Htool));
1153   PetscCall(MatShellSetOperation(A, MATOP_ASSEMBLY_END, (PetscErrorCodeFn *)MatAssemblyEnd_Htool));
1154   PetscCall(MatShellSetOperation(A, MATOP_TRANSPOSE, (PetscErrorCodeFn *)MatTranspose_Htool));
1155   PetscCall(MatShellSetOperation(A, MATOP_DESTROY, (PetscErrorCodeFn *)MatDestroy_Htool));
1156   a->dim                    = 0;
1157   a->gcoords_target         = nullptr;
1158   a->gcoords_source         = nullptr;
1159   a->max_cluster_leaf_size  = 10;
1160   a->epsilon                = PetscSqrtReal(PETSC_SMALL);
1161   a->eta                    = 10.0;
1162   a->depth[0]               = 0;
1163   a->depth[1]               = 0;
1164   a->block_tree_consistency = PETSC_TRUE;
1165   a->permutation            = PETSC_TRUE;
1166   a->recompression          = PETSC_FALSE;
1167   a->compressor             = MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA;
1168   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_seqdense_C", MatProductSetFromOptions_Htool));
1169   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_mpidense_C", MatProductSetFromOptions_Htool));
1170   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_seqdense_C", MatConvert_Htool_Dense));
1171   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_mpidense_C", MatConvert_Htool_Dense));
1172   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetHierarchicalMat_C", MatHtoolGetHierarchicalMat_Htool));
1173   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetKernel_C", MatHtoolSetKernel_Htool));
1174   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationSource_C", MatHtoolGetPermutationSource_Htool));
1175   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationTarget_C", MatHtoolGetPermutationTarget_Htool));
1176   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUsePermutation_C", MatHtoolUsePermutation_Htool));
1177   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUseRecompression_C", MatHtoolUseRecompression_Htool));
1178   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContext_C", MatShellSetContext_Immutable));
1179   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContextDestroy_C", MatShellSetContextDestroy_Immutable));
1180   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetManageScalingShifts_C", MatShellSetManageScalingShifts_Immutable));
1181   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATHTOOL));
1182   PetscFunctionReturn(PETSC_SUCCESS);
1183 }
1184