xref: /petsc/src/mat/impls/htool/htool.cxx (revision 6dd63270497ad23dcf16ae500a87ff2b2a0b7474)
1 #include <../src/mat/impls/htool/htool.hpp> /*I "petscmat.h" I*/
2 #include <petscblaslapack.h>
3 #include <set>
4 
5 const char *const MatHtoolCompressorTypes[] = {"sympartialACA", "fullACA", "SVD"};
6 const char *const MatHtoolClusteringTypes[] = {"PCARegular", "PCAGeometric", "BoundingBox1Regular", "BoundingBox1Geometric"};
7 const char        HtoolCitation[]           = "@article{marchand2020two,\n"
8                                               "  Author = {Marchand, Pierre and Claeys, Xavier and Jolivet, Pierre and Nataf, Fr\\'ed\\'eric and Tournier, Pierre-Henri},\n"
9                                               "  Title = {Two-level preconditioning for $h$-version boundary element approximation of hypersingular operator with {GenEO}},\n"
10                                               "  Year = {2020},\n"
11                                               "  Publisher = {Elsevier},\n"
12                                               "  Journal = {Numerische Mathematik},\n"
13                                               "  Volume = {146},\n"
14                                               "  Pages = {597--628},\n"
15                                               "  Url = {https://github.com/htool-ddm/htool}\n"
16                                               "}\n";
17 static PetscBool  HtoolCite                 = PETSC_FALSE;
18 
19 static PetscErrorCode MatGetDiagonal_Htool(Mat A, Vec v)
20 {
21   Mat_Htool   *a;
22   PetscScalar *x;
23   PetscBool    flg;
24 
25   PetscFunctionBegin;
26   PetscCall(MatHasCongruentLayouts(A, &flg));
27   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only congruent layouts supported");
28   PetscCall(MatShellGetContext(A, &a));
29   PetscCall(VecGetArrayWrite(v, &x));
30   PetscStackCallExternalVoid("copy_diagonal_in_user_numbering", htool::copy_diagonal_in_user_numbering(a->distributed_operator_holder->hmatrix, x));
31   PetscCall(VecRestoreArrayWrite(v, &x));
32   PetscFunctionReturn(PETSC_SUCCESS);
33 }
34 
35 static PetscErrorCode MatGetDiagonalBlock_Htool(Mat A, Mat *b)
36 {
37   Mat_Htool                 *a;
38   Mat                        B;
39   PetscScalar               *ptr, shift, scale;
40   PetscBool                  flg;
41   PetscMPIInt                rank;
42   htool::Cluster<PetscReal> *source_cluster = nullptr;
43 
44   PetscFunctionBegin;
45   PetscCall(MatHasCongruentLayouts(A, &flg));
46   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only congruent layouts supported");
47   PetscCall(MatShellGetContext(A, &a));
48   PetscCall(PetscObjectQuery((PetscObject)A, "DiagonalBlock", (PetscObject *)&B)); /* same logic as in MatGetDiagonalBlock_MPIDense() */
49   if (!B) {
50     PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
51     PetscCall(MatCreateDense(PETSC_COMM_SELF, A->rmap->n, A->rmap->n, A->rmap->n, A->rmap->n, nullptr, &B));
52     PetscCall(MatDenseGetArrayWrite(B, &ptr));
53     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
54     source_cluster = a->source_cluster ? a->source_cluster.get() : a->target_cluster.get();
55     PetscStackCallExternalVoid("copy_to_dense_in_user_numbering", htool::copy_to_dense_in_user_numbering(*a->distributed_operator_holder->hmatrix.get_sub_hmatrix(a->target_cluster->get_cluster_on_partition(rank), source_cluster->get_cluster_on_partition(rank)), ptr));
56     PetscCall(MatDenseRestoreArrayWrite(B, &ptr));
57     PetscCall(MatPropagateSymmetryOptions(A, B));
58     PetscCall(PetscObjectCompose((PetscObject)A, "DiagonalBlock", (PetscObject)B));
59     *b = B;
60     PetscCall(MatDestroy(&B));
61     PetscCall(MatShift(*b, shift));
62     PetscCall(MatScale(*b, scale));
63   } else {
64     PetscCall(MatShellGetScalingShifts(A, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
65     *b = B;
66   }
67   PetscFunctionReturn(PETSC_SUCCESS);
68 }
69 
70 static PetscErrorCode MatMult_Htool(Mat A, Vec x, Vec y)
71 {
72   Mat_Htool         *a;
73   const PetscScalar *in;
74   PetscScalar       *out;
75 
76   PetscFunctionBegin;
77   PetscCall(MatShellGetContext(A, &a));
78   PetscCall(VecGetArrayRead(x, &in));
79   PetscCall(VecGetArrayWrite(y, &out));
80   a->distributed_operator_holder->distributed_operator.vector_product_local_to_local(in, out, nullptr);
81   PetscCall(VecRestoreArrayRead(x, &in));
82   PetscCall(VecRestoreArrayWrite(y, &out));
83   PetscFunctionReturn(PETSC_SUCCESS);
84 }
85 
86 static PetscErrorCode MatMultTranspose_Htool(Mat A, Vec x, Vec y)
87 {
88   Mat_Htool         *a;
89   const PetscScalar *in;
90   PetscScalar       *out;
91 
92   PetscFunctionBegin;
93   PetscCall(MatShellGetContext(A, &a));
94   PetscCall(VecGetArrayRead(x, &in));
95   PetscCall(VecGetArrayWrite(y, &out));
96   a->distributed_operator_holder->distributed_operator.vector_product_transp_local_to_local(in, out, nullptr);
97   PetscCall(VecRestoreArrayRead(x, &in));
98   PetscCall(VecRestoreArrayWrite(y, &out));
99   PetscFunctionReturn(PETSC_SUCCESS);
100 }
101 
102 static PetscErrorCode MatIncreaseOverlap_Htool(Mat A, PetscInt is_max, IS is[], PetscInt ov)
103 {
104   std::set<PetscInt> set;
105   const PetscInt    *idx;
106   PetscInt          *oidx, size, bs[2];
107   PetscMPIInt        csize;
108 
109   PetscFunctionBegin;
110   PetscCall(MatGetBlockSizes(A, bs, bs + 1));
111   if (bs[0] != bs[1]) bs[0] = 1;
112   for (PetscInt i = 0; i < is_max; ++i) {
113     /* basic implementation that adds indices by shifting an IS by -ov, -ov+1..., -1, 1..., ov-1, ov */
114     /* needed to avoid subdomain matrices to replicate A since it is dense                           */
115     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)is[i]), &csize));
116     PetscCheck(csize == 1, PETSC_COMM_SELF, PETSC_ERR_WRONG_MPI_SIZE, "Unsupported parallel IS");
117     PetscCall(ISGetSize(is[i], &size));
118     PetscCall(ISGetIndices(is[i], &idx));
119     for (PetscInt j = 0; j < size; ++j) {
120       set.insert(idx[j]);
121       for (PetscInt k = 1; k <= ov; ++k) {                                              /* for each layer of overlap      */
122         if (idx[j] - k >= 0) set.insert(idx[j] - k);                                    /* do not insert negative indices */
123         if (idx[j] + k < A->rmap->N && idx[j] + k < A->cmap->N) set.insert(idx[j] + k); /* do not insert indices greater than the dimension of A */
124       }
125     }
126     PetscCall(ISRestoreIndices(is[i], &idx));
127     PetscCall(ISDestroy(is + i));
128     if (bs[0] > 1) {
129       for (std::set<PetscInt>::iterator it = set.cbegin(); it != set.cend(); it++) {
130         std::vector<PetscInt> block(bs[0]);
131         std::iota(block.begin(), block.end(), (*it / bs[0]) * bs[0]);
132         set.insert(block.cbegin(), block.cend());
133       }
134     }
135     size = set.size(); /* size with overlap */
136     PetscCall(PetscMalloc1(size, &oidx));
137     for (const PetscInt j : set) *oidx++ = j;
138     oidx -= size;
139     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, oidx, PETSC_OWN_POINTER, is + i));
140   }
141   PetscFunctionReturn(PETSC_SUCCESS);
142 }
143 
144 static PetscErrorCode MatCreateSubMatrices_Htool(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
145 {
146   Mat_Htool         *a;
147   Mat                D, B, BT;
148   const PetscScalar *copy;
149   PetscScalar       *ptr, shift, scale;
150   const PetscInt    *idxr, *idxc, *it;
151   PetscInt           nrow, m, i;
152   PetscBool          flg;
153 
154   PetscFunctionBegin;
155   PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
156   PetscCall(MatShellGetContext(A, &a));
157   if (scall != MAT_REUSE_MATRIX) PetscCall(PetscCalloc1(n, submat));
158   for (i = 0; i < n; ++i) {
159     PetscCall(ISGetLocalSize(irow[i], &nrow));
160     PetscCall(ISGetLocalSize(icol[i], &m));
161     PetscCall(ISGetIndices(irow[i], &idxr));
162     PetscCall(ISGetIndices(icol[i], &idxc));
163     if (scall != MAT_REUSE_MATRIX) PetscCall(MatCreateDense(PETSC_COMM_SELF, nrow, m, nrow, m, nullptr, (*submat) + i));
164     PetscCall(MatDenseGetArrayWrite((*submat)[i], &ptr));
165     if (irow[i] == icol[i]) { /* same row and column IS? */
166       PetscCall(MatHasCongruentLayouts(A, &flg));
167       if (flg) {
168         PetscCall(ISSorted(irow[i], &flg));
169         if (flg) { /* sorted IS? */
170           it = std::lower_bound(idxr, idxr + nrow, A->rmap->rstart);
171           if (it != idxr + nrow && *it == A->rmap->rstart) {    /* rmap->rstart in IS? */
172             if (std::distance(idxr, it) + A->rmap->n <= nrow) { /* long enough IS to store the local diagonal block? */
173               for (PetscInt j = 0; j < A->rmap->n && flg; ++j)
174                 if (PetscUnlikely(it[j] != A->rmap->rstart + j)) flg = PETSC_FALSE;
175               if (flg) { /* complete local diagonal block in IS? */
176                 /* fast extraction when the local diagonal block is part of the submatrix, e.g., for PCASM or PCHPDDM
177                  *      [   B   C   E   ]
178                  *  A = [   B   D   E   ]
179                  *      [   B   F   E   ]
180                  */
181                 m = std::distance(idxr, it); /* shift of the coefficient (0,0) of block D from above */
182                 PetscCall(MatGetDiagonalBlock(A, &D));
183                 PetscCall(MatDenseGetArrayRead(D, &copy));
184                 for (PetscInt k = 0; k < A->rmap->n; ++k) { PetscCall(PetscArraycpy(ptr + (m + k) * nrow + m, copy + k * A->rmap->n, A->rmap->n)); /* block D from above */ }
185                 PetscCall(MatDenseRestoreArrayRead(D, &copy));
186                 if (m) {
187                   a->wrapper->copy_submatrix(nrow, m, idxr, idxc, ptr); /* vertical block B from above */
188                   /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
189                   if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) {
190                     PetscCall(MatCreateDense(PETSC_COMM_SELF, A->rmap->n, m, A->rmap->n, m, ptr + m, &B));
191                     PetscCall(MatDenseSetLDA(B, nrow));
192                     PetscCall(MatCreateDense(PETSC_COMM_SELF, m, A->rmap->n, m, A->rmap->n, ptr + m * nrow, &BT));
193                     PetscCall(MatDenseSetLDA(BT, nrow));
194                     if (A->hermitian == PETSC_BOOL3_TRUE && PetscDefined(USE_COMPLEX)) {
195                       PetscCall(MatHermitianTranspose(B, MAT_REUSE_MATRIX, &BT));
196                     } else {
197                       PetscCall(MatTransposeSetPrecursor(B, BT));
198                       PetscCall(MatTranspose(B, MAT_REUSE_MATRIX, &BT));
199                     }
200                     PetscCall(MatDestroy(&B));
201                     PetscCall(MatDestroy(&BT));
202                   } else {
203                     for (PetscInt k = 0; k < A->rmap->n; ++k) { /* block C from above */
204                       a->wrapper->copy_submatrix(m, 1, idxr, idxc + m + k, ptr + (m + k) * nrow);
205                     }
206                   }
207                 }
208                 if (m + A->rmap->n != nrow) {
209                   a->wrapper->copy_submatrix(nrow, std::distance(it + A->rmap->n, idxr + nrow), idxr, idxc + m + A->rmap->n, ptr + (m + A->rmap->n) * nrow); /* vertical block E from above */
210                   /* entry-wise assembly may be costly, so transpose already-computed entries when possible */
211                   if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) {
212                     PetscCall(MatCreateDense(PETSC_COMM_SELF, A->rmap->n, nrow - (m + A->rmap->n), A->rmap->n, nrow - (m + A->rmap->n), ptr + (m + A->rmap->n) * nrow + m, &B));
213                     PetscCall(MatDenseSetLDA(B, nrow));
214                     PetscCall(MatCreateDense(PETSC_COMM_SELF, nrow - (m + A->rmap->n), A->rmap->n, nrow - (m + A->rmap->n), A->rmap->n, ptr + m * nrow + m + A->rmap->n, &BT));
215                     PetscCall(MatDenseSetLDA(BT, nrow));
216                     if (A->hermitian == PETSC_BOOL3_TRUE && PetscDefined(USE_COMPLEX)) {
217                       PetscCall(MatHermitianTranspose(B, MAT_REUSE_MATRIX, &BT));
218                     } else {
219                       PetscCall(MatTransposeSetPrecursor(B, BT));
220                       PetscCall(MatTranspose(B, MAT_REUSE_MATRIX, &BT));
221                     }
222                     PetscCall(MatDestroy(&B));
223                     PetscCall(MatDestroy(&BT));
224                   } else {
225                     for (PetscInt k = 0; k < A->rmap->n; ++k) { /* block F from above */
226                       a->wrapper->copy_submatrix(std::distance(it + A->rmap->n, idxr + nrow), 1, it + A->rmap->n, idxc + m + k, ptr + (m + k) * nrow + m + A->rmap->n);
227                     }
228                   }
229                 }
230               } /* complete local diagonal block not in IS */
231             } else flg = PETSC_FALSE; /* IS not long enough to store the local diagonal block */
232           } else flg = PETSC_FALSE;   /* rmap->rstart not in IS */
233         } /* unsorted IS */
234       }
235     } else flg = PETSC_FALSE;                                       /* different row and column IS */
236     if (!flg) a->wrapper->copy_submatrix(nrow, m, idxr, idxc, ptr); /* reassemble everything */
237     PetscCall(ISRestoreIndices(irow[i], &idxr));
238     PetscCall(ISRestoreIndices(icol[i], &idxc));
239     PetscCall(MatDenseRestoreArrayWrite((*submat)[i], &ptr));
240     PetscCall(MatShift((*submat)[i], shift));
241     PetscCall(MatScale((*submat)[i], scale));
242   }
243   PetscFunctionReturn(PETSC_SUCCESS);
244 }
245 
246 static PetscErrorCode MatDestroy_Htool(Mat A)
247 {
248   Mat_Htool               *a;
249   PetscContainer           container;
250   MatHtoolKernelTranspose *kernelt;
251 
252   PetscFunctionBegin;
253   PetscCall(MatShellGetContext(A, &a));
254   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_seqdense_C", nullptr));
255   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_mpidense_C", nullptr));
256   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_seqdense_C", nullptr));
257   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_mpidense_C", nullptr));
258   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetHierarchicalMat_C", nullptr));
259   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetKernel_C", nullptr));
260   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationSource_C", nullptr));
261   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationTarget_C", nullptr));
262   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUsePermutation_C", nullptr));
263   PetscCall(PetscObjectQuery((PetscObject)A, "KernelTranspose", (PetscObject *)&container));
264   if (container) { /* created in MatTranspose_Htool() */
265     PetscCall(PetscContainerGetPointer(container, (void **)&kernelt));
266     PetscCall(MatDestroy(&kernelt->A));
267     PetscCall(PetscObjectCompose((PetscObject)A, "KernelTranspose", nullptr));
268   }
269   if (a->gcoords_source != a->gcoords_target) PetscCall(PetscFree(a->gcoords_source));
270   PetscCall(PetscFree(a->gcoords_target));
271   PetscCall(PetscFree2(a->work_source, a->work_target));
272   delete a->wrapper;
273   a->target_cluster.reset();
274   a->source_cluster.reset();
275   a->distributed_operator_holder.reset();
276   PetscCall(PetscFree(a));
277   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContext_C", nullptr)); // needed to avoid a call to MatShellSetContext_Immutable()
278   PetscFunctionReturn(PETSC_SUCCESS);
279 }
280 
281 static PetscErrorCode MatView_Htool(Mat A, PetscViewer pv)
282 {
283   Mat_Htool                         *a;
284   PetscScalar                        shift, scale;
285   PetscBool                          flg;
286   std::map<std::string, std::string> hmatrix_information;
287 
288   PetscFunctionBegin;
289   PetscCall(MatShellGetContext(A, &a));
290   hmatrix_information = htool::get_distributed_hmatrix_information(a->distributed_operator_holder->hmatrix, PetscObjectComm((PetscObject)A));
291   PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERASCII, &flg));
292   if (flg) {
293     PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
294     PetscCall(PetscViewerASCIIPrintf(pv, "symmetry: %c\n", a->distributed_operator_holder->distributed_operator.get_symmetry_type()));
295     if (PetscAbsScalar(scale - 1.0) > PETSC_MACHINE_EPSILON) {
296 #if defined(PETSC_USE_COMPLEX)
297       PetscCall(PetscViewerASCIIPrintf(pv, "scaling: %g+%gi\n", (double)PetscRealPart(scale), (double)PetscImaginaryPart(scale)));
298 #else
299       PetscCall(PetscViewerASCIIPrintf(pv, "scaling: %g\n", (double)scale));
300 #endif
301     }
302     if (PetscAbsScalar(shift) > PETSC_MACHINE_EPSILON) {
303 #if defined(PETSC_USE_COMPLEX)
304       PetscCall(PetscViewerASCIIPrintf(pv, "shift: %g+%gi\n", (double)PetscRealPart(shift), (double)PetscImaginaryPart(shift)));
305 #else
306       PetscCall(PetscViewerASCIIPrintf(pv, "shift: %g\n", (double)shift));
307 #endif
308     }
309     PetscCall(PetscViewerASCIIPrintf(pv, "minimum cluster size: %" PetscInt_FMT "\n", a->min_cluster_size));
310     PetscCall(PetscViewerASCIIPrintf(pv, "epsilon: %g\n", (double)a->epsilon));
311     PetscCall(PetscViewerASCIIPrintf(pv, "eta: %g\n", (double)a->eta));
312     PetscCall(PetscViewerASCIIPrintf(pv, "minimum target depth: %" PetscInt_FMT "\n", a->depth[0]));
313     PetscCall(PetscViewerASCIIPrintf(pv, "minimum source depth: %" PetscInt_FMT "\n", a->depth[1]));
314     PetscCall(PetscViewerASCIIPrintf(pv, "compressor: %s\n", MatHtoolCompressorTypes[a->compressor]));
315     PetscCall(PetscViewerASCIIPrintf(pv, "clustering: %s\n", MatHtoolClusteringTypes[a->clustering]));
316     PetscCall(PetscViewerASCIIPrintf(pv, "compression ratio: %s\n", hmatrix_information["Compression_ratio"].c_str()));
317     PetscCall(PetscViewerASCIIPrintf(pv, "space saving: %s\n", hmatrix_information["Space_saving"].c_str()));
318     PetscCall(PetscViewerASCIIPrintf(pv, "block tree consistency: %s\n", PetscBools[a->distributed_operator_holder->hmatrix.is_block_tree_consistent()]));
319     PetscCall(PetscViewerASCIIPrintf(pv, "number of dense (resp. low rank) matrices: %s (resp. %s)\n", hmatrix_information["Number_of_dense_blocks"].c_str(), hmatrix_information["Number_of_low_rank_blocks"].c_str()));
320     PetscCall(
321       PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) dense block sizes: (%s, %s, %s)\n", hmatrix_information["Dense_block_size_min"].c_str(), hmatrix_information["Dense_block_size_mean"].c_str(), hmatrix_information["Dense_block_size_max"].c_str()));
322     PetscCall(PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) low rank block sizes: (%s, %s, %s)\n", hmatrix_information["Low_rank_block_size_min"].c_str(), hmatrix_information["Low_rank_block_size_mean"].c_str(),
323                                      hmatrix_information["Low_rank_block_size_max"].c_str()));
324     PetscCall(PetscViewerASCIIPrintf(pv, "(minimum, mean, maximum) ranks: (%s, %s, %s)\n", hmatrix_information["Rank_min"].c_str(), hmatrix_information["Rank_mean"].c_str(), hmatrix_information["Rank_max"].c_str()));
325   }
326   PetscFunctionReturn(PETSC_SUCCESS);
327 }
328 
329 /* naive implementation of MatGetRow() needed for MatConvert_Nest_AIJ() */
330 static PetscErrorCode MatGetRow_Htool(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
331 {
332   Mat_Htool   *a;
333   PetscScalar  shift, scale;
334   PetscInt    *idxc;
335   PetscBLASInt one = 1, bn;
336 
337   PetscFunctionBegin;
338   PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
339   PetscCall(MatShellGetContext(A, &a));
340   if (nz) *nz = A->cmap->N;
341   if (idx || v) { /* even if !idx, need to set idxc for htool::copy_submatrix() */
342     PetscCall(PetscMalloc1(A->cmap->N, &idxc));
343     for (PetscInt i = 0; i < A->cmap->N; ++i) idxc[i] = i;
344   }
345   if (idx) *idx = idxc;
346   if (v) {
347     PetscCall(PetscMalloc1(A->cmap->N, v));
348     if (a->wrapper) a->wrapper->copy_submatrix(1, A->cmap->N, &row, idxc, *v);
349     else reinterpret_cast<htool::VirtualGenerator<PetscScalar> *>(a->kernelctx)->copy_submatrix(1, A->cmap->N, &row, idxc, *v);
350     PetscCall(PetscBLASIntCast(A->cmap->N, &bn));
351     PetscCallBLAS("BLASscal", BLASscal_(&bn, &scale, *v, &one));
352     if (row < A->cmap->N) (*v)[row] += shift;
353   }
354   if (!idx) PetscCall(PetscFree(idxc));
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 static PetscErrorCode MatRestoreRow_Htool(Mat, PetscInt, PetscInt *, PetscInt **idx, PetscScalar **v)
359 {
360   PetscFunctionBegin;
361   if (idx) PetscCall(PetscFree(*idx));
362   if (v) PetscCall(PetscFree(*v));
363   PetscFunctionReturn(PETSC_SUCCESS);
364 }
365 
366 static PetscErrorCode MatSetFromOptions_Htool(Mat A, PetscOptionItems PetscOptionsObject)
367 {
368   Mat_Htool *a;
369   PetscInt   n;
370   PetscBool  flg;
371 
372   PetscFunctionBegin;
373   PetscCall(MatShellGetContext(A, &a));
374   PetscOptionsHeadBegin(PetscOptionsObject, "Htool options");
375   PetscCall(PetscOptionsBoundedInt("-mat_htool_min_cluster_size", "Minimal leaf size in cluster tree", nullptr, a->min_cluster_size, &a->min_cluster_size, nullptr, 0));
376   PetscCall(PetscOptionsBoundedReal("-mat_htool_epsilon", "Relative error in Frobenius norm when approximating a block", nullptr, a->epsilon, &a->epsilon, nullptr, 0.0));
377   PetscCall(PetscOptionsReal("-mat_htool_eta", "Admissibility condition tolerance", nullptr, a->eta, &a->eta, nullptr));
378   PetscCall(PetscOptionsBoundedInt("-mat_htool_min_target_depth", "Minimal cluster tree depth associated with the rows", nullptr, a->depth[0], a->depth, nullptr, 0));
379   PetscCall(PetscOptionsBoundedInt("-mat_htool_min_source_depth", "Minimal cluster tree depth associated with the columns", nullptr, a->depth[1], a->depth + 1, nullptr, 0));
380   PetscCall(PetscOptionsBool("-mat_htool_block_tree_consistency", "Block tree consistency", nullptr, a->block_tree_consistency, &a->block_tree_consistency, nullptr));
381 
382   n = 0;
383   PetscCall(PetscOptionsEList("-mat_htool_compressor", "Type of compression", "MatHtoolCompressorType", MatHtoolCompressorTypes, PETSC_STATIC_ARRAY_LENGTH(MatHtoolCompressorTypes), MatHtoolCompressorTypes[MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA], &n, &flg));
384   if (flg) a->compressor = MatHtoolCompressorType(n);
385   n = 0;
386   PetscCall(PetscOptionsEList("-mat_htool_clustering", "Type of clustering", "MatHtoolClusteringType", MatHtoolClusteringTypes, PETSC_STATIC_ARRAY_LENGTH(MatHtoolClusteringTypes), MatHtoolClusteringTypes[MAT_HTOOL_CLUSTERING_PCA_REGULAR], &n, &flg));
387   if (flg) a->clustering = MatHtoolClusteringType(n);
388   PetscOptionsHeadEnd();
389   PetscFunctionReturn(PETSC_SUCCESS);
390 }
391 
392 static PetscErrorCode MatAssemblyEnd_Htool(Mat A, MatAssemblyType)
393 {
394   Mat_Htool                                                   *a;
395   const PetscInt                                              *ranges;
396   PetscInt                                                    *offset;
397   PetscMPIInt                                                  size, rank;
398   char                                                         S = PetscDefined(USE_COMPLEX) && A->hermitian == PETSC_BOOL3_TRUE ? 'H' : (A->symmetric == PETSC_BOOL3_TRUE ? 'S' : 'N'), uplo = S == 'N' ? 'N' : 'U';
399   htool::VirtualGenerator<PetscScalar>                        *generator = nullptr;
400   htool::ClusterTreeBuilder<PetscReal>                         recursive_build_strategy;
401   htool::Cluster<PetscReal>                                   *source_cluster;
402   std::shared_ptr<htool::VirtualLowRankGenerator<PetscScalar>> compressor;
403 
404   PetscFunctionBegin;
405   PetscCall(PetscCitationsRegister(HtoolCitation, &HtoolCite));
406   PetscCall(MatShellGetContext(A, &a));
407   delete a->wrapper;
408   a->target_cluster.reset();
409   a->source_cluster.reset();
410   a->distributed_operator_holder.reset();
411   // clustering
412   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
413   PetscCall(PetscMalloc1(2 * size, &offset));
414   PetscCall(MatGetOwnershipRanges(A, &ranges));
415   for (PetscInt i = 0; i < size; ++i) {
416     offset[2 * i]     = ranges[i];
417     offset[2 * i + 1] = ranges[i + 1] - ranges[i];
418   }
419   switch (a->clustering) {
420   case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
421     recursive_build_strategy.set_direction_computation_strategy(std::make_shared<htool::ComputeLargestExtent<PetscReal>>());
422     recursive_build_strategy.set_splitting_strategy(std::make_shared<htool::GeometricSplitting<PetscReal>>());
423     break;
424   case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
425     recursive_build_strategy.set_direction_computation_strategy(std::make_shared<htool::ComputeBoundingBox<PetscReal>>());
426     recursive_build_strategy.set_splitting_strategy(std::make_shared<htool::GeometricSplitting<PetscReal>>());
427     break;
428   case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
429     recursive_build_strategy.set_direction_computation_strategy(std::make_shared<htool::ComputeBoundingBox<PetscReal>>());
430     recursive_build_strategy.set_splitting_strategy(std::make_shared<htool::RegularSplitting<PetscReal>>());
431     break;
432   default:
433     recursive_build_strategy.set_direction_computation_strategy(std::make_shared<htool::ComputeLargestExtent<PetscReal>>());
434     recursive_build_strategy.set_splitting_strategy(std::make_shared<htool::RegularSplitting<PetscReal>>());
435   }
436   recursive_build_strategy.set_minclustersize(a->min_cluster_size);
437   a->target_cluster = std::make_unique<htool::Cluster<PetscReal>>(recursive_build_strategy.create_cluster_tree(A->rmap->N, a->dim, a->gcoords_target, 2, size, offset));
438   if (a->gcoords_target != a->gcoords_source) {
439     PetscCall(MatGetOwnershipRangesColumn(A, &ranges));
440     for (PetscInt i = 0; i < size; ++i) {
441       offset[2 * i]     = ranges[i];
442       offset[2 * i + 1] = ranges[i + 1] - ranges[i];
443     }
444     switch (a->clustering) {
445     case MAT_HTOOL_CLUSTERING_PCA_GEOMETRIC:
446       recursive_build_strategy.set_direction_computation_strategy(std::make_shared<htool::ComputeLargestExtent<PetscReal>>());
447       recursive_build_strategy.set_splitting_strategy(std::make_shared<htool::GeometricSplitting<PetscReal>>());
448       break;
449     case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_GEOMETRIC:
450       recursive_build_strategy.set_direction_computation_strategy(std::make_shared<htool::ComputeBoundingBox<PetscReal>>());
451       recursive_build_strategy.set_splitting_strategy(std::make_shared<htool::GeometricSplitting<PetscReal>>());
452       break;
453     case MAT_HTOOL_CLUSTERING_BOUNDING_BOX_1_REGULAR:
454       recursive_build_strategy.set_direction_computation_strategy(std::make_shared<htool::ComputeBoundingBox<PetscReal>>());
455       recursive_build_strategy.set_splitting_strategy(std::make_shared<htool::RegularSplitting<PetscReal>>());
456       break;
457     default:
458       recursive_build_strategy.set_direction_computation_strategy(std::make_shared<htool::ComputeLargestExtent<PetscReal>>());
459       recursive_build_strategy.set_splitting_strategy(std::make_shared<htool::RegularSplitting<PetscReal>>());
460     }
461     recursive_build_strategy.set_minclustersize(a->min_cluster_size);
462     a->source_cluster = std::make_unique<htool::Cluster<PetscReal>>(recursive_build_strategy.create_cluster_tree(A->cmap->N, a->dim, a->gcoords_source, 2, size, offset));
463     S = uplo       = 'N';
464     source_cluster = a->source_cluster.get();
465   } else source_cluster = a->target_cluster.get();
466   PetscCall(PetscFree(offset));
467   // generator
468   if (a->kernel) a->wrapper = new WrapperHtool(a->dim, a->kernel, a->kernelctx);
469   else {
470     a->wrapper = nullptr;
471     generator  = reinterpret_cast<htool::VirtualGenerator<PetscScalar> *>(a->kernelctx);
472   }
473   // compressor
474   switch (a->compressor) {
475   case MAT_HTOOL_COMPRESSOR_FULL_ACA:
476     compressor = std::make_shared<htool::fullACA<PetscScalar>>();
477     break;
478   case MAT_HTOOL_COMPRESSOR_SVD:
479     compressor = std::make_shared<htool::SVD<PetscScalar>>();
480     break;
481   default:
482     compressor = std::make_shared<htool::sympartialACA<PetscScalar>>();
483   }
484   // local hierarchical matrix
485   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
486   auto hmatrix_builder = htool::HMatrixTreeBuilder<PetscScalar>(*a->target_cluster, *source_cluster, a->epsilon, a->eta, S, uplo, -1, rank, rank);
487   hmatrix_builder.set_low_rank_generator(compressor);
488   hmatrix_builder.set_minimal_target_depth(a->depth[0]);
489   hmatrix_builder.set_minimal_source_depth(a->depth[1]);
490   PetscCheck(a->block_tree_consistency || (!a->block_tree_consistency && !(A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE)), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot have a MatHtool with inconsistent block tree which is either symmetric or Hermitian");
491   hmatrix_builder.set_block_tree_consistency(a->block_tree_consistency);
492   a->distributed_operator_holder = std::make_unique<htool::DistributedOperatorFromHMatrix<PetscScalar>>(a->wrapper ? *a->wrapper : *generator, *a->target_cluster, *source_cluster, hmatrix_builder, PetscObjectComm((PetscObject)A));
493   PetscFunctionReturn(PETSC_SUCCESS);
494 }
495 
496 static PetscErrorCode MatProductNumeric_Htool(Mat C)
497 {
498   Mat_Product       *product = C->product;
499   Mat_Htool         *a;
500   const PetscScalar *in;
501   PetscScalar       *out;
502   PetscInt           N, lda;
503 
504   PetscFunctionBegin;
505   MatCheckProduct(C, 1);
506   PetscCall(MatGetSize(C, nullptr, &N));
507   PetscCall(MatDenseGetLDA(C, &lda));
508   PetscCheck(lda == C->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")", lda, C->rmap->n);
509   PetscCall(MatDenseGetArrayRead(product->B, &in));
510   PetscCall(MatDenseGetArrayWrite(C, &out));
511   PetscCall(MatShellGetContext(product->A, &a));
512   switch (product->type) {
513   case MATPRODUCT_AB:
514     a->distributed_operator_holder->distributed_operator.matrix_product_local_to_local(in, out, N, nullptr);
515     break;
516   case MATPRODUCT_AtB:
517     a->distributed_operator_holder->distributed_operator.matrix_product_transp_local_to_local(in, out, N, nullptr);
518     break;
519   default:
520     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatProductType %s is not supported", MatProductTypes[product->type]);
521   }
522   PetscCall(MatDenseRestoreArrayWrite(C, &out));
523   PetscCall(MatDenseRestoreArrayRead(product->B, &in));
524   PetscFunctionReturn(PETSC_SUCCESS);
525 }
526 
527 static PetscErrorCode MatProductSymbolic_Htool(Mat C)
528 {
529   Mat_Product *product = C->product;
530   Mat          A, B;
531   PetscBool    flg;
532 
533   PetscFunctionBegin;
534   MatCheckProduct(C, 1);
535   A = product->A;
536   B = product->B;
537   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, ""));
538   PetscCheck(flg && (product->type == MATPRODUCT_AB || product->type == MATPRODUCT_AtB), PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "ProductType %s not supported for %s", MatProductTypes[product->type], ((PetscObject)product->B)->type_name);
539   if (C->rmap->n == PETSC_DECIDE || C->cmap->n == PETSC_DECIDE || C->rmap->N == PETSC_DECIDE || C->cmap->N == PETSC_DECIDE) {
540     if (product->type == MATPRODUCT_AB) PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
541     else PetscCall(MatSetSizes(C, A->cmap->n, B->cmap->n, A->cmap->N, B->cmap->N));
542   }
543   PetscCall(MatSetType(C, MATDENSE));
544   PetscCall(MatSetUp(C));
545   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
546   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
547   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
548   C->ops->productsymbolic = nullptr;
549   C->ops->productnumeric  = MatProductNumeric_Htool;
550   PetscFunctionReturn(PETSC_SUCCESS);
551 }
552 
553 static PetscErrorCode MatProductSetFromOptions_Htool(Mat C)
554 {
555   PetscFunctionBegin;
556   MatCheckProduct(C, 1);
557   if (C->product->type == MATPRODUCT_AB || C->product->type == MATPRODUCT_AtB) C->ops->productsymbolic = MatProductSymbolic_Htool;
558   PetscFunctionReturn(PETSC_SUCCESS);
559 }
560 
561 static PetscErrorCode MatHtoolGetHierarchicalMat_Htool(Mat A, const htool::DistributedOperator<PetscScalar> **distributed_operator)
562 {
563   Mat_Htool *a;
564 
565   PetscFunctionBegin;
566   PetscCall(MatShellGetContext(A, &a));
567   *distributed_operator = &a->distributed_operator_holder->distributed_operator;
568   PetscFunctionReturn(PETSC_SUCCESS);
569 }
570 
571 /*@C
572   MatHtoolGetHierarchicalMat - Retrieves the opaque pointer to a Htool virtual matrix stored in a `MATHTOOL`.
573 
574   No Fortran Support, No C Support
575 
576   Input Parameter:
577 . A - hierarchical matrix
578 
579   Output Parameter:
580 . distributed_operator - opaque pointer to a Htool virtual matrix
581 
582   Level: advanced
583 
584 .seealso: [](ch_matrices), `Mat`, `MATHTOOL`
585 @*/
586 PETSC_EXTERN PetscErrorCode MatHtoolGetHierarchicalMat(Mat A, const htool::DistributedOperator<PetscScalar> **distributed_operator)
587 {
588   PetscFunctionBegin;
589   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
590   PetscAssertPointer(distributed_operator, 2);
591   PetscTryMethod(A, "MatHtoolGetHierarchicalMat_C", (Mat, const htool::DistributedOperator<PetscScalar> **), (A, distributed_operator));
592   PetscFunctionReturn(PETSC_SUCCESS);
593 }
594 
595 static PetscErrorCode MatHtoolSetKernel_Htool(Mat A, MatHtoolKernelFn *kernel, void *kernelctx)
596 {
597   Mat_Htool *a;
598 
599   PetscFunctionBegin;
600   PetscCall(MatShellGetContext(A, &a));
601   a->kernel    = kernel;
602   a->kernelctx = kernelctx;
603   delete a->wrapper;
604   if (a->kernel) a->wrapper = new WrapperHtool(a->dim, a->kernel, a->kernelctx);
605   PetscFunctionReturn(PETSC_SUCCESS);
606 }
607 
608 /*@C
609   MatHtoolSetKernel - Sets the kernel and context used for the assembly of a `MATHTOOL`.
610 
611   Collective, No Fortran Support
612 
613   Input Parameters:
614 + A         - hierarchical matrix
615 . kernel    - computational kernel (or `NULL`)
616 - kernelctx - kernel context (if kernel is `NULL`, the pointer must be of type htool::VirtualGenerator<PetscScalar>*)
617 
618   Level: advanced
619 
620 .seealso: [](ch_matrices), `Mat`, `MATHTOOL`, `MatCreateHtoolFromKernel()`
621 @*/
622 PetscErrorCode MatHtoolSetKernel(Mat A, MatHtoolKernelFn *kernel, void *kernelctx)
623 {
624   PetscFunctionBegin;
625   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
626   if (!kernelctx) PetscValidFunction(kernel, 2);
627   if (!kernel) PetscAssertPointer(kernelctx, 3);
628   PetscTryMethod(A, "MatHtoolSetKernel_C", (Mat, MatHtoolKernelFn *, void *), (A, kernel, kernelctx));
629   PetscFunctionReturn(PETSC_SUCCESS);
630 }
631 
632 static PetscErrorCode MatHtoolGetPermutationSource_Htool(Mat A, IS *is)
633 {
634   Mat_Htool                       *a;
635   PetscMPIInt                      rank;
636   const std::vector<PetscInt>     *source;
637   const htool::Cluster<PetscReal> *local_source_cluster;
638 
639   PetscFunctionBegin;
640   PetscCall(MatShellGetContext(A, &a));
641   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
642   local_source_cluster = a->source_cluster ? &a->source_cluster->get_cluster_on_partition(rank) : &a->target_cluster->get_cluster_on_partition(rank);
643   source               = &local_source_cluster->get_permutation();
644   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), local_source_cluster->get_size(), source->data() + local_source_cluster->get_offset(), PETSC_COPY_VALUES, is));
645   PetscCall(ISSetPermutation(*is));
646   PetscFunctionReturn(PETSC_SUCCESS);
647 }
648 
649 /*@
650   MatHtoolGetPermutationSource - Gets the permutation associated to the source cluster for a `MATHTOOL` matrix.
651 
652   Input Parameter:
653 . A - hierarchical matrix
654 
655   Output Parameter:
656 . is - permutation
657 
658   Level: advanced
659 
660 .seealso: [](ch_matrices), `Mat`, `MATHTOOL`, `MatHtoolGetPermutationTarget()`, `MatHtoolUsePermutation()`
661 @*/
662 PetscErrorCode MatHtoolGetPermutationSource(Mat A, IS *is)
663 {
664   PetscFunctionBegin;
665   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
666   if (!is) PetscAssertPointer(is, 2);
667   PetscTryMethod(A, "MatHtoolGetPermutationSource_C", (Mat, IS *), (A, is));
668   PetscFunctionReturn(PETSC_SUCCESS);
669 }
670 
671 static PetscErrorCode MatHtoolGetPermutationTarget_Htool(Mat A, IS *is)
672 {
673   Mat_Htool                   *a;
674   const std::vector<PetscInt> *target;
675   PetscMPIInt                  rank;
676 
677   PetscFunctionBegin;
678   PetscCall(MatShellGetContext(A, &a));
679   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
680   target = &a->target_cluster->get_permutation();
681   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), a->target_cluster->get_cluster_on_partition(rank).get_size(), target->data() + a->target_cluster->get_cluster_on_partition(rank).get_offset(), PETSC_COPY_VALUES, is));
682   PetscCall(ISSetPermutation(*is));
683   PetscFunctionReturn(PETSC_SUCCESS);
684 }
685 
686 /*@
687   MatHtoolGetPermutationTarget - Gets the permutation associated to the target cluster for a `MATHTOOL` matrix.
688 
689   Input Parameter:
690 . A - hierarchical matrix
691 
692   Output Parameter:
693 . is - permutation
694 
695   Level: advanced
696 
697 .seealso: [](ch_matrices), `Mat`, `MATHTOOL`, `MatHtoolGetPermutationSource()`, `MatHtoolUsePermutation()`
698 @*/
699 PetscErrorCode MatHtoolGetPermutationTarget(Mat A, IS *is)
700 {
701   PetscFunctionBegin;
702   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
703   if (!is) PetscAssertPointer(is, 2);
704   PetscTryMethod(A, "MatHtoolGetPermutationTarget_C", (Mat, IS *), (A, is));
705   PetscFunctionReturn(PETSC_SUCCESS);
706 }
707 
708 static PetscErrorCode MatHtoolUsePermutation_Htool(Mat A, PetscBool use)
709 {
710   Mat_Htool *a;
711 
712   PetscFunctionBegin;
713   PetscCall(MatShellGetContext(A, &a));
714   a->distributed_operator_holder->distributed_operator.use_permutation() = use;
715   PetscFunctionReturn(PETSC_SUCCESS);
716 }
717 
718 /*@
719   MatHtoolUsePermutation - Sets whether a `MATHTOOL` matrix should permute input (resp. output) vectors following its internal source (resp. target) permutation.
720 
721   Input Parameters:
722 + A   - hierarchical matrix
723 - use - Boolean value
724 
725   Level: advanced
726 
727 .seealso: [](ch_matrices), `Mat`, `MATHTOOL`, `MatHtoolGetPermutationSource()`, `MatHtoolGetPermutationTarget()`
728 @*/
729 PetscErrorCode MatHtoolUsePermutation(Mat A, PetscBool use)
730 {
731   PetscFunctionBegin;
732   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
733   PetscValidLogicalCollectiveBool(A, use, 2);
734   PetscTryMethod(A, "MatHtoolUsePermutation_C", (Mat, PetscBool), (A, use));
735   PetscFunctionReturn(PETSC_SUCCESS);
736 }
737 
738 static PetscErrorCode MatConvert_Htool_Dense(Mat A, MatType, MatReuse reuse, Mat *B)
739 {
740   Mat          C;
741   Mat_Htool   *a;
742   PetscScalar *array, shift, scale;
743   PetscInt     lda;
744 
745   PetscFunctionBegin;
746   PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
747   PetscCall(MatShellGetContext(A, &a));
748   if (reuse == MAT_REUSE_MATRIX) {
749     C = *B;
750     PetscCheck(C->rmap->n == A->rmap->n && C->cmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible dimensions");
751     PetscCall(MatDenseGetLDA(C, &lda));
752     PetscCheck(lda == C->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported leading dimension (%" PetscInt_FMT " != %" PetscInt_FMT ")", lda, C->rmap->n);
753   } else {
754     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
755     PetscCall(MatSetSizes(C, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
756     PetscCall(MatSetType(C, MATDENSE));
757     PetscCall(MatSetUp(C));
758     PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
759   }
760   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
761   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
762   PetscCall(MatDenseGetArrayWrite(C, &array));
763   htool::copy_to_dense_in_user_numbering(a->distributed_operator_holder->hmatrix, array);
764   PetscCall(MatDenseRestoreArrayWrite(C, &array));
765   PetscCall(MatShift(C, shift));
766   PetscCall(MatScale(C, scale));
767   if (reuse == MAT_INPLACE_MATRIX) {
768     PetscCall(MatHeaderReplace(A, &C));
769   } else *B = C;
770   PetscFunctionReturn(PETSC_SUCCESS);
771 }
772 
773 static PetscErrorCode GenEntriesTranspose(PetscInt sdim, PetscInt M, PetscInt N, const PetscInt *rows, const PetscInt *cols, PetscScalar *ptr, void *ctx)
774 {
775   MatHtoolKernelTranspose *generator = (MatHtoolKernelTranspose *)ctx;
776   PetscScalar             *tmp;
777 
778   PetscFunctionBegin;
779   PetscCall(generator->kernel(sdim, N, M, cols, rows, ptr, generator->kernelctx));
780   PetscCall(PetscMalloc1(M * N, &tmp));
781   PetscCall(PetscArraycpy(tmp, ptr, M * N));
782   for (PetscInt i = 0; i < M; ++i) {
783     for (PetscInt j = 0; j < N; ++j) ptr[i + j * M] = tmp[j + i * N];
784   }
785   PetscCall(PetscFree(tmp));
786   PetscFunctionReturn(PETSC_SUCCESS);
787 }
788 
789 /* naive implementation which keeps a reference to the original Mat */
790 static PetscErrorCode MatTranspose_Htool(Mat A, MatReuse reuse, Mat *B)
791 {
792   Mat                      C;
793   Mat_Htool               *a, *c;
794   PetscScalar              shift, scale;
795   PetscInt                 M = A->rmap->N, N = A->cmap->N, m = A->rmap->n, n = A->cmap->n;
796   PetscContainer           container;
797   MatHtoolKernelTranspose *kernelt;
798 
799   PetscFunctionBegin;
800   PetscCall(MatShellGetScalingShifts(A, &shift, &scale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
801   PetscCall(MatShellGetContext(A, &a));
802   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
803   PetscCheck(reuse != MAT_INPLACE_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatTranspose() with MAT_INPLACE_MATRIX not supported");
804   if (reuse == MAT_INITIAL_MATRIX) {
805     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
806     PetscCall(MatSetSizes(C, n, m, N, M));
807     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
808     PetscCall(MatSetUp(C));
809     PetscCall(PetscNew(&kernelt));
810     PetscCall(PetscObjectContainerCompose((PetscObject)C, "KernelTranspose", kernelt, PetscCtxDestroyDefault));
811   } else {
812     C = *B;
813     PetscCall(PetscObjectQuery((PetscObject)C, "KernelTranspose", (PetscObject *)&container));
814     PetscCheck(container, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatTranspose() with MAT_INITIAL_MATRIX first");
815     PetscCall(PetscContainerGetPointer(container, (void **)&kernelt));
816   }
817   PetscCall(MatShellGetContext(C, &c));
818   c->dim = a->dim;
819   PetscCall(MatShift(C, shift));
820   PetscCall(MatScale(C, scale));
821   c->kernel = GenEntriesTranspose;
822   if (kernelt->A != A) {
823     PetscCall(MatDestroy(&kernelt->A));
824     kernelt->A = A;
825     PetscCall(PetscObjectReference((PetscObject)A));
826   }
827   kernelt->kernel    = a->kernel;
828   kernelt->kernelctx = a->kernelctx;
829   c->kernelctx       = kernelt;
830   if (reuse == MAT_INITIAL_MATRIX) {
831     PetscCall(PetscMalloc1(N * c->dim, &c->gcoords_target));
832     PetscCall(PetscArraycpy(c->gcoords_target, a->gcoords_source, N * c->dim));
833     if (a->gcoords_target != a->gcoords_source) {
834       PetscCall(PetscMalloc1(M * c->dim, &c->gcoords_source));
835       PetscCall(PetscArraycpy(c->gcoords_source, a->gcoords_target, M * c->dim));
836     } else c->gcoords_source = c->gcoords_target;
837     PetscCall(PetscCalloc2(M, &c->work_source, N, &c->work_target));
838   }
839   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
840   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
841   if (reuse == MAT_INITIAL_MATRIX) *B = C;
842   PetscFunctionReturn(PETSC_SUCCESS);
843 }
844 
845 static PetscErrorCode MatDestroy_Factor(Mat F)
846 {
847   PetscContainer               container;
848   htool::HMatrix<PetscScalar> *A;
849 
850   PetscFunctionBegin;
851   PetscCall(PetscObjectQuery((PetscObject)F, "HMatrix", (PetscObject *)&container));
852   if (container) {
853     PetscCall(PetscContainerGetPointer(container, (void **)&A));
854     delete A;
855     PetscCall(PetscObjectCompose((PetscObject)F, "HMatrix", nullptr));
856   }
857   PetscCall(PetscObjectComposeFunction((PetscObject)F, "MatFactorGetSolverType_C", nullptr));
858   PetscFunctionReturn(PETSC_SUCCESS);
859 }
860 
861 static PetscErrorCode MatFactorGetSolverType_Htool(Mat, MatSolverType *type)
862 {
863   PetscFunctionBegin;
864   *type = MATSOLVERHTOOL;
865   PetscFunctionReturn(PETSC_SUCCESS);
866 }
867 
868 template <char trans>
869 static inline PetscErrorCode MatSolve_Private(Mat A, htool::Matrix<PetscScalar> &X)
870 {
871   PetscContainer               container;
872   htool::HMatrix<PetscScalar> *B;
873 
874   PetscFunctionBegin;
875   PetscCheck(A->factortype == MAT_FACTOR_LU || A->factortype == MAT_FACTOR_CHOLESKY, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_UNKNOWN_TYPE, "Only MAT_LU_FACTOR and MAT_CHOLESKY_FACTOR are supported");
876   PetscCall(PetscObjectQuery((PetscObject)A, "HMatrix", (PetscObject *)&container));
877   PetscCheck(container, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call Mat%sFactorNumeric() before Mat%sSolve%s()", A->factortype == MAT_FACTOR_LU ? "LU" : "Cholesky", X.nb_cols() == 1 ? "" : "Mat", trans == 'N' ? "" : "Transpose");
878   PetscCall(PetscContainerGetPointer(container, (void **)&B));
879   if (A->factortype == MAT_FACTOR_LU) htool::lu_solve(trans, *B, X);
880   else htool::cholesky_solve('L', *B, X);
881   PetscFunctionReturn(PETSC_SUCCESS);
882 }
883 
884 template <char trans, class Type, typename std::enable_if<std::is_same<Type, Vec>::value>::type * = nullptr>
885 static PetscErrorCode MatSolve_Htool(Mat A, Type b, Type x)
886 {
887   PetscInt                   n;
888   htool::Matrix<PetscScalar> v;
889   PetscScalar               *array;
890 
891   PetscFunctionBegin;
892   PetscCall(VecGetLocalSize(b, &n));
893   PetscCall(VecCopy(b, x));
894   PetscCall(VecGetArrayWrite(x, &array));
895   v.assign(n, 1, array, false);
896   PetscCall(VecRestoreArrayWrite(x, &array));
897   PetscCall(MatSolve_Private<trans>(A, v));
898   PetscFunctionReturn(PETSC_SUCCESS);
899 }
900 
901 template <char trans, class Type, typename std::enable_if<std::is_same<Type, Mat>::value>::type * = nullptr>
902 static PetscErrorCode MatSolve_Htool(Mat A, Type B, Type X)
903 {
904   PetscInt                   m, N;
905   htool::Matrix<PetscScalar> v;
906   PetscScalar               *array;
907 
908   PetscFunctionBegin;
909   PetscCall(MatGetLocalSize(B, &m, nullptr));
910   PetscCall(MatGetLocalSize(B, nullptr, &N));
911   PetscCall(MatCopy(B, X, SAME_NONZERO_PATTERN));
912   PetscCall(MatDenseGetArrayWrite(X, &array));
913   v.assign(m, N, array, false);
914   PetscCall(MatDenseRestoreArrayWrite(X, &array));
915   PetscCall(MatSolve_Private<trans>(A, v));
916   PetscFunctionReturn(PETSC_SUCCESS);
917 }
918 
919 template <MatFactorType ftype>
920 static PetscErrorCode MatFactorNumeric_Htool(Mat F, Mat A, const MatFactorInfo *)
921 {
922   Mat_Htool                   *a;
923   htool::HMatrix<PetscScalar> *B;
924 
925   PetscFunctionBegin;
926   PetscCall(MatShellGetContext(A, &a));
927   B = new htool::HMatrix<PetscScalar>(a->distributed_operator_holder->hmatrix);
928   if (ftype == MAT_FACTOR_LU) htool::lu_factorization(*B);
929   else htool::cholesky_factorization('L', *B);
930   PetscCall(PetscObjectContainerCompose((PetscObject)F, "HMatrix", B, nullptr));
931   PetscFunctionReturn(PETSC_SUCCESS);
932 }
933 
934 template <MatFactorType ftype>
935 PetscErrorCode MatFactorSymbolic_Htool(Mat F, Mat)
936 {
937   PetscFunctionBegin;
938   F->preallocated  = PETSC_TRUE;
939   F->assembled     = PETSC_TRUE;
940   F->ops->solve    = MatSolve_Htool<'N', Vec>;
941   F->ops->matsolve = MatSolve_Htool<'N', Mat>;
942   if (!PetscDefined(USE_COMPLEX) || ftype == MAT_FACTOR_LU) {
943     F->ops->solvetranspose    = MatSolve_Htool<'T', Vec>;
944     F->ops->matsolvetranspose = MatSolve_Htool<'T', Mat>;
945   }
946   F->ops->destroy = MatDestroy_Factor;
947   if (ftype == MAT_FACTOR_LU) F->ops->lufactornumeric = MatFactorNumeric_Htool<MAT_FACTOR_LU>;
948   else F->ops->choleskyfactornumeric = MatFactorNumeric_Htool<MAT_FACTOR_CHOLESKY>;
949   PetscFunctionReturn(PETSC_SUCCESS);
950 }
951 
952 static PetscErrorCode MatLUFactorSymbolic_Htool(Mat F, Mat A, IS, IS, const MatFactorInfo *)
953 {
954   PetscFunctionBegin;
955   PetscCall(MatFactorSymbolic_Htool<MAT_FACTOR_LU>(F, A));
956   PetscFunctionReturn(PETSC_SUCCESS);
957 }
958 
959 static PetscErrorCode MatCholeskyFactorSymbolic_Htool(Mat F, Mat A, IS, const MatFactorInfo *)
960 {
961   PetscFunctionBegin;
962   PetscCall(MatFactorSymbolic_Htool<MAT_FACTOR_CHOLESKY>(F, A));
963   PetscFunctionReturn(PETSC_SUCCESS);
964 }
965 
966 static PetscErrorCode MatGetFactor_htool_htool(Mat A, MatFactorType ftype, Mat *F)
967 {
968   Mat         B;
969   Mat_Htool  *a;
970   PetscMPIInt size;
971 
972   PetscFunctionBegin;
973   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
974   PetscCall(MatShellGetContext(A, &a));
975   PetscCheck(size == 1, PetscObjectComm((PetscObject)A), PETSC_ERR_WRONG_MPI_SIZE, "Unsupported parallel MatGetFactor()");
976   PetscCheck(a->block_tree_consistency, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Cannot factor a MatHtool with inconsistent block tree");
977   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
978   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
979   PetscCall(PetscStrallocpy(MATSOLVERHTOOL, &((PetscObject)B)->type_name));
980   PetscCall(MatSetUp(B));
981 
982   B->ops->getinfo    = MatGetInfo_External;
983   B->factortype      = ftype;
984   B->trivialsymbolic = PETSC_TRUE;
985 
986   if (ftype == MAT_FACTOR_LU) B->ops->lufactorsymbolic = MatLUFactorSymbolic_Htool;
987   else if (ftype == MAT_FACTOR_CHOLESKY) B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_Htool;
988 
989   PetscCall(PetscFree(B->solvertype));
990   PetscCall(PetscStrallocpy(MATSOLVERHTOOL, &B->solvertype));
991 
992   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_Htool));
993   *F = B;
994   PetscFunctionReturn(PETSC_SUCCESS);
995 }
996 
997 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_Htool(void)
998 {
999   PetscFunctionBegin;
1000   PetscCall(MatSolverTypeRegister(MATSOLVERHTOOL, MATHTOOL, MAT_FACTOR_LU, MatGetFactor_htool_htool));
1001   PetscCall(MatSolverTypeRegister(MATSOLVERHTOOL, MATHTOOL, MAT_FACTOR_CHOLESKY, MatGetFactor_htool_htool));
1002   PetscFunctionReturn(PETSC_SUCCESS);
1003 }
1004 
1005 /*@C
1006   MatCreateHtoolFromKernel - Creates a `MATHTOOL` from a user-supplied kernel.
1007 
1008   Collective, No Fortran Support
1009 
1010   Input Parameters:
1011 + comm          - MPI communicator
1012 . m             - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
1013 . n             - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
1014 . M             - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
1015 . N             - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
1016 . spacedim      - dimension of the space coordinates
1017 . coords_target - coordinates of the target
1018 . coords_source - coordinates of the source
1019 . kernel        - computational kernel (or `NULL`)
1020 - kernelctx     - kernel context (if kernel is `NULL`, the pointer must be of type htool::VirtualGenerator<PetscScalar>*)
1021 
1022   Output Parameter:
1023 . B - matrix
1024 
1025   Options Database Keys:
1026 + -mat_htool_min_cluster_size <`PetscInt`>                                                     - minimal leaf size in cluster tree
1027 . -mat_htool_epsilon <`PetscReal`>                                                             - relative error in Frobenius norm when approximating a block
1028 . -mat_htool_eta <`PetscReal`>                                                                 - admissibility condition tolerance
1029 . -mat_htool_min_target_depth <`PetscInt`>                                                     - minimal cluster tree depth associated with the rows
1030 . -mat_htool_min_source_depth <`PetscInt`>                                                     - minimal cluster tree depth associated with the columns
1031 . -mat_htool_block_tree_consistency <`PetscBool`>                                              - block tree consistency
1032 . -mat_htool_compressor <sympartialACA, fullACA, SVD>                                          - type of compression
1033 - -mat_htool_clustering <PCARegular, PCAGeometric, BounbingBox1Regular, BoundingBox1Geometric> - type of clustering
1034 
1035   Level: intermediate
1036 
1037 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MATHTOOL`, `PCSetCoordinates()`, `MatHtoolSetKernel()`, `MatHtoolCompressorType`, `MATH2OPUS`, `MatCreateH2OpusFromKernel()`
1038 @*/
1039 PetscErrorCode MatCreateHtoolFromKernel(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt spacedim, const PetscReal coords_target[], const PetscReal coords_source[], MatHtoolKernelFn *kernel, void *kernelctx, Mat *B)
1040 {
1041   Mat        A;
1042   Mat_Htool *a;
1043 
1044   PetscFunctionBegin;
1045   PetscCall(MatCreate(comm, &A));
1046   PetscValidLogicalCollectiveInt(A, spacedim, 6);
1047   PetscAssertPointer(coords_target, 7);
1048   PetscAssertPointer(coords_source, 8);
1049   if (!kernelctx) PetscValidFunction(kernel, 9);
1050   if (!kernel) PetscAssertPointer(kernelctx, 10);
1051   PetscCall(MatSetSizes(A, m, n, M, N));
1052   PetscCall(MatSetType(A, MATHTOOL));
1053   PetscCall(MatSetUp(A));
1054   PetscCall(MatShellGetContext(A, &a));
1055   a->dim       = spacedim;
1056   a->kernel    = kernel;
1057   a->kernelctx = kernelctx;
1058   PetscCall(PetscCalloc1(A->rmap->N * spacedim, &a->gcoords_target));
1059   PetscCall(PetscArraycpy(a->gcoords_target + A->rmap->rstart * spacedim, coords_target, A->rmap->n * spacedim));
1060   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, a->gcoords_target, A->rmap->N * spacedim, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)A))); /* global target coordinates */
1061   if (coords_target != coords_source) {
1062     PetscCall(PetscCalloc1(A->cmap->N * spacedim, &a->gcoords_source));
1063     PetscCall(PetscArraycpy(a->gcoords_source + A->cmap->rstart * spacedim, coords_source, A->cmap->n * spacedim));
1064     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, a->gcoords_source, A->cmap->N * spacedim, MPIU_REAL, MPI_SUM, PetscObjectComm((PetscObject)A))); /* global source coordinates */
1065   } else a->gcoords_source = a->gcoords_target;
1066   PetscCall(PetscCalloc2(A->cmap->N, &a->work_source, A->rmap->N, &a->work_target));
1067   *B = A;
1068   PetscFunctionReturn(PETSC_SUCCESS);
1069 }
1070 
1071 /*MC
1072      MATHTOOL = "htool" - A matrix type for hierarchical matrices using the Htool package.
1073 
1074   Use `./configure --download-htool` to install PETSc to use Htool.
1075 
1076    Options Database Key:
1077 .     -mat_type htool - matrix type to `MATHTOOL`
1078 
1079    Level: beginner
1080 
1081 .seealso: [](ch_matrices), `Mat`, `MATH2OPUS`, `MATDENSE`, `MatCreateHtoolFromKernel()`, `MatHtoolSetKernel()`
1082 M*/
1083 PETSC_EXTERN PetscErrorCode MatCreate_Htool(Mat A)
1084 {
1085   Mat_Htool *a;
1086 
1087   PetscFunctionBegin;
1088   PetscCall(MatSetType(A, MATSHELL));
1089   PetscCall(PetscNew(&a));
1090   PetscCall(MatShellSetContext(A, a));
1091   PetscCall(MatShellSetOperation(A, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Htool));
1092   PetscCall(MatShellSetOperation(A, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Htool));
1093   PetscCall(MatShellSetOperation(A, MATOP_MULT, (void (*)(void))MatMult_Htool));
1094   PetscCall(MatShellSetOperation(A, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Htool));
1095   if (!PetscDefined(USE_COMPLEX)) PetscCall(MatShellSetOperation(A, MATOP_MULT_HERMITIAN_TRANSPOSE, (void (*)(void))MatMultTranspose_Htool));
1096   A->ops->increaseoverlap   = MatIncreaseOverlap_Htool;
1097   A->ops->createsubmatrices = MatCreateSubMatrices_Htool;
1098   PetscCall(MatShellSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_Htool));
1099   PetscCall(MatShellSetOperation(A, MATOP_SET_FROM_OPTIONS, (void (*)(void))MatSetFromOptions_Htool));
1100   PetscCall(MatShellSetOperation(A, MATOP_GET_ROW, (void (*)(void))MatGetRow_Htool));
1101   PetscCall(MatShellSetOperation(A, MATOP_RESTORE_ROW, (void (*)(void))MatRestoreRow_Htool));
1102   PetscCall(MatShellSetOperation(A, MATOP_ASSEMBLY_END, (void (*)(void))MatAssemblyEnd_Htool));
1103   PetscCall(MatShellSetOperation(A, MATOP_TRANSPOSE, (void (*)(void))MatTranspose_Htool));
1104   PetscCall(MatShellSetOperation(A, MATOP_DESTROY, (void (*)(void))MatDestroy_Htool));
1105   a->dim                    = 0;
1106   a->gcoords_target         = nullptr;
1107   a->gcoords_source         = nullptr;
1108   a->min_cluster_size       = 10;
1109   a->epsilon                = PetscSqrtReal(PETSC_SMALL);
1110   a->eta                    = 10.0;
1111   a->depth[0]               = 0;
1112   a->depth[1]               = 0;
1113   a->block_tree_consistency = PETSC_TRUE;
1114   a->compressor             = MAT_HTOOL_COMPRESSOR_SYMPARTIAL_ACA;
1115   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_seqdense_C", MatProductSetFromOptions_Htool));
1116   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_htool_mpidense_C", MatProductSetFromOptions_Htool));
1117   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_seqdense_C", MatConvert_Htool_Dense));
1118   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_htool_mpidense_C", MatConvert_Htool_Dense));
1119   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetHierarchicalMat_C", MatHtoolGetHierarchicalMat_Htool));
1120   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolSetKernel_C", MatHtoolSetKernel_Htool));
1121   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationSource_C", MatHtoolGetPermutationSource_Htool));
1122   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolGetPermutationTarget_C", MatHtoolGetPermutationTarget_Htool));
1123   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatHtoolUsePermutation_C", MatHtoolUsePermutation_Htool));
1124   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContext_C", MatShellSetContext_Immutable));
1125   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetContextDestroy_C", MatShellSetContextDestroy_Immutable));
1126   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatShellSetManageScalingShifts_C", MatShellSetManageScalingShifts_Immutable));
1127   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATHTOOL));
1128   PetscFunctionReturn(PETSC_SUCCESS);
1129 }
1130