xref: /petsc/src/mat/utils/axpy.c (revision 2e37cc59afff36ccb5a1b1fe6479c31382db32d5)
1 #include <petsc/private/matimpl.h> /*I   "petscmat.h"  I*/
2 
3 static PetscErrorCode MatTransposeAXPY_Private(Mat Y, PetscScalar a, Mat X, MatStructure str, Mat T)
4 {
5   Mat         A, F;
6   PetscScalar vshift, vscale;
7   PetscErrorCode (*f)(Mat, Mat *);
8 
9   PetscFunctionBegin;
10   if (T == X) PetscCall(MatShellGetScalingShifts(T, &vshift, &vscale, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
11   else {
12     vshift = 0.0;
13     vscale = 1.0;
14   }
15   PetscCall(PetscObjectQueryFunction((PetscObject)T, "MatTransposeGetMat_C", &f));
16   if (f) {
17     PetscCall(MatTransposeGetMat(T, &A));
18     if (T == X) {
19       PetscCall(PetscInfo(NULL, "Explicitly transposing X of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
20       PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &F));
21       A = Y;
22     } else {
23       PetscCall(PetscInfo(NULL, "Transposing X because Y of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
24       PetscCall(MatTranspose(X, MAT_INITIAL_MATRIX, &F));
25     }
26   } else {
27     PetscCall(MatHermitianTransposeGetMat(T, &A));
28     if (T == X) {
29       PetscCall(PetscInfo(NULL, "Explicitly Hermitian transposing X of type MATHERMITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
30       PetscCall(MatHermitianTranspose(A, MAT_INITIAL_MATRIX, &F));
31       A = Y;
32     } else {
33       PetscCall(PetscInfo(NULL, "Hermitian transposing X because Y of type MATHERMITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
34       PetscCall(MatHermitianTranspose(X, MAT_INITIAL_MATRIX, &F));
35     }
36   }
37   PetscCall(MatAXPY(A, a * vscale, F, str));
38   PetscCall(MatShift(A, a * vshift));
39   PetscCall(MatDestroy(&F));
40   PetscFunctionReturn(PETSC_SUCCESS);
41 }
42 
43 /*@
44   MatAXPY - Computes Y = a*X + Y.
45 
46   Logically Collective
47 
48   Input Parameters:
49 + a   - the scalar multiplier
50 . X   - the first matrix
51 . Y   - the second matrix
52 - str - either `SAME_NONZERO_PATTERN`, `DIFFERENT_NONZERO_PATTERN`, `UNKNOWN_NONZERO_PATTERN`, or `SUBSET_NONZERO_PATTERN` (nonzeros of `X` is a subset of `Y`'s)
53 
54   Level: intermediate
55 
56 .seealso: [](ch_matrices), `Mat`, `MatAYPX()`
57  @*/
58 PetscErrorCode MatAXPY(Mat Y, PetscScalar a, Mat X, MatStructure str)
59 {
60   PetscInt  M1, M2, N1, N2;
61   PetscInt  m1, m2, n1, n2;
62   PetscBool sametype, transpose, nogetrow;
63 
64   PetscFunctionBegin;
65   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
66   PetscValidLogicalCollectiveScalar(Y, a, 2);
67   PetscValidHeaderSpecific(X, MAT_CLASSID, 3);
68   PetscCheckSameComm(Y, 1, X, 3);
69   PetscCall(MatGetSize(X, &M1, &N1));
70   PetscCall(MatGetSize(Y, &M2, &N2));
71   PetscCall(MatGetLocalSize(X, &m1, &n1));
72   PetscCall(MatGetLocalSize(Y, &m2, &n2));
73   PetscCheck(M1 == M2 && N1 == N2, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_SIZ, "Non conforming matrix add: global sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT, M1, N1, M2, N2);
74   PetscCheck(m1 == m2 && n1 == n2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Non conforming matrix add: local sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT, m1, n1, m2, n2);
75   PetscCheck(Y->assembled, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix (Y)");
76   PetscCheck(X->assembled, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix (X)");
77   if (a == (PetscScalar)0.0) PetscFunctionReturn(PETSC_SUCCESS);
78   if (Y == X) {
79     PetscCall(MatScale(Y, 1.0 + a));
80     PetscFunctionReturn(PETSC_SUCCESS);
81   }
82   PetscCall(PetscObjectObjectTypeCompare((PetscObject)X, (PetscObject)Y, &sametype));
83   PetscCall(PetscLogEventBegin(MAT_AXPY, Y, 0, 0, 0));
84   if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
85     PetscUseTypeMethod(Y, axpy, a, X, str);
86   } else {
87     PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
88     if (transpose) {
89       PetscCall(MatTransposeAXPY_Private(Y, a, X, str, X));
90     } else {
91       PetscCall(PetscObjectTypeCompareAny((PetscObject)Y, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
92       if (transpose) {
93         PetscCall(MatTransposeAXPY_Private(Y, a, X, str, Y));
94       } else {
95         PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &nogetrow, MATSCALAPACK, MATELEMENTAL, ""));
96         if (nogetrow) { /* Avoid MatAXPY_Basic() due to missing MatGetRow() */
97           Mat     C;
98           MatType type;
99 
100           PetscCall(MatGetType(Y, &type));
101           PetscCall(MatConvert(X, type, MAT_INITIAL_MATRIX, &C));
102           PetscCall(MatAXPY(Y, a, C, str));
103           PetscCall(MatDestroy(&C));
104         } else {
105           PetscCall(MatAXPY_Basic(Y, a, X, str));
106         }
107       }
108     }
109   }
110   PetscCall(PetscLogEventEnd(MAT_AXPY, Y, 0, 0, 0));
111   PetscFunctionReturn(PETSC_SUCCESS);
112 }
113 
114 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
115 {
116   PetscErrorCode (*preall)(Mat, Mat, Mat *) = NULL;
117 
118   PetscFunctionBegin;
119   /* look for any available faster alternative to the general preallocator */
120   PetscCall(PetscObjectQueryFunction((PetscObject)Y, "MatAXPYGetPreallocation_C", &preall));
121   if (preall) {
122     PetscCall((*preall)(Y, X, B));
123   } else { /* Use MatPrellocator, assumes same row-col distribution */
124     Mat      preallocator;
125     PetscInt r, rstart, rend;
126     PetscInt m, n, M, N;
127 
128     PetscCall(MatGetRowUpperTriangular(Y));
129     PetscCall(MatGetRowUpperTriangular(X));
130     PetscCall(MatGetSize(Y, &M, &N));
131     PetscCall(MatGetLocalSize(Y, &m, &n));
132     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &preallocator));
133     PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
134     PetscCall(MatSetLayouts(preallocator, Y->rmap, Y->cmap));
135     PetscCall(MatSetUp(preallocator));
136     PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
137     for (r = rstart; r < rend; ++r) {
138       PetscInt           ncols;
139       const PetscInt    *row;
140       const PetscScalar *vals;
141 
142       PetscCall(MatGetRow(Y, r, &ncols, &row, &vals));
143       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
144       PetscCall(MatRestoreRow(Y, r, &ncols, &row, &vals));
145       PetscCall(MatGetRow(X, r, &ncols, &row, &vals));
146       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
147       PetscCall(MatRestoreRow(X, r, &ncols, &row, &vals));
148     }
149     PetscCall(MatSetOption(preallocator, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
150     PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
151     PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
152     PetscCall(MatRestoreRowUpperTriangular(Y));
153     PetscCall(MatRestoreRowUpperTriangular(X));
154 
155     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), B));
156     PetscCall(MatSetType(*B, ((PetscObject)Y)->type_name));
157     PetscCall(MatSetLayouts(*B, Y->rmap, Y->cmap));
158     PetscCall(MatPreallocatorPreallocate(preallocator, PETSC_FALSE, *B));
159     PetscCall(MatDestroy(&preallocator));
160   }
161   PetscFunctionReturn(PETSC_SUCCESS);
162 }
163 
164 PetscErrorCode MatAXPY_Basic(Mat Y, PetscScalar a, Mat X, MatStructure str)
165 {
166   PetscBool isshell, isdense, isnest;
167 
168   PetscFunctionBegin;
169   PetscCall(MatIsShell(Y, &isshell));
170   if (isshell) { /* MatShell has special support for AXPY */
171     PetscErrorCode (*f)(Mat, PetscScalar, Mat, MatStructure);
172 
173     PetscCall(MatGetOperation(Y, MATOP_AXPY, (void (**)(void)) & f));
174     if (f) {
175       PetscCall((*f)(Y, a, X, str));
176       PetscFunctionReturn(PETSC_SUCCESS);
177     }
178   }
179   /* no need to preallocate if Y is dense */
180   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)Y, &isdense, MATSEQDENSE, MATMPIDENSE, ""));
181   if (isdense) {
182     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATNEST, &isnest));
183     if (isnest) {
184       PetscCall(MatAXPY_Dense_Nest(Y, a, X));
185       PetscFunctionReturn(PETSC_SUCCESS);
186     }
187     if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
188   }
189   if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
190     PetscInt           i, start, end, j, ncols, m, n;
191     const PetscInt    *row;
192     PetscScalar       *val;
193     const PetscScalar *vals;
194 
195     PetscCall(MatGetSize(X, &m, &n));
196     PetscCall(MatGetOwnershipRange(X, &start, &end));
197     PetscCall(MatGetRowUpperTriangular(X));
198     if (a == 1.0) {
199       for (i = start; i < end; i++) {
200         PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
201         PetscCall(MatSetValues(Y, 1, &i, ncols, row, vals, ADD_VALUES));
202         PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
203       }
204     } else {
205       PetscInt vs = 100;
206       /* realloc if needed, as this function may be used in parallel */
207       PetscCall(PetscMalloc1(vs, &val));
208       for (i = start; i < end; i++) {
209         PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
210         if (vs < ncols) {
211           vs = PetscMin(2 * ncols, n);
212           PetscCall(PetscRealloc(vs * sizeof(*val), &val));
213         }
214         for (j = 0; j < ncols; j++) val[j] = a * vals[j];
215         PetscCall(MatSetValues(Y, 1, &i, ncols, row, val, ADD_VALUES));
216         PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
217       }
218       PetscCall(PetscFree(val));
219     }
220     PetscCall(MatRestoreRowUpperTriangular(X));
221     PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
222     PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
223   } else {
224     Mat B;
225 
226     PetscCall(MatAXPY_Basic_Preallocate(Y, X, &B));
227     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
228     PetscCall(MatHeaderMerge(Y, &B));
229   }
230   PetscFunctionReturn(PETSC_SUCCESS);
231 }
232 
233 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B, Mat Y, PetscScalar a, Mat X, MatStructure str)
234 {
235   PetscInt           i, start, end, j, ncols, m, n;
236   const PetscInt    *row;
237   PetscScalar       *val;
238   const PetscScalar *vals;
239 
240   PetscFunctionBegin;
241   PetscCall(MatGetSize(X, &m, &n));
242   PetscCall(MatGetOwnershipRange(X, &start, &end));
243   PetscCall(MatGetRowUpperTriangular(Y));
244   PetscCall(MatGetRowUpperTriangular(X));
245   if (a == 1.0) {
246     for (i = start; i < end; i++) {
247       PetscCall(MatGetRow(Y, i, &ncols, &row, &vals));
248       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
249       PetscCall(MatRestoreRow(Y, i, &ncols, &row, &vals));
250 
251       PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
252       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
253       PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
254     }
255   } else {
256     PetscInt vs = 100;
257     /* realloc if needed, as this function may be used in parallel */
258     PetscCall(PetscMalloc1(vs, &val));
259     for (i = start; i < end; i++) {
260       PetscCall(MatGetRow(Y, i, &ncols, &row, &vals));
261       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
262       PetscCall(MatRestoreRow(Y, i, &ncols, &row, &vals));
263 
264       PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
265       if (vs < ncols) {
266         vs = PetscMin(2 * ncols, n);
267         PetscCall(PetscRealloc(vs * sizeof(*val), &val));
268       }
269       for (j = 0; j < ncols; j++) val[j] = a * vals[j];
270       PetscCall(MatSetValues(B, 1, &i, ncols, row, val, ADD_VALUES));
271       PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
272     }
273     PetscCall(PetscFree(val));
274   }
275   PetscCall(MatRestoreRowUpperTriangular(Y));
276   PetscCall(MatRestoreRowUpperTriangular(X));
277   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
278   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
279   PetscFunctionReturn(PETSC_SUCCESS);
280 }
281 
282 /*@
283   MatShift - Computes `Y =  Y + a I`, where `a` is a `PetscScalar`
284 
285   Neighbor-wise Collective
286 
287   Input Parameters:
288 + Y - the matrix
289 - a - the `PetscScalar`
290 
291   Level: intermediate
292 
293   Notes:
294   If `Y` is a rectangular matrix, the shift is done on the main diagonal of the matrix (https://en.wikipedia.org/wiki/Main_diagonal)
295 
296   If the matrix `Y` is missing some diagonal entries this routine can be very slow. To make it fast one should initially
297   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
298   entry). No operation is performed when a is zero.
299 
300   To form Y = Y + diag(V) use `MatDiagonalSet()`
301 
302 .seealso: [](ch_matrices), `Mat`, `MatDiagonalSet()`, `MatScale()`, `MatDiagonalScale()`
303  @*/
304 PetscErrorCode MatShift(Mat Y, PetscScalar a)
305 {
306   PetscFunctionBegin;
307   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
308   PetscCheck(Y->assembled, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix");
309   PetscCheck(!Y->factortype, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
310   MatCheckPreallocated(Y, 1);
311   if (a == 0.0) PetscFunctionReturn(PETSC_SUCCESS);
312 
313   if (Y->ops->shift) PetscUseTypeMethod(Y, shift, a);
314   else PetscCall(MatShift_Basic(Y, a));
315 
316   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
317   PetscFunctionReturn(PETSC_SUCCESS);
318 }
319 
320 PetscErrorCode MatDiagonalSet_Default(Mat Y, Vec D, InsertMode is)
321 {
322   PetscInt           i, start, end;
323   const PetscScalar *v;
324 
325   PetscFunctionBegin;
326   PetscCall(MatGetOwnershipRange(Y, &start, &end));
327   PetscCall(VecGetArrayRead(D, &v));
328   for (i = start; i < end; i++) PetscCall(MatSetValues(Y, 1, &i, 1, &i, v + i - start, is));
329   PetscCall(VecRestoreArrayRead(D, &v));
330   PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
331   PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
332   PetscFunctionReturn(PETSC_SUCCESS);
333 }
334 
335 /*@
336   MatDiagonalSet - Computes `Y` = `Y` + `D`, where `D` is a diagonal matrix
337   that is represented as a vector. Or Y[i,i] = D[i] if `InsertMode` is
338   `INSERT_VALUES`.
339 
340   Neighbor-wise Collective
341 
342   Input Parameters:
343 + Y  - the input matrix
344 . D  - the diagonal matrix, represented as a vector
345 - is - `INSERT_VALUES` or `ADD_VALUES`
346 
347   Level: intermediate
348 
349   Note:
350   If the matrix `Y` is missing some diagonal entries this routine can be very slow. To make it fast one should initially
351   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
352   entry).
353 
354 .seealso: [](ch_matrices), `Mat`, `MatShift()`, `MatScale()`, `MatDiagonalScale()`
355 @*/
356 PetscErrorCode MatDiagonalSet(Mat Y, Vec D, InsertMode is)
357 {
358   PetscInt matlocal, veclocal;
359 
360   PetscFunctionBegin;
361   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
362   PetscValidHeaderSpecific(D, VEC_CLASSID, 2);
363   MatCheckPreallocated(Y, 1);
364   PetscCall(MatGetLocalSize(Y, &matlocal, NULL));
365   PetscCall(VecGetLocalSize(D, &veclocal));
366   PetscCheck(matlocal == veclocal, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number local rows of matrix %" PetscInt_FMT " does not match that of vector for diagonal %" PetscInt_FMT, matlocal, veclocal);
367   if (Y->ops->diagonalset) PetscUseTypeMethod(Y, diagonalset, D, is);
368   else PetscCall(MatDiagonalSet_Default(Y, D, is));
369   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
370   PetscFunctionReturn(PETSC_SUCCESS);
371 }
372 
373 /*@
374   MatAYPX - Computes Y = a*Y + X.
375 
376   Logically Collective
377 
378   Input Parameters:
379 + a   - the `PetscScalar` multiplier
380 . Y   - the first matrix
381 . X   - the second matrix
382 - str - either `SAME_NONZERO_PATTERN`, `DIFFERENT_NONZERO_PATTERN`, `UNKNOWN_NONZERO_PATTERN`, or `SUBSET_NONZERO_PATTERN` (nonzeros of `X` is a subset of `Y`'s)
383 
384   Level: intermediate
385 
386 .seealso: [](ch_matrices), `Mat`, `MatAXPY()`
387  @*/
388 PetscErrorCode MatAYPX(Mat Y, PetscScalar a, Mat X, MatStructure str)
389 {
390   PetscFunctionBegin;
391   PetscCall(MatScale(Y, a));
392   PetscCall(MatAXPY(Y, 1.0, X, str));
393   PetscFunctionReturn(PETSC_SUCCESS);
394 }
395 
396 /*@
397   MatComputeOperator - Computes the explicit matrix
398 
399   Collective
400 
401   Input Parameters:
402 + inmat   - the matrix
403 - mattype - the matrix type for the explicit operator
404 
405   Output Parameter:
406 . mat - the explicit  operator
407 
408   Level: advanced
409 
410   Note:
411   This computation is done by applying the operator to columns of the identity matrix.
412   This routine is costly in general, and is recommended for use only with relatively small systems.
413   Currently, this routine uses a dense matrix format if `mattype` == `NULL`.
414 
415 .seealso: [](ch_matrices), `Mat`, `MatConvert()`, `MatMult()`, `MatComputeOperatorTranspose()`
416 @*/
417 PetscErrorCode MatComputeOperator(Mat inmat, MatType mattype, Mat *mat)
418 {
419   PetscFunctionBegin;
420   PetscValidHeaderSpecific(inmat, MAT_CLASSID, 1);
421   PetscAssertPointer(mat, 3);
422   PetscCall(MatConvert_Shell(inmat, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat));
423   PetscFunctionReturn(PETSC_SUCCESS);
424 }
425 
426 /*@
427   MatComputeOperatorTranspose - Computes the explicit matrix representation of
428   a give matrix that can apply `MatMultTranspose()`
429 
430   Collective
431 
432   Input Parameters:
433 + inmat   - the matrix
434 - mattype - the matrix type for the explicit operator
435 
436   Output Parameter:
437 . mat - the explicit  operator transposed
438 
439   Level: advanced
440 
441   Note:
442   This computation is done by applying the transpose of the operator to columns of the identity matrix.
443   This routine is costly in general, and is recommended for use only with relatively small systems.
444   Currently, this routine uses a dense matrix format if `mattype` == `NULL`.
445 
446 .seealso: [](ch_matrices), `Mat`, `MatConvert()`, `MatMult()`, `MatComputeOperator()`
447 @*/
448 PetscErrorCode MatComputeOperatorTranspose(Mat inmat, MatType mattype, Mat *mat)
449 {
450   Mat A;
451 
452   PetscFunctionBegin;
453   PetscValidHeaderSpecific(inmat, MAT_CLASSID, 1);
454   PetscAssertPointer(mat, 3);
455   PetscCall(MatCreateTranspose(inmat, &A));
456   PetscCall(MatConvert_Shell(A, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat));
457   PetscCall(MatDestroy(&A));
458   PetscFunctionReturn(PETSC_SUCCESS);
459 }
460 
461 /*@
462   MatFilter - Set all values in the matrix with an absolute value less than or equal to the tolerance to zero, and optionally compress the underlying storage
463 
464   Input Parameters:
465 + A        - The matrix
466 . tol      - The zero tolerance
467 . compress - Whether the storage from the input matrix `A` should be compressed once values less than or equal to `tol` are set to zero
468 - keep     - If `compress` is true and for a given row of `A`, the diagonal coefficient is less than or equal to `tol`, indicates whether it should be left in the structure or eliminated as well
469 
470   Level: intermediate
471 
472 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatZeroEntries()`, `MatEliminateZeros()`, `VecFilter()`
473  @*/
474 PetscErrorCode MatFilter(Mat A, PetscReal tol, PetscBool compress, PetscBool keep)
475 {
476   Mat          a;
477   PetscScalar *newVals;
478   PetscInt    *newCols, rStart, rEnd, maxRows, r, colMax = 0, nnz0 = 0, nnz1 = 0;
479   PetscBool    flg;
480 
481   PetscFunctionBegin;
482   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, ""));
483   if (flg) {
484     PetscCall(MatDenseGetLocalMatrix(A, &a));
485     PetscCall(MatDenseGetLDA(a, &r));
486     PetscCall(MatGetSize(a, &rStart, &rEnd));
487     PetscCall(MatDenseGetArray(a, &newVals));
488     for (; colMax < rEnd; ++colMax) {
489       for (maxRows = 0; maxRows < rStart; ++maxRows) newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) <= tol ? 0.0 : newVals[maxRows + colMax * r];
490     }
491     PetscCall(MatDenseRestoreArray(a, &newVals));
492   } else {
493     const PetscInt *ranges;
494     PetscMPIInt     rank, size;
495 
496     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
497     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
498     PetscCall(MatGetOwnershipRanges(A, &ranges));
499     rStart = ranges[rank];
500     rEnd   = ranges[rank + 1];
501     PetscCall(MatGetRowUpperTriangular(A));
502     for (r = rStart; r < rEnd; ++r) {
503       PetscInt ncols;
504 
505       PetscCall(MatGetRow(A, r, &ncols, NULL, NULL));
506       colMax = PetscMax(colMax, ncols);
507       PetscCall(MatRestoreRow(A, r, &ncols, NULL, NULL));
508     }
509     maxRows = 0;
510     for (r = 0; r < size; ++r) maxRows = PetscMax(maxRows, ranges[r + 1] - ranges[r]);
511     PetscCall(PetscCalloc2(colMax, &newCols, colMax, &newVals));
512     PetscCall(MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg)); /* cache user-defined value */
513     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
514     /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd()             */
515     /* that are potentially called many times depending on the distribution of A */
516     for (r = rStart; r < rStart + maxRows; ++r) {
517       if (r < rEnd) {
518         const PetscScalar *vals;
519         const PetscInt    *cols;
520         PetscInt           ncols, newcols = 0, c;
521 
522         PetscCall(MatGetRow(A, r, &ncols, &cols, &vals));
523         nnz0 += ncols - 1;
524         for (c = 0; c < ncols; ++c) {
525           if (PetscUnlikely(PetscAbsScalar(vals[c]) <= tol)) newCols[newcols++] = cols[c];
526         }
527         nnz1 += ncols - newcols - 1;
528         PetscCall(MatRestoreRow(A, r, &ncols, &cols, &vals));
529         PetscCall(MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES));
530       }
531       PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
532       PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
533     }
534     PetscCall(MatRestoreRowUpperTriangular(A));
535     PetscCall(PetscFree2(newCols, newVals));
536     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg)); /* reset option to its user-defined value */
537     if (nnz0 > 0) PetscCall(PetscInfo(NULL, "Filtering left %g%% edges in graph\n", 100 * (double)nnz1 / (double)nnz0));
538     else PetscCall(PetscInfo(NULL, "Warning: %" PetscInt_FMT " edges to filter with %" PetscInt_FMT " rows\n", nnz0, maxRows));
539   }
540   if (compress && A->ops->eliminatezeros) {
541     Mat       B;
542     PetscBool flg;
543 
544     PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
545     if (!flg) {
546       PetscCall(MatEliminateZeros(A, keep));
547       PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
548       PetscCall(MatHeaderReplace(A, &B));
549     }
550   }
551   PetscFunctionReturn(PETSC_SUCCESS);
552 }
553