xref: /petsc/src/mat/utils/axpy.c (revision b2deab974997777d2904fb5b4ce8bb26cde3bd61)
1 #include <petsc/private/matimpl.h> /*I   "petscmat.h"  I*/
2 
3 static PetscErrorCode MatTransposeAXPY_Private(Mat Y, PetscScalar a, Mat X, MatStructure str, Mat T)
4 {
5   Mat A, F;
6   PetscErrorCode (*f)(Mat, Mat *);
7 
8   PetscFunctionBegin;
9   PetscCall(PetscObjectQueryFunction((PetscObject)T, "MatTransposeGetMat_C", &f));
10   if (f) {
11     PetscCall(MatTransposeGetMat(T, &A));
12     if (T == X) {
13       PetscCall(PetscInfo(NULL, "Explicitly transposing X of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
14       PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &F));
15       A = Y;
16     } else {
17       PetscCall(PetscInfo(NULL, "Transposing X because Y of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
18       PetscCall(MatTranspose(X, MAT_INITIAL_MATRIX, &F));
19     }
20   } else {
21     PetscCall(MatHermitianTransposeGetMat(T, &A));
22     if (T == X) {
23       PetscCall(PetscInfo(NULL, "Explicitly Hermitian transposing X of type MATHERMITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
24       PetscCall(MatHermitianTranspose(A, MAT_INITIAL_MATRIX, &F));
25       A = Y;
26     } else {
27       PetscCall(PetscInfo(NULL, "Hermitian transposing X because Y of type MATHERMITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
28       PetscCall(MatHermitianTranspose(X, MAT_INITIAL_MATRIX, &F));
29     }
30   }
31   PetscCall(MatAXPY(A, a, F, str));
32   PetscCall(MatDestroy(&F));
33   PetscFunctionReturn(PETSC_SUCCESS);
34 }
35 
36 /*@
37   MatAXPY - Computes Y = a*X + Y.
38 
39   Logically Collective
40 
41   Input Parameters:
42 + a   - the scalar multiplier
43 . X   - the first matrix
44 . Y   - the second matrix
45 - str - either `SAME_NONZERO_PATTERN`, `DIFFERENT_NONZERO_PATTERN`, `UNKNOWN_NONZERO_PATTERN`, or `SUBSET_NONZERO_PATTERN` (nonzeros of `X` is a subset of `Y`'s)
46 
47   Level: intermediate
48 
49 .seealso: [](ch_matrices), `Mat`, `MatAYPX()`
50  @*/
51 PetscErrorCode MatAXPY(Mat Y, PetscScalar a, Mat X, MatStructure str)
52 {
53   PetscInt  M1, M2, N1, N2;
54   PetscInt  m1, m2, n1, n2;
55   PetscBool sametype, transpose;
56 
57   PetscFunctionBegin;
58   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
59   PetscValidLogicalCollectiveScalar(Y, a, 2);
60   PetscValidHeaderSpecific(X, MAT_CLASSID, 3);
61   PetscCheckSameComm(Y, 1, X, 3);
62   PetscCall(MatGetSize(X, &M1, &N1));
63   PetscCall(MatGetSize(Y, &M2, &N2));
64   PetscCall(MatGetLocalSize(X, &m1, &n1));
65   PetscCall(MatGetLocalSize(Y, &m2, &n2));
66   PetscCheck(M1 == M2 && N1 == N2, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_SIZ, "Non conforming matrix add: global sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT, M1, N1, M2, N2);
67   PetscCheck(m1 == m2 && n1 == n2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Non conforming matrix add: local sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT, m1, n1, m2, n2);
68   PetscCheck(Y->assembled, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix (Y)");
69   PetscCheck(X->assembled, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix (X)");
70   if (a == (PetscScalar)0.0) PetscFunctionReturn(PETSC_SUCCESS);
71   if (Y == X) {
72     PetscCall(MatScale(Y, 1.0 + a));
73     PetscFunctionReturn(PETSC_SUCCESS);
74   }
75   PetscCall(PetscObjectObjectTypeCompare((PetscObject)X, (PetscObject)Y, &sametype));
76   PetscCall(PetscLogEventBegin(MAT_AXPY, Y, 0, 0, 0));
77   if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
78     PetscUseTypeMethod(Y, axpy, a, X, str);
79   } else {
80     PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
81     if (transpose) {
82       PetscCall(MatTransposeAXPY_Private(Y, a, X, str, X));
83     } else {
84       PetscCall(PetscObjectTypeCompareAny((PetscObject)Y, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
85       if (transpose) {
86         PetscCall(MatTransposeAXPY_Private(Y, a, X, str, Y));
87       } else {
88         PetscCall(MatAXPY_Basic(Y, a, X, str));
89       }
90     }
91   }
92   PetscCall(PetscLogEventEnd(MAT_AXPY, Y, 0, 0, 0));
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
96 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
97 {
98   PetscErrorCode (*preall)(Mat, Mat, Mat *) = NULL;
99 
100   PetscFunctionBegin;
101   /* look for any available faster alternative to the general preallocator */
102   PetscCall(PetscObjectQueryFunction((PetscObject)Y, "MatAXPYGetPreallocation_C", &preall));
103   if (preall) {
104     PetscCall((*preall)(Y, X, B));
105   } else { /* Use MatPrellocator, assumes same row-col distribution */
106     Mat      preallocator;
107     PetscInt r, rstart, rend;
108     PetscInt m, n, M, N;
109 
110     PetscCall(MatGetRowUpperTriangular(Y));
111     PetscCall(MatGetRowUpperTriangular(X));
112     PetscCall(MatGetSize(Y, &M, &N));
113     PetscCall(MatGetLocalSize(Y, &m, &n));
114     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &preallocator));
115     PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
116     PetscCall(MatSetLayouts(preallocator, Y->rmap, Y->cmap));
117     PetscCall(MatSetUp(preallocator));
118     PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
119     for (r = rstart; r < rend; ++r) {
120       PetscInt           ncols;
121       const PetscInt    *row;
122       const PetscScalar *vals;
123 
124       PetscCall(MatGetRow(Y, r, &ncols, &row, &vals));
125       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
126       PetscCall(MatRestoreRow(Y, r, &ncols, &row, &vals));
127       PetscCall(MatGetRow(X, r, &ncols, &row, &vals));
128       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
129       PetscCall(MatRestoreRow(X, r, &ncols, &row, &vals));
130     }
131     PetscCall(MatSetOption(preallocator, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
132     PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
133     PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
134     PetscCall(MatRestoreRowUpperTriangular(Y));
135     PetscCall(MatRestoreRowUpperTriangular(X));
136 
137     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), B));
138     PetscCall(MatSetType(*B, ((PetscObject)Y)->type_name));
139     PetscCall(MatSetLayouts(*B, Y->rmap, Y->cmap));
140     PetscCall(MatPreallocatorPreallocate(preallocator, PETSC_FALSE, *B));
141     PetscCall(MatDestroy(&preallocator));
142   }
143   PetscFunctionReturn(PETSC_SUCCESS);
144 }
145 
146 PetscErrorCode MatAXPY_Basic(Mat Y, PetscScalar a, Mat X, MatStructure str)
147 {
148   PetscBool isshell, isdense, isnest;
149 
150   PetscFunctionBegin;
151   PetscCall(MatIsShell(Y, &isshell));
152   if (isshell) { /* MatShell has special support for AXPY */
153     PetscErrorCode (*f)(Mat, PetscScalar, Mat, MatStructure);
154 
155     PetscCall(MatGetOperation(Y, MATOP_AXPY, (void (**)(void)) & f));
156     if (f) {
157       PetscCall((*f)(Y, a, X, str));
158       PetscFunctionReturn(PETSC_SUCCESS);
159     }
160   }
161   /* no need to preallocate if Y is dense */
162   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)Y, &isdense, MATSEQDENSE, MATMPIDENSE, ""));
163   if (isdense) {
164     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATNEST, &isnest));
165     if (isnest) {
166       PetscCall(MatAXPY_Dense_Nest(Y, a, X));
167       PetscFunctionReturn(PETSC_SUCCESS);
168     }
169     if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
170   }
171   if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
172     PetscInt           i, start, end, j, ncols, m, n;
173     const PetscInt    *row;
174     PetscScalar       *val;
175     const PetscScalar *vals;
176 
177     PetscCall(MatGetSize(X, &m, &n));
178     PetscCall(MatGetOwnershipRange(X, &start, &end));
179     PetscCall(MatGetRowUpperTriangular(X));
180     if (a == 1.0) {
181       for (i = start; i < end; i++) {
182         PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
183         PetscCall(MatSetValues(Y, 1, &i, ncols, row, vals, ADD_VALUES));
184         PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
185       }
186     } else {
187       PetscInt vs = 100;
188       /* realloc if needed, as this function may be used in parallel */
189       PetscCall(PetscMalloc1(vs, &val));
190       for (i = start; i < end; i++) {
191         PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
192         if (vs < ncols) {
193           vs = PetscMin(2 * ncols, n);
194           PetscCall(PetscRealloc(vs * sizeof(*val), &val));
195         }
196         for (j = 0; j < ncols; j++) val[j] = a * vals[j];
197         PetscCall(MatSetValues(Y, 1, &i, ncols, row, val, ADD_VALUES));
198         PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
199       }
200       PetscCall(PetscFree(val));
201     }
202     PetscCall(MatRestoreRowUpperTriangular(X));
203     PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
204     PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
205   } else {
206     Mat B;
207 
208     PetscCall(MatAXPY_Basic_Preallocate(Y, X, &B));
209     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
210     PetscCall(MatHeaderMerge(Y, &B));
211   }
212   PetscFunctionReturn(PETSC_SUCCESS);
213 }
214 
215 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B, Mat Y, PetscScalar a, Mat X, MatStructure str)
216 {
217   PetscInt           i, start, end, j, ncols, m, n;
218   const PetscInt    *row;
219   PetscScalar       *val;
220   const PetscScalar *vals;
221 
222   PetscFunctionBegin;
223   PetscCall(MatGetSize(X, &m, &n));
224   PetscCall(MatGetOwnershipRange(X, &start, &end));
225   PetscCall(MatGetRowUpperTriangular(Y));
226   PetscCall(MatGetRowUpperTriangular(X));
227   if (a == 1.0) {
228     for (i = start; i < end; i++) {
229       PetscCall(MatGetRow(Y, i, &ncols, &row, &vals));
230       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
231       PetscCall(MatRestoreRow(Y, i, &ncols, &row, &vals));
232 
233       PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
234       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
235       PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
236     }
237   } else {
238     PetscInt vs = 100;
239     /* realloc if needed, as this function may be used in parallel */
240     PetscCall(PetscMalloc1(vs, &val));
241     for (i = start; i < end; i++) {
242       PetscCall(MatGetRow(Y, i, &ncols, &row, &vals));
243       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
244       PetscCall(MatRestoreRow(Y, i, &ncols, &row, &vals));
245 
246       PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
247       if (vs < ncols) {
248         vs = PetscMin(2 * ncols, n);
249         PetscCall(PetscRealloc(vs * sizeof(*val), &val));
250       }
251       for (j = 0; j < ncols; j++) val[j] = a * vals[j];
252       PetscCall(MatSetValues(B, 1, &i, ncols, row, val, ADD_VALUES));
253       PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
254     }
255     PetscCall(PetscFree(val));
256   }
257   PetscCall(MatRestoreRowUpperTriangular(Y));
258   PetscCall(MatRestoreRowUpperTriangular(X));
259   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
260   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
261   PetscFunctionReturn(PETSC_SUCCESS);
262 }
263 
264 /*@
265   MatShift - Computes `Y =  Y + a I`, where `a` is a `PetscScalar`
266 
267   Neighbor-wise Collective
268 
269   Input Parameters:
270 + Y - the matrix
271 - a - the `PetscScalar`
272 
273   Level: intermediate
274 
275   Notes:
276   If `Y` is a rectangular matrix, the shift is done on the main diagonal of the matrix (https://en.wikipedia.org/wiki/Main_diagonal)
277 
278   If the matrix `Y` is missing some diagonal entries this routine can be very slow. To make it fast one should initially
279   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
280   entry). No operation is performed when a is zero.
281 
282   To form Y = Y + diag(V) use `MatDiagonalSet()`
283 
284 .seealso: [](ch_matrices), `Mat`, `MatDiagonalSet()`, `MatScale()`, `MatDiagonalScale()`
285  @*/
286 PetscErrorCode MatShift(Mat Y, PetscScalar a)
287 {
288   PetscFunctionBegin;
289   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
290   PetscCheck(Y->assembled, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix");
291   PetscCheck(!Y->factortype, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
292   MatCheckPreallocated(Y, 1);
293   if (a == 0.0) PetscFunctionReturn(PETSC_SUCCESS);
294 
295   if (Y->ops->shift) PetscUseTypeMethod(Y, shift, a);
296   else PetscCall(MatShift_Basic(Y, a));
297 
298   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
299   PetscFunctionReturn(PETSC_SUCCESS);
300 }
301 
302 PetscErrorCode MatDiagonalSet_Default(Mat Y, Vec D, InsertMode is)
303 {
304   PetscInt           i, start, end;
305   const PetscScalar *v;
306 
307   PetscFunctionBegin;
308   PetscCall(MatGetOwnershipRange(Y, &start, &end));
309   PetscCall(VecGetArrayRead(D, &v));
310   for (i = start; i < end; i++) PetscCall(MatSetValues(Y, 1, &i, 1, &i, v + i - start, is));
311   PetscCall(VecRestoreArrayRead(D, &v));
312   PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
313   PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
314   PetscFunctionReturn(PETSC_SUCCESS);
315 }
316 
317 /*@
318   MatDiagonalSet - Computes `Y` = `Y` + `D`, where `D` is a diagonal matrix
319   that is represented as a vector. Or Y[i,i] = D[i] if `InsertMode` is
320   `INSERT_VALUES`.
321 
322   Neighbor-wise Collective
323 
324   Input Parameters:
325 + Y  - the input matrix
326 . D  - the diagonal matrix, represented as a vector
327 - is - `INSERT_VALUES` or `ADD_VALUES`
328 
329   Level: intermediate
330 
331   Note:
332   If the matrix `Y` is missing some diagonal entries this routine can be very slow. To make it fast one should initially
333   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
334   entry).
335 
336 .seealso: [](ch_matrices), `Mat`, `MatShift()`, `MatScale()`, `MatDiagonalScale()`
337 @*/
338 PetscErrorCode MatDiagonalSet(Mat Y, Vec D, InsertMode is)
339 {
340   PetscInt matlocal, veclocal;
341 
342   PetscFunctionBegin;
343   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
344   PetscValidHeaderSpecific(D, VEC_CLASSID, 2);
345   MatCheckPreallocated(Y, 1);
346   PetscCall(MatGetLocalSize(Y, &matlocal, NULL));
347   PetscCall(VecGetLocalSize(D, &veclocal));
348   PetscCheck(matlocal == veclocal, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number local rows of matrix %" PetscInt_FMT " does not match that of vector for diagonal %" PetscInt_FMT, matlocal, veclocal);
349   if (Y->ops->diagonalset) PetscUseTypeMethod(Y, diagonalset, D, is);
350   else PetscCall(MatDiagonalSet_Default(Y, D, is));
351   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
352   PetscFunctionReturn(PETSC_SUCCESS);
353 }
354 
355 /*@
356   MatAYPX - Computes Y = a*Y + X.
357 
358   Logically Collective
359 
360   Input Parameters:
361 + a   - the `PetscScalar` multiplier
362 . Y   - the first matrix
363 . X   - the second matrix
364 - str - either `SAME_NONZERO_PATTERN`, `DIFFERENT_NONZERO_PATTERN`, `UNKNOWN_NONZERO_PATTERN`, or `SUBSET_NONZERO_PATTERN` (nonzeros of `X` is a subset of `Y`'s)
365 
366   Level: intermediate
367 
368 .seealso: [](ch_matrices), `Mat`, `MatAXPY()`
369  @*/
370 PetscErrorCode MatAYPX(Mat Y, PetscScalar a, Mat X, MatStructure str)
371 {
372   PetscFunctionBegin;
373   PetscCall(MatScale(Y, a));
374   PetscCall(MatAXPY(Y, 1.0, X, str));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 /*@C
379   MatComputeOperator - Computes the explicit matrix
380 
381   Collective
382 
383   Input Parameters:
384 + inmat   - the matrix
385 - mattype - the matrix type for the explicit operator
386 
387   Output Parameter:
388 . mat - the explicit  operator
389 
390   Level: advanced
391 
392   Note:
393   This computation is done by applying the operators to columns of the identity matrix.
394   This routine is costly in general, and is recommended for use only with relatively small systems.
395   Currently, this routine uses a dense matrix format if `mattype` == `NULL`.
396 
397 .seealso: [](ch_matrices), `Mat`, `MatConvert()`, `MatMult()`, `MatComputeOperatorTranspose()`
398 @*/
399 PetscErrorCode MatComputeOperator(Mat inmat, MatType mattype, Mat *mat)
400 {
401   PetscFunctionBegin;
402   PetscValidHeaderSpecific(inmat, MAT_CLASSID, 1);
403   PetscAssertPointer(mat, 3);
404   PetscCall(MatConvert_Shell(inmat, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat));
405   PetscFunctionReturn(PETSC_SUCCESS);
406 }
407 
408 /*@C
409   MatComputeOperatorTranspose - Computes the explicit matrix representation of
410   a give matrix that can apply `MatMultTranspose()`
411 
412   Collective
413 
414   Input Parameters:
415 + inmat   - the matrix
416 - mattype - the matrix type for the explicit operator
417 
418   Output Parameter:
419 . mat - the explicit  operator transposed
420 
421   Level: advanced
422 
423   Note:
424   This computation is done by applying the transpose of the operator to columns of the identity matrix.
425   This routine is costly in general, and is recommended for use only with relatively small systems.
426   Currently, this routine uses a dense matrix format if `mattype` == `NULL`.
427 
428 .seealso: [](ch_matrices), `Mat`, `MatConvert()`, `MatMult()`, `MatComputeOperator()`
429 @*/
430 PetscErrorCode MatComputeOperatorTranspose(Mat inmat, MatType mattype, Mat *mat)
431 {
432   Mat A;
433 
434   PetscFunctionBegin;
435   PetscValidHeaderSpecific(inmat, MAT_CLASSID, 1);
436   PetscAssertPointer(mat, 3);
437   PetscCall(MatCreateTranspose(inmat, &A));
438   PetscCall(MatConvert_Shell(A, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat));
439   PetscCall(MatDestroy(&A));
440   PetscFunctionReturn(PETSC_SUCCESS);
441 }
442 
443 /*@
444   MatFilter - Set all values in the matrix with an absolute value less than or equal to the tolerance to zero, and optionally compress the underlying storage
445 
446   Input Parameters:
447 + A        - The matrix
448 . tol      - The zero tolerance
449 . compress - Whether the storage from the input matrix `A` should be compressed once values less than or equal to `tol` are set to zero
450 - keep     - If `compress` is true and for a given row of `A`, the diagonal coefficient is less than or equal to `tol`, indicates whether it should be left in the structure or eliminated as well
451 
452   Level: intermediate
453 
454 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatZeroEntries()`, `MatEliminateZeros()`, `VecFilter()`
455  @*/
456 PetscErrorCode MatFilter(Mat A, PetscReal tol, PetscBool compress, PetscBool keep)
457 {
458   Mat          a;
459   PetscScalar *newVals;
460   PetscInt    *newCols, rStart, rEnd, maxRows, r, colMax = 0, nnz0 = 0, nnz1 = 0;
461   PetscBool    flg;
462 
463   PetscFunctionBegin;
464   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, ""));
465   if (flg) {
466     PetscCall(MatDenseGetLocalMatrix(A, &a));
467     PetscCall(MatDenseGetLDA(a, &r));
468     PetscCall(MatGetSize(a, &rStart, &rEnd));
469     PetscCall(MatDenseGetArray(a, &newVals));
470     for (; colMax < rEnd; ++colMax) {
471       for (maxRows = 0; maxRows < rStart; ++maxRows) newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) <= tol ? 0.0 : newVals[maxRows + colMax * r];
472     }
473     PetscCall(MatDenseRestoreArray(a, &newVals));
474   } else {
475     const PetscInt *ranges;
476     PetscMPIInt     rank, size;
477 
478     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
479     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
480     PetscCall(MatGetOwnershipRanges(A, &ranges));
481     rStart = ranges[rank];
482     rEnd   = ranges[rank + 1];
483     PetscCall(MatGetRowUpperTriangular(A));
484     for (r = rStart; r < rEnd; ++r) {
485       PetscInt ncols;
486 
487       PetscCall(MatGetRow(A, r, &ncols, NULL, NULL));
488       colMax = PetscMax(colMax, ncols);
489       PetscCall(MatRestoreRow(A, r, &ncols, NULL, NULL));
490     }
491     maxRows = 0;
492     for (r = 0; r < size; ++r) maxRows = PetscMax(maxRows, ranges[r + 1] - ranges[r]);
493     PetscCall(PetscCalloc2(colMax, &newCols, colMax, &newVals));
494     PetscCall(MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg)); /* cache user-defined value */
495     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
496     /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd()             */
497     /* that are potentially called many times depending on the distribution of A */
498     for (r = rStart; r < rStart + maxRows; ++r) {
499       if (r < rEnd) {
500         const PetscScalar *vals;
501         const PetscInt    *cols;
502         PetscInt           ncols, newcols = 0, c;
503 
504         PetscCall(MatGetRow(A, r, &ncols, &cols, &vals));
505         nnz0 += ncols - 1;
506         for (c = 0; c < ncols; ++c) {
507           if (PetscUnlikely(PetscAbsScalar(vals[c]) <= tol)) newCols[newcols++] = cols[c];
508         }
509         nnz1 += ncols - newcols - 1;
510         PetscCall(MatRestoreRow(A, r, &ncols, &cols, &vals));
511         PetscCall(MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES));
512       }
513       PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
514       PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
515     }
516     PetscCall(MatRestoreRowUpperTriangular(A));
517     PetscCall(PetscFree2(newCols, newVals));
518     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg)); /* reset option to its user-defined value */
519     if (nnz0 > 0) PetscCall(PetscInfo(NULL, "Filtering left %g%% edges in graph\n", 100 * (double)nnz1 / (double)nnz0));
520     else PetscCall(PetscInfo(NULL, "Warning: %" PetscInt_FMT " edges to filter with %" PetscInt_FMT " rows\n", nnz0, maxRows));
521   }
522   if (compress && A->ops->eliminatezeros) {
523     Mat       B;
524     PetscBool flg;
525 
526     PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
527     if (!flg) {
528       PetscCall(MatEliminateZeros(A, keep));
529       PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
530       PetscCall(MatHeaderReplace(A, &B));
531     }
532   }
533   PetscFunctionReturn(PETSC_SUCCESS);
534 }
535