xref: /petsc/src/mat/utils/axpy.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928) !
1 
2 #include <petsc/private/matimpl.h> /*I   "petscmat.h"  I*/
3 
4 static PetscErrorCode MatTransposeAXPY_Private(Mat Y, PetscScalar a, Mat X, MatStructure str, Mat T) {
5   Mat A, F;
6   PetscErrorCode (*f)(Mat, Mat *);
7 
8   PetscFunctionBegin;
9   PetscCall(PetscObjectQueryFunction((PetscObject)T, "MatTransposeGetMat_C", &f));
10   if (f) {
11     PetscCall(MatTransposeGetMat(T, &A));
12     if (T == X) {
13       PetscCall(PetscInfo(NULL, "Explicitly transposing X of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
14       PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &F));
15       A = Y;
16     } else {
17       PetscCall(PetscInfo(NULL, "Transposing X because Y of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
18       PetscCall(MatTranspose(X, MAT_INITIAL_MATRIX, &F));
19     }
20   } else {
21     PetscCall(MatHermitianTransposeGetMat(T, &A));
22     if (T == X) {
23       PetscCall(PetscInfo(NULL, "Explicitly Hermitian transposing X of type MATHERITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
24       PetscCall(MatHermitianTranspose(A, MAT_INITIAL_MATRIX, &F));
25       A = Y;
26     } else {
27       PetscCall(PetscInfo(NULL, "Hermitian transposing X because Y of type MATHERITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
28       PetscCall(MatHermitianTranspose(X, MAT_INITIAL_MATRIX, &F));
29     }
30   }
31   PetscCall(MatAXPY(A, a, F, str));
32   PetscCall(MatDestroy(&F));
33   PetscFunctionReturn(0);
34 }
35 
36 /*@
37    MatAXPY - Computes Y = a*X + Y.
38 
39    Logically  Collective on Mat
40 
41    Input Parameters:
42 +  a - the scalar multiplier
43 .  X - the first matrix
44 .  Y - the second matrix
45 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
46 
47    Note:
48    No operation is performed when a is zero.
49 
50    Level: intermediate
51 
52 .seealso: `MatAYPX()`
53  @*/
54 PetscErrorCode MatAXPY(Mat Y, PetscScalar a, Mat X, MatStructure str) {
55   PetscInt  M1, M2, N1, N2;
56   PetscInt  m1, m2, n1, n2;
57   PetscBool sametype, transpose;
58 
59   PetscFunctionBegin;
60   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
61   PetscValidLogicalCollectiveScalar(Y, a, 2);
62   PetscValidHeaderSpecific(X, MAT_CLASSID, 3);
63   PetscCheckSameComm(Y, 1, X, 3);
64   PetscCall(MatGetSize(X, &M1, &N1));
65   PetscCall(MatGetSize(Y, &M2, &N2));
66   PetscCall(MatGetLocalSize(X, &m1, &n1));
67   PetscCall(MatGetLocalSize(Y, &m2, &n2));
68   PetscCheck(M1 == M2 && N1 == N2, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_SIZ, "Non conforming matrix add: global sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT, M1, N1, M2, N2);
69   PetscCheck(m1 == m2 && n1 == n2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Non conforming matrix add: local sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT, m1, n1, m2, n2);
70   PetscCheck(Y->assembled, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix (Y)");
71   PetscCheck(X->assembled, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix (X)");
72   if (a == (PetscScalar)0.0) PetscFunctionReturn(0);
73   if (Y == X) {
74     PetscCall(MatScale(Y, 1.0 + a));
75     PetscFunctionReturn(0);
76   }
77   PetscCall(PetscObjectObjectTypeCompare((PetscObject)X, (PetscObject)Y, &sametype));
78   PetscCall(PetscLogEventBegin(MAT_AXPY, Y, 0, 0, 0));
79   if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
80     PetscUseTypeMethod(Y, axpy, a, X, str);
81   } else {
82     PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
83     if (transpose) {
84       PetscCall(MatTransposeAXPY_Private(Y, a, X, str, X));
85     } else {
86       PetscCall(PetscObjectTypeCompareAny((PetscObject)Y, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
87       if (transpose) {
88         PetscCall(MatTransposeAXPY_Private(Y, a, X, str, Y));
89       } else {
90         PetscCall(MatAXPY_Basic(Y, a, X, str));
91       }
92     }
93   }
94   PetscCall(PetscLogEventEnd(MAT_AXPY, Y, 0, 0, 0));
95   PetscFunctionReturn(0);
96 }
97 
98 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B) {
99   PetscErrorCode (*preall)(Mat, Mat, Mat *) = NULL;
100 
101   PetscFunctionBegin;
102   /* look for any available faster alternative to the general preallocator */
103   PetscCall(PetscObjectQueryFunction((PetscObject)Y, "MatAXPYGetPreallocation_C", &preall));
104   if (preall) {
105     PetscCall((*preall)(Y, X, B));
106   } else { /* Use MatPrellocator, assumes same row-col distribution */
107     Mat      preallocator;
108     PetscInt r, rstart, rend;
109     PetscInt m, n, M, N;
110 
111     PetscCall(MatGetRowUpperTriangular(Y));
112     PetscCall(MatGetRowUpperTriangular(X));
113     PetscCall(MatGetSize(Y, &M, &N));
114     PetscCall(MatGetLocalSize(Y, &m, &n));
115     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &preallocator));
116     PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
117     PetscCall(MatSetLayouts(preallocator, Y->rmap, Y->cmap));
118     PetscCall(MatSetUp(preallocator));
119     PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
120     for (r = rstart; r < rend; ++r) {
121       PetscInt           ncols;
122       const PetscInt    *row;
123       const PetscScalar *vals;
124 
125       PetscCall(MatGetRow(Y, r, &ncols, &row, &vals));
126       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
127       PetscCall(MatRestoreRow(Y, r, &ncols, &row, &vals));
128       PetscCall(MatGetRow(X, r, &ncols, &row, &vals));
129       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
130       PetscCall(MatRestoreRow(X, r, &ncols, &row, &vals));
131     }
132     PetscCall(MatSetOption(preallocator, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
133     PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
134     PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
135     PetscCall(MatRestoreRowUpperTriangular(Y));
136     PetscCall(MatRestoreRowUpperTriangular(X));
137 
138     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), B));
139     PetscCall(MatSetType(*B, ((PetscObject)Y)->type_name));
140     PetscCall(MatSetLayouts(*B, Y->rmap, Y->cmap));
141     PetscCall(MatPreallocatorPreallocate(preallocator, PETSC_FALSE, *B));
142     PetscCall(MatDestroy(&preallocator));
143   }
144   PetscFunctionReturn(0);
145 }
146 
147 PetscErrorCode MatAXPY_Basic(Mat Y, PetscScalar a, Mat X, MatStructure str) {
148   PetscBool isshell, isdense, isnest;
149 
150   PetscFunctionBegin;
151   PetscCall(MatIsShell(Y, &isshell));
152   if (isshell) { /* MatShell has special support for AXPY */
153     PetscErrorCode (*f)(Mat, PetscScalar, Mat, MatStructure);
154 
155     PetscCall(MatGetOperation(Y, MATOP_AXPY, (void (**)(void)) & f));
156     if (f) {
157       PetscCall((*f)(Y, a, X, str));
158       PetscFunctionReturn(0);
159     }
160   }
161   /* no need to preallocate if Y is dense */
162   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)Y, &isdense, MATSEQDENSE, MATMPIDENSE, ""));
163   if (isdense) {
164     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATNEST, &isnest));
165     if (isnest) {
166       PetscCall(MatAXPY_Dense_Nest(Y, a, X));
167       PetscFunctionReturn(0);
168     }
169     if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
170   }
171   if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
172     PetscInt           i, start, end, j, ncols, m, n;
173     const PetscInt    *row;
174     PetscScalar       *val;
175     const PetscScalar *vals;
176 
177     PetscCall(MatGetSize(X, &m, &n));
178     PetscCall(MatGetOwnershipRange(X, &start, &end));
179     PetscCall(MatGetRowUpperTriangular(X));
180     if (a == 1.0) {
181       for (i = start; i < end; i++) {
182         PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
183         PetscCall(MatSetValues(Y, 1, &i, ncols, row, vals, ADD_VALUES));
184         PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
185       }
186     } else {
187       PetscInt vs = 100;
188       /* realloc if needed, as this function may be used in parallel */
189       PetscCall(PetscMalloc1(vs, &val));
190       for (i = start; i < end; i++) {
191         PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
192         if (vs < ncols) {
193           vs = PetscMin(2 * ncols, n);
194           PetscCall(PetscRealloc(vs * sizeof(*val), &val));
195         }
196         for (j = 0; j < ncols; j++) val[j] = a * vals[j];
197         PetscCall(MatSetValues(Y, 1, &i, ncols, row, val, ADD_VALUES));
198         PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
199       }
200       PetscCall(PetscFree(val));
201     }
202     PetscCall(MatRestoreRowUpperTriangular(X));
203     PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
204     PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
205   } else {
206     Mat B;
207 
208     PetscCall(MatAXPY_Basic_Preallocate(Y, X, &B));
209     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
210     PetscCall(MatHeaderMerge(Y, &B));
211   }
212   PetscFunctionReturn(0);
213 }
214 
215 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B, Mat Y, PetscScalar a, Mat X, MatStructure str) {
216   PetscInt           i, start, end, j, ncols, m, n;
217   const PetscInt    *row;
218   PetscScalar       *val;
219   const PetscScalar *vals;
220 
221   PetscFunctionBegin;
222   PetscCall(MatGetSize(X, &m, &n));
223   PetscCall(MatGetOwnershipRange(X, &start, &end));
224   PetscCall(MatGetRowUpperTriangular(Y));
225   PetscCall(MatGetRowUpperTriangular(X));
226   if (a == 1.0) {
227     for (i = start; i < end; i++) {
228       PetscCall(MatGetRow(Y, i, &ncols, &row, &vals));
229       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
230       PetscCall(MatRestoreRow(Y, i, &ncols, &row, &vals));
231 
232       PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
233       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
234       PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
235     }
236   } else {
237     PetscInt vs = 100;
238     /* realloc if needed, as this function may be used in parallel */
239     PetscCall(PetscMalloc1(vs, &val));
240     for (i = start; i < end; i++) {
241       PetscCall(MatGetRow(Y, i, &ncols, &row, &vals));
242       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
243       PetscCall(MatRestoreRow(Y, i, &ncols, &row, &vals));
244 
245       PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
246       if (vs < ncols) {
247         vs = PetscMin(2 * ncols, n);
248         PetscCall(PetscRealloc(vs * sizeof(*val), &val));
249       }
250       for (j = 0; j < ncols; j++) val[j] = a * vals[j];
251       PetscCall(MatSetValues(B, 1, &i, ncols, row, val, ADD_VALUES));
252       PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
253     }
254     PetscCall(PetscFree(val));
255   }
256   PetscCall(MatRestoreRowUpperTriangular(Y));
257   PetscCall(MatRestoreRowUpperTriangular(X));
258   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
259   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
260   PetscFunctionReturn(0);
261 }
262 
263 /*@
264    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.
265 
266    Neighbor-wise Collective on Mat
267 
268    Input Parameters:
269 +  Y - the matrices
270 -  a - the PetscScalar
271 
272    Level: intermediate
273 
274    Notes:
275     If Y is a rectangular matrix, the shift is done on the main diagonal Y_{ii} of the matrix (https://en.wikipedia.org/wiki/Main_diagonal)
276 
277     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
278    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
279    entry). No operation is performed when a is zero.
280 
281    To form Y = Y + diag(V) use MatDiagonalSet()
282 
283 .seealso: `MatDiagonalSet()`, `MatScale()`, `MatDiagonalScale()`
284  @*/
285 PetscErrorCode MatShift(Mat Y, PetscScalar a) {
286   PetscFunctionBegin;
287   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
288   PetscCheck(Y->assembled, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix");
289   PetscCheck(!Y->factortype, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
290   MatCheckPreallocated(Y, 1);
291   if (a == 0.0) PetscFunctionReturn(0);
292 
293   if (Y->ops->shift) PetscUseTypeMethod(Y, shift, a);
294   else PetscCall(MatShift_Basic(Y, a));
295 
296   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
297   PetscFunctionReturn(0);
298 }
299 
300 PetscErrorCode MatDiagonalSet_Default(Mat Y, Vec D, InsertMode is) {
301   PetscInt           i, start, end;
302   const PetscScalar *v;
303 
304   PetscFunctionBegin;
305   PetscCall(MatGetOwnershipRange(Y, &start, &end));
306   PetscCall(VecGetArrayRead(D, &v));
307   for (i = start; i < end; i++) PetscCall(MatSetValues(Y, 1, &i, 1, &i, v + i - start, is));
308   PetscCall(VecRestoreArrayRead(D, &v));
309   PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
310   PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
311   PetscFunctionReturn(0);
312 }
313 
314 /*@
315    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
316    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
317    INSERT_VALUES.
318 
319    Neighbor-wise Collective on Mat
320 
321    Input Parameters:
322 +  Y - the input matrix
323 .  D - the diagonal matrix, represented as a vector
324 -  i - INSERT_VALUES or ADD_VALUES
325 
326    Note:
327     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
328    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
329    entry).
330 
331    Level: intermediate
332 
333 .seealso: `MatShift()`, `MatScale()`, `MatDiagonalScale()`
334 @*/
335 PetscErrorCode MatDiagonalSet(Mat Y, Vec D, InsertMode is) {
336   PetscInt matlocal, veclocal;
337 
338   PetscFunctionBegin;
339   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
340   PetscValidHeaderSpecific(D, VEC_CLASSID, 2);
341   PetscCall(MatGetLocalSize(Y, &matlocal, NULL));
342   PetscCall(VecGetLocalSize(D, &veclocal));
343   PetscCheck(matlocal == veclocal, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number local rows of matrix %" PetscInt_FMT " does not match that of vector for diagonal %" PetscInt_FMT, matlocal, veclocal);
344   if (Y->ops->diagonalset) PetscUseTypeMethod(Y, diagonalset, D, is);
345   else PetscCall(MatDiagonalSet_Default(Y, D, is));
346   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
347   PetscFunctionReturn(0);
348 }
349 
350 /*@
351    MatAYPX - Computes Y = a*Y + X.
352 
353    Logically on Mat
354 
355    Input Parameters:
356 +  a - the PetscScalar multiplier
357 .  Y - the first matrix
358 .  X - the second matrix
359 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
360 
361    Level: intermediate
362 
363 .seealso: `MatAXPY()`
364  @*/
365 PetscErrorCode MatAYPX(Mat Y, PetscScalar a, Mat X, MatStructure str) {
366   PetscFunctionBegin;
367   PetscCall(MatScale(Y, a));
368   PetscCall(MatAXPY(Y, 1.0, X, str));
369   PetscFunctionReturn(0);
370 }
371 
372 /*@
373     MatComputeOperator - Computes the explicit matrix
374 
375     Collective on Mat
376 
377     Input Parameters:
378 +   inmat - the matrix
379 -   mattype - the matrix type for the explicit operator
380 
381     Output Parameter:
382 .   mat - the explicit  operator
383 
384     Note:
385     This computation is done by applying the operators to columns of the identity matrix.
386     This routine is costly in general, and is recommended for use only with relatively small systems.
387     Currently, this routine uses a dense matrix format if mattype == NULL.
388 
389     Level: advanced
390 
391 @*/
392 PetscErrorCode MatComputeOperator(Mat inmat, MatType mattype, Mat *mat) {
393   PetscFunctionBegin;
394   PetscValidHeaderSpecific(inmat, MAT_CLASSID, 1);
395   PetscValidPointer(mat, 3);
396   PetscCall(MatConvert_Shell(inmat, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat));
397   PetscFunctionReturn(0);
398 }
399 
400 /*@
401     MatComputeOperatorTranspose - Computes the explicit matrix representation of
402         a give matrix that can apply MatMultTranspose()
403 
404     Collective on Mat
405 
406     Input Parameters:
407 +   inmat - the matrix
408 -   mattype - the matrix type for the explicit operator
409 
410     Output Parameter:
411 .   mat - the explicit  operator transposed
412 
413     Note:
414     This computation is done by applying the transpose of the operator to columns of the identity matrix.
415     This routine is costly in general, and is recommended for use only with relatively small systems.
416     Currently, this routine uses a dense matrix format if mattype == NULL.
417 
418     Level: advanced
419 
420 @*/
421 PetscErrorCode MatComputeOperatorTranspose(Mat inmat, MatType mattype, Mat *mat) {
422   Mat A;
423 
424   PetscFunctionBegin;
425   PetscValidHeaderSpecific(inmat, MAT_CLASSID, 1);
426   PetscValidPointer(mat, 3);
427   PetscCall(MatCreateTranspose(inmat, &A));
428   PetscCall(MatConvert_Shell(A, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat));
429   PetscCall(MatDestroy(&A));
430   PetscFunctionReturn(0);
431 }
432 
433 /*@
434   MatChop - Set all values in the matrix less than the tolerance to zero
435 
436   Input Parameters:
437 + A   - The matrix
438 - tol - The zero tolerance
439 
440   Output Parameters:
441 . A - The chopped matrix
442 
443   Level: intermediate
444 
445 .seealso: `MatCreate()`, `MatZeroEntries()`
446  @*/
447 PetscErrorCode MatChop(Mat A, PetscReal tol) {
448   Mat          a;
449   PetscScalar *newVals;
450   PetscInt    *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
451   PetscBool    flg;
452 
453   PetscFunctionBegin;
454   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, ""));
455   if (flg) {
456     PetscCall(MatDenseGetLocalMatrix(A, &a));
457     PetscCall(MatDenseGetLDA(a, &r));
458     PetscCall(MatGetSize(a, &rStart, &rEnd));
459     PetscCall(MatDenseGetArray(a, &newVals));
460     for (; colMax < rEnd; ++colMax) {
461       for (maxRows = 0; maxRows < rStart; ++maxRows) newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
462     }
463     PetscCall(MatDenseRestoreArray(a, &newVals));
464   } else {
465     PetscCall(MatGetOwnershipRange(A, &rStart, &rEnd));
466     PetscCall(MatGetRowUpperTriangular(A));
467     for (r = rStart; r < rEnd; ++r) {
468       PetscInt ncols;
469 
470       PetscCall(MatGetRow(A, r, &ncols, NULL, NULL));
471       colMax = PetscMax(colMax, ncols);
472       PetscCall(MatRestoreRow(A, r, &ncols, NULL, NULL));
473     }
474     numRows = rEnd - rStart;
475     PetscCall(MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A)));
476     PetscCall(PetscMalloc2(colMax, &newCols, colMax, &newVals));
477     PetscCall(MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg)); /* cache user-defined value */
478     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
479     /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd()             */
480     /* that are potentially called many times depending on the distribution of A */
481     for (r = rStart; r < rStart + maxRows; ++r) {
482       const PetscScalar *vals;
483       const PetscInt    *cols;
484       PetscInt           ncols, newcols, c;
485 
486       if (r < rEnd) {
487         PetscCall(MatGetRow(A, r, &ncols, &cols, &vals));
488         for (c = 0; c < ncols; ++c) {
489           newCols[c] = cols[c];
490           newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
491         }
492         newcols = ncols;
493         PetscCall(MatRestoreRow(A, r, &ncols, &cols, &vals));
494         PetscCall(MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES));
495       }
496       PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
497       PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
498     }
499     PetscCall(MatRestoreRowUpperTriangular(A));
500     PetscCall(PetscFree2(newCols, newVals));
501     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg)); /* reset option to its user-defined value */
502   }
503   PetscFunctionReturn(0);
504 }
505