xref: /petsc/src/mat/utils/axpy.c (revision 9371c9d470a9602b6d10a8bf50c9b2280a79e45a)
1 
2 #include <petsc/private/matimpl.h> /*I   "petscmat.h"  I*/
3 
4 static PetscErrorCode MatTransposeAXPY_Private(Mat Y, PetscScalar a, Mat X, MatStructure str, Mat T) {
5   Mat A, F;
6   PetscErrorCode (*f)(Mat, Mat *);
7 
8   PetscFunctionBegin;
9   PetscCall(PetscObjectQueryFunction((PetscObject)T, "MatTransposeGetMat_C", &f));
10   if (f) {
11     PetscCall(MatTransposeGetMat(T, &A));
12     if (T == X) {
13       PetscCall(PetscInfo(NULL, "Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
14       PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &F));
15       A = Y;
16     } else {
17       PetscCall(PetscInfo(NULL, "Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
18       PetscCall(MatTranspose(X, MAT_INITIAL_MATRIX, &F));
19     }
20   } else {
21     PetscCall(MatHermitianTransposeGetMat(T, &A));
22     if (T == X) {
23       PetscCall(PetscInfo(NULL, "Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
24       PetscCall(MatHermitianTranspose(A, MAT_INITIAL_MATRIX, &F));
25       A = Y;
26     } else {
27       PetscCall(PetscInfo(NULL, "Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
28       PetscCall(MatHermitianTranspose(X, MAT_INITIAL_MATRIX, &F));
29     }
30   }
31   PetscCall(MatAXPY(A, a, F, str));
32   PetscCall(MatDestroy(&F));
33   PetscFunctionReturn(0);
34 }
35 
36 /*@
37    MatAXPY - Computes Y = a*X + Y.
38 
39    Logically  Collective on Mat
40 
41    Input Parameters:
42 +  a - the scalar multiplier
43 .  X - the first matrix
44 .  Y - the second matrix
45 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
46 
47    Notes: No operation is performed when a is zero.
48 
49    Level: intermediate
50 
51 .seealso: `MatAYPX()`
52  @*/
53 PetscErrorCode MatAXPY(Mat Y, PetscScalar a, Mat X, MatStructure str) {
54   PetscInt  M1, M2, N1, N2;
55   PetscInt  m1, m2, n1, n2;
56   MatType   t1, t2;
57   PetscBool sametype, transpose;
58 
59   PetscFunctionBegin;
60   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
61   PetscValidLogicalCollectiveScalar(Y, a, 2);
62   PetscValidHeaderSpecific(X, MAT_CLASSID, 3);
63   PetscCheckSameComm(Y, 1, X, 3);
64   PetscCall(MatGetSize(X, &M1, &N1));
65   PetscCall(MatGetSize(Y, &M2, &N2));
66   PetscCall(MatGetLocalSize(X, &m1, &n1));
67   PetscCall(MatGetLocalSize(Y, &m2, &n2));
68   PetscCheck(M1 == M2 && N1 == N2, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_SIZ, "Non conforming matrix add: global sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT, M1, N1, M2, N2);
69   PetscCheck(m1 == m2 && n1 == n2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Non conforming matrix add: local sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT, m1, n1, m2, n2);
70   PetscCheck(Y->assembled, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix (Y)");
71   PetscCheck(X->assembled, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix (X)");
72   if (a == (PetscScalar)0.0) PetscFunctionReturn(0);
73   if (Y == X) {
74     PetscCall(MatScale(Y, 1.0 + a));
75     PetscFunctionReturn(0);
76   }
77   PetscCall(MatGetType(X, &t1));
78   PetscCall(MatGetType(Y, &t2));
79   PetscCall(PetscStrcmp(t1, t2, &sametype));
80   PetscCall(PetscLogEventBegin(MAT_AXPY, Y, 0, 0, 0));
81   if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
82     PetscUseTypeMethod(Y, axpy, a, X, str);
83   } else {
84     PetscCall(PetscStrcmp(t1, MATTRANSPOSEMAT, &transpose));
85     if (transpose) {
86       PetscCall(MatTransposeAXPY_Private(Y, a, X, str, X));
87     } else {
88       PetscCall(PetscStrcmp(t2, MATTRANSPOSEMAT, &transpose));
89       if (transpose) {
90         PetscCall(MatTransposeAXPY_Private(Y, a, X, str, Y));
91       } else {
92         PetscCall(MatAXPY_Basic(Y, a, X, str));
93       }
94     }
95   }
96   PetscCall(PetscLogEventEnd(MAT_AXPY, Y, 0, 0, 0));
97   PetscFunctionReturn(0);
98 }
99 
100 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B) {
101   PetscErrorCode (*preall)(Mat, Mat, Mat *) = NULL;
102 
103   PetscFunctionBegin;
104   /* look for any available faster alternative to the general preallocator */
105   PetscCall(PetscObjectQueryFunction((PetscObject)Y, "MatAXPYGetPreallocation_C", &preall));
106   if (preall) {
107     PetscCall((*preall)(Y, X, B));
108   } else { /* Use MatPrellocator, assumes same row-col distribution */
109     Mat      preallocator;
110     PetscInt r, rstart, rend;
111     PetscInt m, n, M, N;
112 
113     PetscCall(MatGetRowUpperTriangular(Y));
114     PetscCall(MatGetRowUpperTriangular(X));
115     PetscCall(MatGetSize(Y, &M, &N));
116     PetscCall(MatGetLocalSize(Y, &m, &n));
117     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &preallocator));
118     PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
119     PetscCall(MatSetLayouts(preallocator, Y->rmap, Y->cmap));
120     PetscCall(MatSetUp(preallocator));
121     PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
122     for (r = rstart; r < rend; ++r) {
123       PetscInt           ncols;
124       const PetscInt    *row;
125       const PetscScalar *vals;
126 
127       PetscCall(MatGetRow(Y, r, &ncols, &row, &vals));
128       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
129       PetscCall(MatRestoreRow(Y, r, &ncols, &row, &vals));
130       PetscCall(MatGetRow(X, r, &ncols, &row, &vals));
131       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
132       PetscCall(MatRestoreRow(X, r, &ncols, &row, &vals));
133     }
134     PetscCall(MatSetOption(preallocator, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
135     PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
136     PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
137     PetscCall(MatRestoreRowUpperTriangular(Y));
138     PetscCall(MatRestoreRowUpperTriangular(X));
139 
140     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), B));
141     PetscCall(MatSetType(*B, ((PetscObject)Y)->type_name));
142     PetscCall(MatSetLayouts(*B, Y->rmap, Y->cmap));
143     PetscCall(MatPreallocatorPreallocate(preallocator, PETSC_FALSE, *B));
144     PetscCall(MatDestroy(&preallocator));
145   }
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatAXPY_Basic(Mat Y, PetscScalar a, Mat X, MatStructure str) {
150   PetscBool isshell, isdense, isnest;
151 
152   PetscFunctionBegin;
153   PetscCall(MatIsShell(Y, &isshell));
154   if (isshell) { /* MatShell has special support for AXPY */
155     PetscErrorCode (*f)(Mat, PetscScalar, Mat, MatStructure);
156 
157     PetscCall(MatGetOperation(Y, MATOP_AXPY, (void (**)(void)) & f));
158     if (f) {
159       PetscCall((*f)(Y, a, X, str));
160       PetscFunctionReturn(0);
161     }
162   }
163   /* no need to preallocate if Y is dense */
164   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)Y, &isdense, MATSEQDENSE, MATMPIDENSE, ""));
165   if (isdense) {
166     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATNEST, &isnest));
167     if (isnest) {
168       PetscCall(MatAXPY_Dense_Nest(Y, a, X));
169       PetscFunctionReturn(0);
170     }
171     if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
172   }
173   if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
174     PetscInt           i, start, end, j, ncols, m, n;
175     const PetscInt    *row;
176     PetscScalar       *val;
177     const PetscScalar *vals;
178 
179     PetscCall(MatGetSize(X, &m, &n));
180     PetscCall(MatGetOwnershipRange(X, &start, &end));
181     PetscCall(MatGetRowUpperTriangular(X));
182     if (a == 1.0) {
183       for (i = start; i < end; i++) {
184         PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
185         PetscCall(MatSetValues(Y, 1, &i, ncols, row, vals, ADD_VALUES));
186         PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
187       }
188     } else {
189       PetscInt vs = 100;
190       /* realloc if needed, as this function may be used in parallel */
191       PetscCall(PetscMalloc1(vs, &val));
192       for (i = start; i < end; i++) {
193         PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
194         if (vs < ncols) {
195           vs = PetscMin(2 * ncols, n);
196           PetscCall(PetscRealloc(vs * sizeof(*val), &val));
197         }
198         for (j = 0; j < ncols; j++) val[j] = a * vals[j];
199         PetscCall(MatSetValues(Y, 1, &i, ncols, row, val, ADD_VALUES));
200         PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
201       }
202       PetscCall(PetscFree(val));
203     }
204     PetscCall(MatRestoreRowUpperTriangular(X));
205     PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
206     PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
207   } else {
208     Mat B;
209 
210     PetscCall(MatAXPY_Basic_Preallocate(Y, X, &B));
211     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
212     PetscCall(MatHeaderMerge(Y, &B));
213   }
214   PetscFunctionReturn(0);
215 }
216 
217 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B, Mat Y, PetscScalar a, Mat X, MatStructure str) {
218   PetscInt           i, start, end, j, ncols, m, n;
219   const PetscInt    *row;
220   PetscScalar       *val;
221   const PetscScalar *vals;
222 
223   PetscFunctionBegin;
224   PetscCall(MatGetSize(X, &m, &n));
225   PetscCall(MatGetOwnershipRange(X, &start, &end));
226   PetscCall(MatGetRowUpperTriangular(Y));
227   PetscCall(MatGetRowUpperTriangular(X));
228   if (a == 1.0) {
229     for (i = start; i < end; i++) {
230       PetscCall(MatGetRow(Y, i, &ncols, &row, &vals));
231       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
232       PetscCall(MatRestoreRow(Y, i, &ncols, &row, &vals));
233 
234       PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
235       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
236       PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
237     }
238   } else {
239     PetscInt vs = 100;
240     /* realloc if needed, as this function may be used in parallel */
241     PetscCall(PetscMalloc1(vs, &val));
242     for (i = start; i < end; i++) {
243       PetscCall(MatGetRow(Y, i, &ncols, &row, &vals));
244       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
245       PetscCall(MatRestoreRow(Y, i, &ncols, &row, &vals));
246 
247       PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
248       if (vs < ncols) {
249         vs = PetscMin(2 * ncols, n);
250         PetscCall(PetscRealloc(vs * sizeof(*val), &val));
251       }
252       for (j = 0; j < ncols; j++) val[j] = a * vals[j];
253       PetscCall(MatSetValues(B, 1, &i, ncols, row, val, ADD_VALUES));
254       PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
255     }
256     PetscCall(PetscFree(val));
257   }
258   PetscCall(MatRestoreRowUpperTriangular(Y));
259   PetscCall(MatRestoreRowUpperTriangular(X));
260   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
261   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
262   PetscFunctionReturn(0);
263 }
264 
265 /*@
266    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.
267 
268    Neighbor-wise Collective on Mat
269 
270    Input Parameters:
271 +  Y - the matrices
272 -  a - the PetscScalar
273 
274    Level: intermediate
275 
276    Notes:
277     If Y is a rectangular matrix, the shift is done on the main diagonal Y_{ii} of the matrix (https://en.wikipedia.org/wiki/Main_diagonal)
278 
279     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
280    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
281    entry). No operation is performed when a is zero.
282 
283    To form Y = Y + diag(V) use MatDiagonalSet()
284 
285 .seealso: `MatDiagonalSet()`, `MatScale()`, `MatDiagonalScale()`
286  @*/
287 PetscErrorCode MatShift(Mat Y, PetscScalar a) {
288   PetscFunctionBegin;
289   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
290   PetscCheck(Y->assembled, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix");
291   PetscCheck(!Y->factortype, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
292   MatCheckPreallocated(Y, 1);
293   if (a == 0.0) PetscFunctionReturn(0);
294 
295   if (Y->ops->shift) PetscUseTypeMethod(Y, shift, a);
296   else PetscCall(MatShift_Basic(Y, a));
297 
298   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
299   PetscFunctionReturn(0);
300 }
301 
302 PetscErrorCode MatDiagonalSet_Default(Mat Y, Vec D, InsertMode is) {
303   PetscInt           i, start, end;
304   const PetscScalar *v;
305 
306   PetscFunctionBegin;
307   PetscCall(MatGetOwnershipRange(Y, &start, &end));
308   PetscCall(VecGetArrayRead(D, &v));
309   for (i = start; i < end; i++) { PetscCall(MatSetValues(Y, 1, &i, 1, &i, v + i - start, is)); }
310   PetscCall(VecRestoreArrayRead(D, &v));
311   PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
312   PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
313   PetscFunctionReturn(0);
314 }
315 
316 /*@
317    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
318    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
319    INSERT_VALUES.
320 
321    Neighbor-wise Collective on Mat
322 
323    Input Parameters:
324 +  Y - the input matrix
325 .  D - the diagonal matrix, represented as a vector
326 -  i - INSERT_VALUES or ADD_VALUES
327 
328    Notes:
329     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
330    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
331    entry).
332 
333    Level: intermediate
334 
335 .seealso: `MatShift()`, `MatScale()`, `MatDiagonalScale()`
336 @*/
337 PetscErrorCode MatDiagonalSet(Mat Y, Vec D, InsertMode is) {
338   PetscInt matlocal, veclocal;
339 
340   PetscFunctionBegin;
341   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
342   PetscValidHeaderSpecific(D, VEC_CLASSID, 2);
343   PetscCall(MatGetLocalSize(Y, &matlocal, NULL));
344   PetscCall(VecGetLocalSize(D, &veclocal));
345   PetscCheck(matlocal == veclocal, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number local rows of matrix %" PetscInt_FMT " does not match that of vector for diagonal %" PetscInt_FMT, matlocal, veclocal);
346   if (Y->ops->diagonalset) PetscUseTypeMethod(Y, diagonalset, D, is);
347   else PetscCall(MatDiagonalSet_Default(Y, D, is));
348   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
349   PetscFunctionReturn(0);
350 }
351 
352 /*@
353    MatAYPX - Computes Y = a*Y + X.
354 
355    Logically on Mat
356 
357    Input Parameters:
358 +  a - the PetscScalar multiplier
359 .  Y - the first matrix
360 .  X - the second matrix
361 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
362 
363    Level: intermediate
364 
365 .seealso: `MatAXPY()`
366  @*/
367 PetscErrorCode MatAYPX(Mat Y, PetscScalar a, Mat X, MatStructure str) {
368   PetscFunctionBegin;
369   PetscCall(MatScale(Y, a));
370   PetscCall(MatAXPY(Y, 1.0, X, str));
371   PetscFunctionReturn(0);
372 }
373 
374 /*@
375     MatComputeOperator - Computes the explicit matrix
376 
377     Collective on Mat
378 
379     Input Parameters:
380 +   inmat - the matrix
381 -   mattype - the matrix type for the explicit operator
382 
383     Output Parameter:
384 .   mat - the explicit  operator
385 
386     Notes:
387     This computation is done by applying the operators to columns of the identity matrix.
388     This routine is costly in general, and is recommended for use only with relatively small systems.
389     Currently, this routine uses a dense matrix format if mattype == NULL.
390 
391     Level: advanced
392 
393 @*/
394 PetscErrorCode MatComputeOperator(Mat inmat, MatType mattype, Mat *mat) {
395   PetscFunctionBegin;
396   PetscValidHeaderSpecific(inmat, MAT_CLASSID, 1);
397   PetscValidPointer(mat, 3);
398   PetscCall(MatConvert_Shell(inmat, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat));
399   PetscFunctionReturn(0);
400 }
401 
402 /*@
403     MatComputeOperatorTranspose - Computes the explicit matrix representation of
404         a give matrix that can apply MatMultTranspose()
405 
406     Collective on Mat
407 
408     Input Parameters:
409 +   inmat - the matrix
410 -   mattype - the matrix type for the explicit operator
411 
412     Output Parameter:
413 .   mat - the explicit  operator transposed
414 
415     Notes:
416     This computation is done by applying the transpose of the operator to columns of the identity matrix.
417     This routine is costly in general, and is recommended for use only with relatively small systems.
418     Currently, this routine uses a dense matrix format if mattype == NULL.
419 
420     Level: advanced
421 
422 @*/
423 PetscErrorCode MatComputeOperatorTranspose(Mat inmat, MatType mattype, Mat *mat) {
424   Mat A;
425 
426   PetscFunctionBegin;
427   PetscValidHeaderSpecific(inmat, MAT_CLASSID, 1);
428   PetscValidPointer(mat, 3);
429   PetscCall(MatCreateTranspose(inmat, &A));
430   PetscCall(MatConvert_Shell(A, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat));
431   PetscCall(MatDestroy(&A));
432   PetscFunctionReturn(0);
433 }
434 
435 /*@
436   MatChop - Set all values in the matrix less than the tolerance to zero
437 
438   Input Parameters:
439 + A   - The matrix
440 - tol - The zero tolerance
441 
442   Output Parameters:
443 . A - The chopped matrix
444 
445   Level: intermediate
446 
447 .seealso: `MatCreate()`, `MatZeroEntries()`
448  @*/
449 PetscErrorCode MatChop(Mat A, PetscReal tol) {
450   Mat          a;
451   PetscScalar *newVals;
452   PetscInt    *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
453   PetscBool    flg;
454 
455   PetscFunctionBegin;
456   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, ""));
457   if (flg) {
458     PetscCall(MatDenseGetLocalMatrix(A, &a));
459     PetscCall(MatDenseGetLDA(a, &r));
460     PetscCall(MatGetSize(a, &rStart, &rEnd));
461     PetscCall(MatDenseGetArray(a, &newVals));
462     for (; colMax < rEnd; ++colMax) {
463       for (maxRows = 0; maxRows < rStart; ++maxRows) { newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r]; }
464     }
465     PetscCall(MatDenseRestoreArray(a, &newVals));
466   } else {
467     PetscCall(MatGetOwnershipRange(A, &rStart, &rEnd));
468     PetscCall(MatGetRowUpperTriangular(A));
469     for (r = rStart; r < rEnd; ++r) {
470       PetscInt ncols;
471 
472       PetscCall(MatGetRow(A, r, &ncols, NULL, NULL));
473       colMax = PetscMax(colMax, ncols);
474       PetscCall(MatRestoreRow(A, r, &ncols, NULL, NULL));
475     }
476     numRows = rEnd - rStart;
477     PetscCall(MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A)));
478     PetscCall(PetscMalloc2(colMax, &newCols, colMax, &newVals));
479     PetscCall(MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg)); /* cache user-defined value */
480     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
481     /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd()             */
482     /* that are potentially called many times depending on the distribution of A */
483     for (r = rStart; r < rStart + maxRows; ++r) {
484       const PetscScalar *vals;
485       const PetscInt    *cols;
486       PetscInt           ncols, newcols, c;
487 
488       if (r < rEnd) {
489         PetscCall(MatGetRow(A, r, &ncols, &cols, &vals));
490         for (c = 0; c < ncols; ++c) {
491           newCols[c] = cols[c];
492           newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
493         }
494         newcols = ncols;
495         PetscCall(MatRestoreRow(A, r, &ncols, &cols, &vals));
496         PetscCall(MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES));
497       }
498       PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
499       PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
500     }
501     PetscCall(MatRestoreRowUpperTriangular(A));
502     PetscCall(PetscFree2(newCols, newVals));
503     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg)); /* reset option to its user-defined value */
504   }
505   PetscFunctionReturn(0);
506 }
507