xref: /petsc/src/mat/utils/axpy.c (revision 37d05b0256c1e9ba4bc423c4eccb3df226931ef0)
1 
2 #include <petsc/private/matimpl.h> /*I   "petscmat.h"  I*/
3 
4 static PetscErrorCode MatTransposeAXPY_Private(Mat Y, PetscScalar a, Mat X, MatStructure str, Mat T)
5 {
6   Mat A, F;
7   PetscErrorCode (*f)(Mat, Mat *);
8 
9   PetscFunctionBegin;
10   PetscCall(PetscObjectQueryFunction((PetscObject)T, "MatTransposeGetMat_C", &f));
11   if (f) {
12     PetscCall(MatTransposeGetMat(T, &A));
13     if (T == X) {
14       PetscCall(PetscInfo(NULL, "Explicitly transposing X of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
15       PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &F));
16       A = Y;
17     } else {
18       PetscCall(PetscInfo(NULL, "Transposing X because Y of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
19       PetscCall(MatTranspose(X, MAT_INITIAL_MATRIX, &F));
20     }
21   } else {
22     PetscCall(MatHermitianTransposeGetMat(T, &A));
23     if (T == X) {
24       PetscCall(PetscInfo(NULL, "Explicitly Hermitian transposing X of type MATHERITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
25       PetscCall(MatHermitianTranspose(A, MAT_INITIAL_MATRIX, &F));
26       A = Y;
27     } else {
28       PetscCall(PetscInfo(NULL, "Hermitian transposing X because Y of type MATHERITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
29       PetscCall(MatHermitianTranspose(X, MAT_INITIAL_MATRIX, &F));
30     }
31   }
32   PetscCall(MatAXPY(A, a, F, str));
33   PetscCall(MatDestroy(&F));
34   PetscFunctionReturn(PETSC_SUCCESS);
35 }
36 
37 /*@
38    MatAXPY - Computes Y = a*X + Y.
39 
40    Logically  Collective
41 
42    Input Parameters:
43 +  a - the scalar multiplier
44 .  X - the first matrix
45 .  Y - the second matrix
46 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
47 
48    Note:
49    No operation is performed when a is zero.
50 
51    Level: intermediate
52 
53 .seealso: `MatAYPX()`
54  @*/
55 PetscErrorCode MatAXPY(Mat Y, PetscScalar a, Mat X, MatStructure str)
56 {
57   PetscInt  M1, M2, N1, N2;
58   PetscInt  m1, m2, n1, n2;
59   PetscBool sametype, transpose;
60 
61   PetscFunctionBegin;
62   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
63   PetscValidLogicalCollectiveScalar(Y, a, 2);
64   PetscValidHeaderSpecific(X, MAT_CLASSID, 3);
65   PetscCheckSameComm(Y, 1, X, 3);
66   PetscCall(MatGetSize(X, &M1, &N1));
67   PetscCall(MatGetSize(Y, &M2, &N2));
68   PetscCall(MatGetLocalSize(X, &m1, &n1));
69   PetscCall(MatGetLocalSize(Y, &m2, &n2));
70   PetscCheck(M1 == M2 && N1 == N2, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_SIZ, "Non conforming matrix add: global sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT, M1, N1, M2, N2);
71   PetscCheck(m1 == m2 && n1 == n2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Non conforming matrix add: local sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT, m1, n1, m2, n2);
72   PetscCheck(Y->assembled, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix (Y)");
73   PetscCheck(X->assembled, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix (X)");
74   if (a == (PetscScalar)0.0) PetscFunctionReturn(PETSC_SUCCESS);
75   if (Y == X) {
76     PetscCall(MatScale(Y, 1.0 + a));
77     PetscFunctionReturn(PETSC_SUCCESS);
78   }
79   PetscCall(PetscObjectObjectTypeCompare((PetscObject)X, (PetscObject)Y, &sametype));
80   PetscCall(PetscLogEventBegin(MAT_AXPY, Y, 0, 0, 0));
81   if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
82     PetscUseTypeMethod(Y, axpy, a, X, str);
83   } else {
84     PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
85     if (transpose) {
86       PetscCall(MatTransposeAXPY_Private(Y, a, X, str, X));
87     } else {
88       PetscCall(PetscObjectTypeCompareAny((PetscObject)Y, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
89       if (transpose) {
90         PetscCall(MatTransposeAXPY_Private(Y, a, X, str, Y));
91       } else {
92         PetscCall(MatAXPY_Basic(Y, a, X, str));
93       }
94     }
95   }
96   PetscCall(PetscLogEventEnd(MAT_AXPY, Y, 0, 0, 0));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
101 {
102   PetscErrorCode (*preall)(Mat, Mat, Mat *) = NULL;
103 
104   PetscFunctionBegin;
105   /* look for any available faster alternative to the general preallocator */
106   PetscCall(PetscObjectQueryFunction((PetscObject)Y, "MatAXPYGetPreallocation_C", &preall));
107   if (preall) {
108     PetscCall((*preall)(Y, X, B));
109   } else { /* Use MatPrellocator, assumes same row-col distribution */
110     Mat      preallocator;
111     PetscInt r, rstart, rend;
112     PetscInt m, n, M, N;
113 
114     PetscCall(MatGetRowUpperTriangular(Y));
115     PetscCall(MatGetRowUpperTriangular(X));
116     PetscCall(MatGetSize(Y, &M, &N));
117     PetscCall(MatGetLocalSize(Y, &m, &n));
118     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &preallocator));
119     PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
120     PetscCall(MatSetLayouts(preallocator, Y->rmap, Y->cmap));
121     PetscCall(MatSetUp(preallocator));
122     PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
123     for (r = rstart; r < rend; ++r) {
124       PetscInt           ncols;
125       const PetscInt    *row;
126       const PetscScalar *vals;
127 
128       PetscCall(MatGetRow(Y, r, &ncols, &row, &vals));
129       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
130       PetscCall(MatRestoreRow(Y, r, &ncols, &row, &vals));
131       PetscCall(MatGetRow(X, r, &ncols, &row, &vals));
132       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
133       PetscCall(MatRestoreRow(X, r, &ncols, &row, &vals));
134     }
135     PetscCall(MatSetOption(preallocator, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
136     PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
137     PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
138     PetscCall(MatRestoreRowUpperTriangular(Y));
139     PetscCall(MatRestoreRowUpperTriangular(X));
140 
141     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), B));
142     PetscCall(MatSetType(*B, ((PetscObject)Y)->type_name));
143     PetscCall(MatSetLayouts(*B, Y->rmap, Y->cmap));
144     PetscCall(MatPreallocatorPreallocate(preallocator, PETSC_FALSE, *B));
145     PetscCall(MatDestroy(&preallocator));
146   }
147   PetscFunctionReturn(PETSC_SUCCESS);
148 }
149 
150 PetscErrorCode MatAXPY_Basic(Mat Y, PetscScalar a, Mat X, MatStructure str)
151 {
152   PetscBool isshell, isdense, isnest;
153 
154   PetscFunctionBegin;
155   PetscCall(MatIsShell(Y, &isshell));
156   if (isshell) { /* MatShell has special support for AXPY */
157     PetscErrorCode (*f)(Mat, PetscScalar, Mat, MatStructure);
158 
159     PetscCall(MatGetOperation(Y, MATOP_AXPY, (void (**)(void)) & f));
160     if (f) {
161       PetscCall((*f)(Y, a, X, str));
162       PetscFunctionReturn(PETSC_SUCCESS);
163     }
164   }
165   /* no need to preallocate if Y is dense */
166   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)Y, &isdense, MATSEQDENSE, MATMPIDENSE, ""));
167   if (isdense) {
168     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATNEST, &isnest));
169     if (isnest) {
170       PetscCall(MatAXPY_Dense_Nest(Y, a, X));
171       PetscFunctionReturn(PETSC_SUCCESS);
172     }
173     if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
174   }
175   if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
176     PetscInt           i, start, end, j, ncols, m, n;
177     const PetscInt    *row;
178     PetscScalar       *val;
179     const PetscScalar *vals;
180 
181     PetscCall(MatGetSize(X, &m, &n));
182     PetscCall(MatGetOwnershipRange(X, &start, &end));
183     PetscCall(MatGetRowUpperTriangular(X));
184     if (a == 1.0) {
185       for (i = start; i < end; i++) {
186         PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
187         PetscCall(MatSetValues(Y, 1, &i, ncols, row, vals, ADD_VALUES));
188         PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
189       }
190     } else {
191       PetscInt vs = 100;
192       /* realloc if needed, as this function may be used in parallel */
193       PetscCall(PetscMalloc1(vs, &val));
194       for (i = start; i < end; i++) {
195         PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
196         if (vs < ncols) {
197           vs = PetscMin(2 * ncols, n);
198           PetscCall(PetscRealloc(vs * sizeof(*val), &val));
199         }
200         for (j = 0; j < ncols; j++) val[j] = a * vals[j];
201         PetscCall(MatSetValues(Y, 1, &i, ncols, row, val, ADD_VALUES));
202         PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
203       }
204       PetscCall(PetscFree(val));
205     }
206     PetscCall(MatRestoreRowUpperTriangular(X));
207     PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
208     PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
209   } else {
210     Mat B;
211 
212     PetscCall(MatAXPY_Basic_Preallocate(Y, X, &B));
213     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
214     PetscCall(MatHeaderMerge(Y, &B));
215   }
216   PetscFunctionReturn(PETSC_SUCCESS);
217 }
218 
219 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B, Mat Y, PetscScalar a, Mat X, MatStructure str)
220 {
221   PetscInt           i, start, end, j, ncols, m, n;
222   const PetscInt    *row;
223   PetscScalar       *val;
224   const PetscScalar *vals;
225 
226   PetscFunctionBegin;
227   PetscCall(MatGetSize(X, &m, &n));
228   PetscCall(MatGetOwnershipRange(X, &start, &end));
229   PetscCall(MatGetRowUpperTriangular(Y));
230   PetscCall(MatGetRowUpperTriangular(X));
231   if (a == 1.0) {
232     for (i = start; i < end; i++) {
233       PetscCall(MatGetRow(Y, i, &ncols, &row, &vals));
234       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
235       PetscCall(MatRestoreRow(Y, i, &ncols, &row, &vals));
236 
237       PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
238       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
239       PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
240     }
241   } else {
242     PetscInt vs = 100;
243     /* realloc if needed, as this function may be used in parallel */
244     PetscCall(PetscMalloc1(vs, &val));
245     for (i = start; i < end; i++) {
246       PetscCall(MatGetRow(Y, i, &ncols, &row, &vals));
247       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
248       PetscCall(MatRestoreRow(Y, i, &ncols, &row, &vals));
249 
250       PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
251       if (vs < ncols) {
252         vs = PetscMin(2 * ncols, n);
253         PetscCall(PetscRealloc(vs * sizeof(*val), &val));
254       }
255       for (j = 0; j < ncols; j++) val[j] = a * vals[j];
256       PetscCall(MatSetValues(B, 1, &i, ncols, row, val, ADD_VALUES));
257       PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
258     }
259     PetscCall(PetscFree(val));
260   }
261   PetscCall(MatRestoreRowUpperTriangular(Y));
262   PetscCall(MatRestoreRowUpperTriangular(X));
263   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
264   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
265   PetscFunctionReturn(PETSC_SUCCESS);
266 }
267 
268 /*@
269    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.
270 
271    Neighbor-wise Collective
272 
273    Input Parameters:
274 +  Y - the matrices
275 -  a - the PetscScalar
276 
277    Level: intermediate
278 
279    Notes:
280     If Y is a rectangular matrix, the shift is done on the main diagonal Y_{ii} of the matrix (https://en.wikipedia.org/wiki/Main_diagonal)
281 
282     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
283    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
284    entry). No operation is performed when a is zero.
285 
286    To form Y = Y + diag(V) use MatDiagonalSet()
287 
288 .seealso: `MatDiagonalSet()`, `MatScale()`, `MatDiagonalScale()`
289  @*/
290 PetscErrorCode MatShift(Mat Y, PetscScalar a)
291 {
292   PetscFunctionBegin;
293   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
294   PetscCheck(Y->assembled, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix");
295   PetscCheck(!Y->factortype, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
296   MatCheckPreallocated(Y, 1);
297   if (a == 0.0) PetscFunctionReturn(PETSC_SUCCESS);
298 
299   if (Y->ops->shift) PetscUseTypeMethod(Y, shift, a);
300   else PetscCall(MatShift_Basic(Y, a));
301 
302   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
303   PetscFunctionReturn(PETSC_SUCCESS);
304 }
305 
306 PetscErrorCode MatDiagonalSet_Default(Mat Y, Vec D, InsertMode is)
307 {
308   PetscInt           i, start, end;
309   const PetscScalar *v;
310 
311   PetscFunctionBegin;
312   PetscCall(MatGetOwnershipRange(Y, &start, &end));
313   PetscCall(VecGetArrayRead(D, &v));
314   for (i = start; i < end; i++) PetscCall(MatSetValues(Y, 1, &i, 1, &i, v + i - start, is));
315   PetscCall(VecRestoreArrayRead(D, &v));
316   PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
317   PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
318   PetscFunctionReturn(PETSC_SUCCESS);
319 }
320 
321 /*@
322    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
323    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
324    INSERT_VALUES.
325 
326    Neighbor-wise Collective
327 
328    Input Parameters:
329 +  Y - the input matrix
330 .  D - the diagonal matrix, represented as a vector
331 -  i - INSERT_VALUES or ADD_VALUES
332 
333    Note:
334     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
335    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
336    entry).
337 
338    Level: intermediate
339 
340 .seealso: `MatShift()`, `MatScale()`, `MatDiagonalScale()`
341 @*/
342 PetscErrorCode MatDiagonalSet(Mat Y, Vec D, InsertMode is)
343 {
344   PetscInt matlocal, veclocal;
345 
346   PetscFunctionBegin;
347   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
348   PetscValidHeaderSpecific(D, VEC_CLASSID, 2);
349   PetscCall(MatGetLocalSize(Y, &matlocal, NULL));
350   PetscCall(VecGetLocalSize(D, &veclocal));
351   PetscCheck(matlocal == veclocal, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number local rows of matrix %" PetscInt_FMT " does not match that of vector for diagonal %" PetscInt_FMT, matlocal, veclocal);
352   if (Y->ops->diagonalset) PetscUseTypeMethod(Y, diagonalset, D, is);
353   else PetscCall(MatDiagonalSet_Default(Y, D, is));
354   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 /*@
359    MatAYPX - Computes Y = a*Y + X.
360 
361    Logically on Mat
362 
363    Input Parameters:
364 +  a - the PetscScalar multiplier
365 .  Y - the first matrix
366 .  X - the second matrix
367 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
368 
369    Level: intermediate
370 
371 .seealso: `MatAXPY()`
372  @*/
373 PetscErrorCode MatAYPX(Mat Y, PetscScalar a, Mat X, MatStructure str)
374 {
375   PetscFunctionBegin;
376   PetscCall(MatScale(Y, a));
377   PetscCall(MatAXPY(Y, 1.0, X, str));
378   PetscFunctionReturn(PETSC_SUCCESS);
379 }
380 
381 /*@
382     MatComputeOperator - Computes the explicit matrix
383 
384     Collective
385 
386     Input Parameters:
387 +   inmat - the matrix
388 -   mattype - the matrix type for the explicit operator
389 
390     Output Parameter:
391 .   mat - the explicit  operator
392 
393     Note:
394     This computation is done by applying the operators to columns of the identity matrix.
395     This routine is costly in general, and is recommended for use only with relatively small systems.
396     Currently, this routine uses a dense matrix format if mattype == NULL.
397 
398     Level: advanced
399 
400 @*/
401 PetscErrorCode MatComputeOperator(Mat inmat, MatType mattype, Mat *mat)
402 {
403   PetscFunctionBegin;
404   PetscValidHeaderSpecific(inmat, MAT_CLASSID, 1);
405   PetscValidPointer(mat, 3);
406   PetscCall(MatConvert_Shell(inmat, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat));
407   PetscFunctionReturn(PETSC_SUCCESS);
408 }
409 
410 /*@
411     MatComputeOperatorTranspose - Computes the explicit matrix representation of
412         a give matrix that can apply MatMultTranspose()
413 
414     Collective
415 
416     Input Parameters:
417 +   inmat - the matrix
418 -   mattype - the matrix type for the explicit operator
419 
420     Output Parameter:
421 .   mat - the explicit  operator transposed
422 
423     Note:
424     This computation is done by applying the transpose of the operator to columns of the identity matrix.
425     This routine is costly in general, and is recommended for use only with relatively small systems.
426     Currently, this routine uses a dense matrix format if mattype == NULL.
427 
428     Level: advanced
429 
430 @*/
431 PetscErrorCode MatComputeOperatorTranspose(Mat inmat, MatType mattype, Mat *mat)
432 {
433   Mat A;
434 
435   PetscFunctionBegin;
436   PetscValidHeaderSpecific(inmat, MAT_CLASSID, 1);
437   PetscValidPointer(mat, 3);
438   PetscCall(MatCreateTranspose(inmat, &A));
439   PetscCall(MatConvert_Shell(A, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat));
440   PetscCall(MatDestroy(&A));
441   PetscFunctionReturn(PETSC_SUCCESS);
442 }
443 
444 /*@
445   MatChop - Set all values in the matrix less than the tolerance to zero
446 
447   Input Parameters:
448 + A   - The matrix
449 - tol - The zero tolerance
450 
451   Output Parameters:
452 . A - The chopped matrix
453 
454   Level: intermediate
455 
456 .seealso: `MatCreate()`, `MatZeroEntries()`
457  @*/
458 PetscErrorCode MatChop(Mat A, PetscReal tol)
459 {
460   Mat          a;
461   PetscScalar *newVals;
462   PetscInt    *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
463   PetscBool    flg;
464 
465   PetscFunctionBegin;
466   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, ""));
467   if (flg) {
468     PetscCall(MatDenseGetLocalMatrix(A, &a));
469     PetscCall(MatDenseGetLDA(a, &r));
470     PetscCall(MatGetSize(a, &rStart, &rEnd));
471     PetscCall(MatDenseGetArray(a, &newVals));
472     for (; colMax < rEnd; ++colMax) {
473       for (maxRows = 0; maxRows < rStart; ++maxRows) newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
474     }
475     PetscCall(MatDenseRestoreArray(a, &newVals));
476   } else {
477     PetscCall(MatGetOwnershipRange(A, &rStart, &rEnd));
478     PetscCall(MatGetRowUpperTriangular(A));
479     for (r = rStart; r < rEnd; ++r) {
480       PetscInt ncols;
481 
482       PetscCall(MatGetRow(A, r, &ncols, NULL, NULL));
483       colMax = PetscMax(colMax, ncols);
484       PetscCall(MatRestoreRow(A, r, &ncols, NULL, NULL));
485     }
486     numRows = rEnd - rStart;
487     PetscCall(MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A)));
488     PetscCall(PetscMalloc2(colMax, &newCols, colMax, &newVals));
489     PetscCall(MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg)); /* cache user-defined value */
490     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
491     /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd()             */
492     /* that are potentially called many times depending on the distribution of A */
493     for (r = rStart; r < rStart + maxRows; ++r) {
494       const PetscScalar *vals;
495       const PetscInt    *cols;
496       PetscInt           ncols, newcols, c;
497 
498       if (r < rEnd) {
499         PetscCall(MatGetRow(A, r, &ncols, &cols, &vals));
500         for (c = 0; c < ncols; ++c) {
501           newCols[c] = cols[c];
502           newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
503         }
504         newcols = ncols;
505         PetscCall(MatRestoreRow(A, r, &ncols, &cols, &vals));
506         PetscCall(MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES));
507       }
508       PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
509       PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
510     }
511     PetscCall(MatRestoreRowUpperTriangular(A));
512     PetscCall(PetscFree2(newCols, newVals));
513     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg)); /* reset option to its user-defined value */
514   }
515   PetscFunctionReturn(PETSC_SUCCESS);
516 }
517