xref: /petsc/src/mat/utils/axpy.c (revision dbbe0bcd3f3a8fbab5a45420dc06f8387e5764c6)
1 
2 #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/
3 
4 static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
5 {
6   Mat            A,F;
7   PetscErrorCode (*f)(Mat,Mat*);
8 
9   PetscFunctionBegin;
10   PetscCall(PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f));
11   if (f) {
12     PetscCall(MatTransposeGetMat(T,&A));
13     if (T == X) {
14       PetscCall(PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
15       PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&F));
16       A = Y;
17     } else {
18       PetscCall(PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
19       PetscCall(MatTranspose(X,MAT_INITIAL_MATRIX,&F));
20     }
21   } else {
22     PetscCall(MatHermitianTransposeGetMat(T,&A));
23     if (T == X) {
24       PetscCall(PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
25       PetscCall(MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F));
26       A = Y;
27     } else {
28       PetscCall(PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
29       PetscCall(MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F));
30     }
31   }
32   PetscCall(MatAXPY(A,a,F,str));
33   PetscCall(MatDestroy(&F));
34   PetscFunctionReturn(0);
35 }
36 
37 /*@
38    MatAXPY - Computes Y = a*X + Y.
39 
40    Logically  Collective on Mat
41 
42    Input Parameters:
43 +  a - the scalar multiplier
44 .  X - the first matrix
45 .  Y - the second matrix
46 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
47 
48    Notes: No operation is performed when a is zero.
49 
50    Level: intermediate
51 
52 .seealso: `MatAYPX()`
53  @*/
54 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
55 {
56   PetscInt       M1,M2,N1,N2;
57   PetscInt       m1,m2,n1,n2;
58   MatType        t1,t2;
59   PetscBool      sametype,transpose;
60 
61   PetscFunctionBegin;
62   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
63   PetscValidLogicalCollectiveScalar(Y,a,2);
64   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
65   PetscCheckSameComm(Y,1,X,3);
66   PetscCall(MatGetSize(X,&M1,&N1));
67   PetscCall(MatGetSize(Y,&M2,&N2));
68   PetscCall(MatGetLocalSize(X,&m1,&n1));
69   PetscCall(MatGetLocalSize(Y,&m2,&n2));
70   PetscCheck(M1 == M2 && N1 == N2,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT,M1,N1,M2,N2);
71   PetscCheck(m1 == m2 && n1 == n2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT,m1,n1,m2,n2);
72   PetscCheck(Y->assembled,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (Y)");
73   PetscCheck(X->assembled,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (X)");
74   if (a == (PetscScalar)0.0) PetscFunctionReturn(0);
75   if (Y == X) {
76     PetscCall(MatScale(Y,1.0 + a));
77     PetscFunctionReturn(0);
78   }
79   PetscCall(MatGetType(X,&t1));
80   PetscCall(MatGetType(Y,&t2));
81   PetscCall(PetscStrcmp(t1,t2,&sametype));
82   PetscCall(PetscLogEventBegin(MAT_AXPY,Y,0,0,0));
83   if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
84     PetscUseTypeMethod(Y,axpy,a,X,str);
85   } else {
86     PetscCall(PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose));
87     if (transpose) {
88       PetscCall(MatTransposeAXPY_Private(Y,a,X,str,X));
89     } else {
90       PetscCall(PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose));
91       if (transpose) {
92         PetscCall(MatTransposeAXPY_Private(Y,a,X,str,Y));
93       } else {
94         PetscCall(MatAXPY_Basic(Y,a,X,str));
95       }
96     }
97   }
98   PetscCall(PetscLogEventEnd(MAT_AXPY,Y,0,0,0));
99   PetscFunctionReturn(0);
100 }
101 
102 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
103 {
104   PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
105 
106   PetscFunctionBegin;
107   /* look for any available faster alternative to the general preallocator */
108   PetscCall(PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall));
109   if (preall) {
110     PetscCall((*preall)(Y,X,B));
111   } else { /* Use MatPrellocator, assumes same row-col distribution */
112     Mat      preallocator;
113     PetscInt r,rstart,rend;
114     PetscInt m,n,M,N;
115 
116     PetscCall(MatGetRowUpperTriangular(Y));
117     PetscCall(MatGetRowUpperTriangular(X));
118     PetscCall(MatGetSize(Y,&M,&N));
119     PetscCall(MatGetLocalSize(Y,&m,&n));
120     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&preallocator));
121     PetscCall(MatSetType(preallocator,MATPREALLOCATOR));
122     PetscCall(MatSetLayouts(preallocator,Y->rmap,Y->cmap));
123     PetscCall(MatSetUp(preallocator));
124     PetscCall(MatGetOwnershipRange(preallocator,&rstart,&rend));
125     for (r = rstart; r < rend; ++r) {
126       PetscInt          ncols;
127       const PetscInt    *row;
128       const PetscScalar *vals;
129 
130       PetscCall(MatGetRow(Y,r,&ncols,&row,&vals));
131       PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
132       PetscCall(MatRestoreRow(Y,r,&ncols,&row,&vals));
133       PetscCall(MatGetRow(X,r,&ncols,&row,&vals));
134       PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
135       PetscCall(MatRestoreRow(X,r,&ncols,&row,&vals));
136     }
137     PetscCall(MatSetOption(preallocator,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
138     PetscCall(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY));
139     PetscCall(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY));
140     PetscCall(MatRestoreRowUpperTriangular(Y));
141     PetscCall(MatRestoreRowUpperTriangular(X));
142 
143     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),B));
144     PetscCall(MatSetType(*B,((PetscObject)Y)->type_name));
145     PetscCall(MatSetLayouts(*B,Y->rmap,Y->cmap));
146     PetscCall(MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B));
147     PetscCall(MatDestroy(&preallocator));
148   }
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
153 {
154   PetscBool      isshell,isdense,isnest;
155 
156   PetscFunctionBegin;
157   PetscCall(MatIsShell(Y,&isshell));
158   if (isshell) { /* MatShell has special support for AXPY */
159     PetscErrorCode (*f)(Mat,PetscScalar,Mat,MatStructure);
160 
161     PetscCall(MatGetOperation(Y,MATOP_AXPY,(void (**)(void))&f));
162     if (f) {
163       PetscCall((*f)(Y,a,X,str));
164       PetscFunctionReturn(0);
165     }
166   }
167   /* no need to preallocate if Y is dense */
168   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)Y,&isdense,MATSEQDENSE,MATMPIDENSE,""));
169   if (isdense) {
170     PetscCall(PetscObjectTypeCompare((PetscObject)X,MATNEST,&isnest));
171     if (isnest) {
172       PetscCall(MatAXPY_Dense_Nest(Y,a,X));
173       PetscFunctionReturn(0);
174     }
175     if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
176   }
177   if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
178     PetscInt          i,start,end,j,ncols,m,n;
179     const PetscInt    *row;
180     PetscScalar       *val;
181     const PetscScalar *vals;
182 
183     PetscCall(MatGetSize(X,&m,&n));
184     PetscCall(MatGetOwnershipRange(X,&start,&end));
185     PetscCall(MatGetRowUpperTriangular(X));
186     if (a == 1.0) {
187       for (i = start; i < end; i++) {
188         PetscCall(MatGetRow(X,i,&ncols,&row,&vals));
189         PetscCall(MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES));
190         PetscCall(MatRestoreRow(X,i,&ncols,&row,&vals));
191       }
192     } else {
193       PetscInt vs = 100;
194       /* realloc if needed, as this function may be used in parallel */
195       PetscCall(PetscMalloc1(vs,&val));
196       for (i=start; i<end; i++) {
197         PetscCall(MatGetRow(X,i,&ncols,&row,&vals));
198         if (vs < ncols) {
199           vs   = PetscMin(2*ncols,n);
200           PetscCall(PetscRealloc(vs*sizeof(*val),&val));
201         }
202         for (j=0; j<ncols; j++) val[j] = a*vals[j];
203         PetscCall(MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES));
204         PetscCall(MatRestoreRow(X,i,&ncols,&row,&vals));
205       }
206       PetscCall(PetscFree(val));
207     }
208     PetscCall(MatRestoreRowUpperTriangular(X));
209     PetscCall(MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY));
210     PetscCall(MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY));
211   } else {
212     Mat B;
213 
214     PetscCall(MatAXPY_Basic_Preallocate(Y,X,&B));
215     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
216     PetscCall(MatHeaderMerge(Y,&B));
217   }
218   PetscFunctionReturn(0);
219 }
220 
221 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
222 {
223   PetscInt          i,start,end,j,ncols,m,n;
224   const PetscInt    *row;
225   PetscScalar       *val;
226   const PetscScalar *vals;
227 
228   PetscFunctionBegin;
229   PetscCall(MatGetSize(X,&m,&n));
230   PetscCall(MatGetOwnershipRange(X,&start,&end));
231   PetscCall(MatGetRowUpperTriangular(Y));
232   PetscCall(MatGetRowUpperTriangular(X));
233   if (a == 1.0) {
234     for (i = start; i < end; i++) {
235       PetscCall(MatGetRow(Y,i,&ncols,&row,&vals));
236       PetscCall(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES));
237       PetscCall(MatRestoreRow(Y,i,&ncols,&row,&vals));
238 
239       PetscCall(MatGetRow(X,i,&ncols,&row,&vals));
240       PetscCall(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES));
241       PetscCall(MatRestoreRow(X,i,&ncols,&row,&vals));
242     }
243   } else {
244     PetscInt vs = 100;
245     /* realloc if needed, as this function may be used in parallel */
246     PetscCall(PetscMalloc1(vs,&val));
247     for (i=start; i<end; i++) {
248       PetscCall(MatGetRow(Y,i,&ncols,&row,&vals));
249       PetscCall(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES));
250       PetscCall(MatRestoreRow(Y,i,&ncols,&row,&vals));
251 
252       PetscCall(MatGetRow(X,i,&ncols,&row,&vals));
253       if (vs < ncols) {
254         vs   = PetscMin(2*ncols,n);
255         PetscCall(PetscRealloc(vs*sizeof(*val),&val));
256       }
257       for (j=0; j<ncols; j++) val[j] = a*vals[j];
258       PetscCall(MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES));
259       PetscCall(MatRestoreRow(X,i,&ncols,&row,&vals));
260     }
261     PetscCall(PetscFree(val));
262   }
263   PetscCall(MatRestoreRowUpperTriangular(Y));
264   PetscCall(MatRestoreRowUpperTriangular(X));
265   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
266   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
267   PetscFunctionReturn(0);
268 }
269 
270 /*@
271    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.
272 
273    Neighbor-wise Collective on Mat
274 
275    Input Parameters:
276 +  Y - the matrices
277 -  a - the PetscScalar
278 
279    Level: intermediate
280 
281    Notes:
282     If Y is a rectangular matrix, the shift is done on the main diagonal Y_{ii} of the matrix (https://en.wikipedia.org/wiki/Main_diagonal)
283 
284     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
285    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
286    entry). No operation is performed when a is zero.
287 
288    To form Y = Y + diag(V) use MatDiagonalSet()
289 
290 .seealso: `MatDiagonalSet()`, `MatScale()`, `MatDiagonalScale()`
291  @*/
292 PetscErrorCode  MatShift(Mat Y,PetscScalar a)
293 {
294   PetscFunctionBegin;
295   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
296   PetscCheck(Y->assembled,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
297   PetscCheck(!Y->factortype,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
298   MatCheckPreallocated(Y,1);
299   if (a == 0.0) PetscFunctionReturn(0);
300 
301   if (Y->ops->shift) PetscUseTypeMethod(Y,shift,a);
302   else PetscCall(MatShift_Basic(Y,a));
303 
304   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
305   PetscFunctionReturn(0);
306 }
307 
308 PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
309 {
310   PetscInt          i,start,end;
311   const PetscScalar *v;
312 
313   PetscFunctionBegin;
314   PetscCall(MatGetOwnershipRange(Y,&start,&end));
315   PetscCall(VecGetArrayRead(D,&v));
316   for (i=start; i<end; i++) {
317     PetscCall(MatSetValues(Y,1,&i,1,&i,v+i-start,is));
318   }
319   PetscCall(VecRestoreArrayRead(D,&v));
320   PetscCall(MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY));
321   PetscCall(MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY));
322   PetscFunctionReturn(0);
323 }
324 
325 /*@
326    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
327    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
328    INSERT_VALUES.
329 
330    Neighbor-wise Collective on Mat
331 
332    Input Parameters:
333 +  Y - the input matrix
334 .  D - the diagonal matrix, represented as a vector
335 -  i - INSERT_VALUES or ADD_VALUES
336 
337    Notes:
338     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
339    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
340    entry).
341 
342    Level: intermediate
343 
344 .seealso: `MatShift()`, `MatScale()`, `MatDiagonalScale()`
345 @*/
346 PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
347 {
348   PetscInt       matlocal,veclocal;
349 
350   PetscFunctionBegin;
351   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
352   PetscValidHeaderSpecific(D,VEC_CLASSID,2);
353   PetscCall(MatGetLocalSize(Y,&matlocal,NULL));
354   PetscCall(VecGetLocalSize(D,&veclocal));
355   PetscCheck(matlocal == veclocal,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %" PetscInt_FMT " does not match that of vector for diagonal %" PetscInt_FMT,matlocal,veclocal);
356   if (Y->ops->diagonalset) PetscUseTypeMethod(Y,diagonalset,D,is);
357   else PetscCall(MatDiagonalSet_Default(Y,D,is));
358   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
359   PetscFunctionReturn(0);
360 }
361 
362 /*@
363    MatAYPX - Computes Y = a*Y + X.
364 
365    Logically on Mat
366 
367    Input Parameters:
368 +  a - the PetscScalar multiplier
369 .  Y - the first matrix
370 .  X - the second matrix
371 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
372 
373    Level: intermediate
374 
375 .seealso: `MatAXPY()`
376  @*/
377 PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
378 {
379   PetscFunctionBegin;
380   PetscCall(MatScale(Y,a));
381   PetscCall(MatAXPY(Y,1.0,X,str));
382   PetscFunctionReturn(0);
383 }
384 
385 /*@
386     MatComputeOperator - Computes the explicit matrix
387 
388     Collective on Mat
389 
390     Input Parameters:
391 +   inmat - the matrix
392 -   mattype - the matrix type for the explicit operator
393 
394     Output Parameter:
395 .   mat - the explicit  operator
396 
397     Notes:
398     This computation is done by applying the operators to columns of the identity matrix.
399     This routine is costly in general, and is recommended for use only with relatively small systems.
400     Currently, this routine uses a dense matrix format if mattype == NULL.
401 
402     Level: advanced
403 
404 @*/
405 PetscErrorCode  MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
406 {
407   PetscFunctionBegin;
408   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
409   PetscValidPointer(mat,3);
410   PetscCall(MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat));
411   PetscFunctionReturn(0);
412 }
413 
414 /*@
415     MatComputeOperatorTranspose - Computes the explicit matrix representation of
416         a give matrix that can apply MatMultTranspose()
417 
418     Collective on Mat
419 
420     Input Parameters:
421 +   inmat - the matrix
422 -   mattype - the matrix type for the explicit operator
423 
424     Output Parameter:
425 .   mat - the explicit  operator transposed
426 
427     Notes:
428     This computation is done by applying the transpose of the operator to columns of the identity matrix.
429     This routine is costly in general, and is recommended for use only with relatively small systems.
430     Currently, this routine uses a dense matrix format if mattype == NULL.
431 
432     Level: advanced
433 
434 @*/
435 PetscErrorCode  MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
436 {
437   Mat            A;
438 
439   PetscFunctionBegin;
440   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
441   PetscValidPointer(mat,3);
442   PetscCall(MatCreateTranspose(inmat,&A));
443   PetscCall(MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat));
444   PetscCall(MatDestroy(&A));
445   PetscFunctionReturn(0);
446 }
447 
448 /*@
449   MatChop - Set all values in the matrix less than the tolerance to zero
450 
451   Input Parameters:
452 + A   - The matrix
453 - tol - The zero tolerance
454 
455   Output Parameters:
456 . A - The chopped matrix
457 
458   Level: intermediate
459 
460 .seealso: `MatCreate()`, `MatZeroEntries()`
461  @*/
462 PetscErrorCode MatChop(Mat A, PetscReal tol)
463 {
464   Mat            a;
465   PetscScalar    *newVals;
466   PetscInt       *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
467   PetscBool      flg;
468 
469   PetscFunctionBegin;
470   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, ""));
471   if (flg) {
472     PetscCall(MatDenseGetLocalMatrix(A, &a));
473     PetscCall(MatDenseGetLDA(a, &r));
474     PetscCall(MatGetSize(a, &rStart, &rEnd));
475     PetscCall(MatDenseGetArray(a, &newVals));
476     for (; colMax < rEnd; ++colMax) {
477       for (maxRows = 0; maxRows < rStart; ++maxRows) {
478         newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
479       }
480     }
481     PetscCall(MatDenseRestoreArray(a, &newVals));
482   } else {
483     PetscCall(MatGetOwnershipRange(A, &rStart, &rEnd));
484     PetscCall(MatGetRowUpperTriangular(A));
485     for (r = rStart; r < rEnd; ++r) {
486       PetscInt ncols;
487 
488       PetscCall(MatGetRow(A, r, &ncols, NULL, NULL));
489       colMax = PetscMax(colMax, ncols);
490       PetscCall(MatRestoreRow(A, r, &ncols, NULL, NULL));
491     }
492     numRows = rEnd - rStart;
493     PetscCall(MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A)));
494     PetscCall(PetscMalloc2(colMax, &newCols, colMax, &newVals));
495     PetscCall(MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg)); /* cache user-defined value */
496     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
497     /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd()             */
498     /* that are potentially called many times depending on the distribution of A */
499     for (r = rStart; r < rStart+maxRows; ++r) {
500       const PetscScalar *vals;
501       const PetscInt    *cols;
502       PetscInt           ncols, newcols, c;
503 
504       if (r < rEnd) {
505         PetscCall(MatGetRow(A, r, &ncols, &cols, &vals));
506         for (c = 0; c < ncols; ++c) {
507           newCols[c] = cols[c];
508           newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
509         }
510         newcols = ncols;
511         PetscCall(MatRestoreRow(A, r, &ncols, &cols, &vals));
512         PetscCall(MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES));
513       }
514       PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
515       PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
516     }
517     PetscCall(MatRestoreRowUpperTriangular(A));
518     PetscCall(PetscFree2(newCols, newVals));
519     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg)); /* reset option to its user-defined value */
520   }
521   PetscFunctionReturn(0);
522 }
523