xref: /petsc/src/mat/utils/axpy.c (revision f97672e55eacc8688507b9471cd7ec2664d7f203)
1 
2 #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/
3 
4 static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
5 {
6   Mat            A,F;
7   PetscErrorCode (*f)(Mat,Mat*);
8 
9   PetscFunctionBegin;
10   PetscCall(PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f));
11   if (f) {
12     PetscCall(MatTransposeGetMat(T,&A));
13     if (T == X) {
14       PetscCall(PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
15       PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&F));
16       A = Y;
17     } else {
18       PetscCall(PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
19       PetscCall(MatTranspose(X,MAT_INITIAL_MATRIX,&F));
20     }
21   } else {
22     PetscCall(MatHermitianTransposeGetMat(T,&A));
23     if (T == X) {
24       PetscCall(PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
25       PetscCall(MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F));
26       A = Y;
27     } else {
28       PetscCall(PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
29       PetscCall(MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F));
30     }
31   }
32   PetscCall(MatAXPY(A,a,F,str));
33   PetscCall(MatDestroy(&F));
34   PetscFunctionReturn(0);
35 }
36 
37 /*@
38    MatAXPY - Computes Y = a*X + Y.
39 
40    Logically  Collective on Mat
41 
42    Input Parameters:
43 +  a - the scalar multiplier
44 .  X - the first matrix
45 .  Y - the second matrix
46 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
47 
48    Notes: No operation is performed when a is zero.
49 
50    Level: intermediate
51 
52 .seealso: `MatAYPX()`
53  @*/
54 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
55 {
56   PetscInt       M1,M2,N1,N2;
57   PetscInt       m1,m2,n1,n2;
58   MatType        t1,t2;
59   PetscBool      sametype,transpose;
60 
61   PetscFunctionBegin;
62   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
63   PetscValidLogicalCollectiveScalar(Y,a,2);
64   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
65   PetscCheckSameComm(Y,1,X,3);
66   PetscCall(MatGetSize(X,&M1,&N1));
67   PetscCall(MatGetSize(Y,&M2,&N2));
68   PetscCall(MatGetLocalSize(X,&m1,&n1));
69   PetscCall(MatGetLocalSize(Y,&m2,&n2));
70   PetscCheck(M1 == M2 && N1 == N2,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT,M1,N1,M2,N2);
71   PetscCheck(m1 == m2 && n1 == n2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT,m1,n1,m2,n2);
72   PetscCheck(Y->assembled,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (Y)");
73   PetscCheck(X->assembled,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (X)");
74   if (a == (PetscScalar)0.0) PetscFunctionReturn(0);
75   if (Y == X) {
76     PetscCall(MatScale(Y,1.0 + a));
77     PetscFunctionReturn(0);
78   }
79   PetscCall(MatGetType(X,&t1));
80   PetscCall(MatGetType(Y,&t2));
81   PetscCall(PetscStrcmp(t1,t2,&sametype));
82   PetscCall(PetscLogEventBegin(MAT_AXPY,Y,0,0,0));
83   if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
84     PetscCall((*Y->ops->axpy)(Y,a,X,str));
85   } else {
86     PetscCall(PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose));
87     if (transpose) {
88       PetscCall(MatTransposeAXPY_Private(Y,a,X,str,X));
89     } else {
90       PetscCall(PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose));
91       if (transpose) {
92         PetscCall(MatTransposeAXPY_Private(Y,a,X,str,Y));
93       } else {
94         PetscCall(MatAXPY_Basic(Y,a,X,str));
95       }
96     }
97   }
98   PetscCall(PetscLogEventEnd(MAT_AXPY,Y,0,0,0));
99   PetscFunctionReturn(0);
100 }
101 
102 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
103 {
104   PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
105 
106   PetscFunctionBegin;
107   /* look for any available faster alternative to the general preallocator */
108   PetscCall(PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall));
109   if (preall) {
110     PetscCall((*preall)(Y,X,B));
111   } else { /* Use MatPrellocator, assumes same row-col distribution */
112     Mat      preallocator;
113     PetscInt r,rstart,rend;
114     PetscInt m,n,M,N;
115 
116     PetscCall(MatGetRowUpperTriangular(Y));
117     PetscCall(MatGetRowUpperTriangular(X));
118     PetscCall(MatGetSize(Y,&M,&N));
119     PetscCall(MatGetLocalSize(Y,&m,&n));
120     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&preallocator));
121     PetscCall(MatSetType(preallocator,MATPREALLOCATOR));
122     PetscCall(MatSetLayouts(preallocator,Y->rmap,Y->cmap));
123     PetscCall(MatSetUp(preallocator));
124     PetscCall(MatGetOwnershipRange(preallocator,&rstart,&rend));
125     for (r = rstart; r < rend; ++r) {
126       PetscInt          ncols;
127       const PetscInt    *row;
128       const PetscScalar *vals;
129 
130       PetscCall(MatGetRow(Y,r,&ncols,&row,&vals));
131       PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
132       PetscCall(MatRestoreRow(Y,r,&ncols,&row,&vals));
133       PetscCall(MatGetRow(X,r,&ncols,&row,&vals));
134       PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
135       PetscCall(MatRestoreRow(X,r,&ncols,&row,&vals));
136     }
137     PetscCall(MatSetOption(preallocator,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
138     PetscCall(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY));
139     PetscCall(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY));
140     PetscCall(MatRestoreRowUpperTriangular(Y));
141     PetscCall(MatRestoreRowUpperTriangular(X));
142 
143     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),B));
144     PetscCall(MatSetType(*B,((PetscObject)Y)->type_name));
145     PetscCall(MatSetLayouts(*B,Y->rmap,Y->cmap));
146     PetscCall(MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B));
147     PetscCall(MatDestroy(&preallocator));
148   }
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
153 {
154   PetscBool      isshell,isdense,isnest;
155 
156   PetscFunctionBegin;
157   PetscCall(MatIsShell(Y,&isshell));
158   if (isshell) { /* MatShell has special support for AXPY */
159     PetscErrorCode (*f)(Mat,PetscScalar,Mat,MatStructure);
160 
161     PetscCall(MatGetOperation(Y,MATOP_AXPY,(void (**)(void))&f));
162     if (f) {
163       PetscCall((*f)(Y,a,X,str));
164       PetscFunctionReturn(0);
165     }
166   }
167   /* no need to preallocate if Y is dense */
168   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)Y,&isdense,MATSEQDENSE,MATMPIDENSE,""));
169   if (isdense) {
170     PetscCall(PetscObjectTypeCompare((PetscObject)X,MATNEST,&isnest));
171     if (isnest) {
172       PetscCall(MatAXPY_Dense_Nest(Y,a,X));
173       PetscFunctionReturn(0);
174     }
175     if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
176   }
177   if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
178     PetscInt          i,start,end,j,ncols,m,n;
179     const PetscInt    *row;
180     PetscScalar       *val;
181     const PetscScalar *vals;
182 
183     PetscCall(MatGetSize(X,&m,&n));
184     PetscCall(MatGetOwnershipRange(X,&start,&end));
185     PetscCall(MatGetRowUpperTriangular(X));
186     if (a == 1.0) {
187       for (i = start; i < end; i++) {
188         PetscCall(MatGetRow(X,i,&ncols,&row,&vals));
189         PetscCall(MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES));
190         PetscCall(MatRestoreRow(X,i,&ncols,&row,&vals));
191       }
192     } else {
193       PetscInt vs = 100;
194       /* realloc if needed, as this function may be used in parallel */
195       PetscCall(PetscMalloc1(vs,&val));
196       for (i=start; i<end; i++) {
197         PetscCall(MatGetRow(X,i,&ncols,&row,&vals));
198         if (vs < ncols) {
199           vs   = PetscMin(2*ncols,n);
200           PetscCall(PetscRealloc(vs*sizeof(*val),&val));
201         }
202         for (j=0; j<ncols; j++) val[j] = a*vals[j];
203         PetscCall(MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES));
204         PetscCall(MatRestoreRow(X,i,&ncols,&row,&vals));
205       }
206       PetscCall(PetscFree(val));
207     }
208     PetscCall(MatRestoreRowUpperTriangular(X));
209     PetscCall(MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY));
210     PetscCall(MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY));
211   } else {
212     Mat B;
213 
214     PetscCall(MatAXPY_Basic_Preallocate(Y,X,&B));
215     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
216     PetscCall(MatHeaderMerge(Y,&B));
217   }
218   PetscFunctionReturn(0);
219 }
220 
221 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
222 {
223   PetscInt          i,start,end,j,ncols,m,n;
224   const PetscInt    *row;
225   PetscScalar       *val;
226   const PetscScalar *vals;
227 
228   PetscFunctionBegin;
229   PetscCall(MatGetSize(X,&m,&n));
230   PetscCall(MatGetOwnershipRange(X,&start,&end));
231   PetscCall(MatGetRowUpperTriangular(Y));
232   PetscCall(MatGetRowUpperTriangular(X));
233   if (a == 1.0) {
234     for (i = start; i < end; i++) {
235       PetscCall(MatGetRow(Y,i,&ncols,&row,&vals));
236       PetscCall(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES));
237       PetscCall(MatRestoreRow(Y,i,&ncols,&row,&vals));
238 
239       PetscCall(MatGetRow(X,i,&ncols,&row,&vals));
240       PetscCall(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES));
241       PetscCall(MatRestoreRow(X,i,&ncols,&row,&vals));
242     }
243   } else {
244     PetscInt vs = 100;
245     /* realloc if needed, as this function may be used in parallel */
246     PetscCall(PetscMalloc1(vs,&val));
247     for (i=start; i<end; i++) {
248       PetscCall(MatGetRow(Y,i,&ncols,&row,&vals));
249       PetscCall(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES));
250       PetscCall(MatRestoreRow(Y,i,&ncols,&row,&vals));
251 
252       PetscCall(MatGetRow(X,i,&ncols,&row,&vals));
253       if (vs < ncols) {
254         vs   = PetscMin(2*ncols,n);
255         PetscCall(PetscRealloc(vs*sizeof(*val),&val));
256       }
257       for (j=0; j<ncols; j++) val[j] = a*vals[j];
258       PetscCall(MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES));
259       PetscCall(MatRestoreRow(X,i,&ncols,&row,&vals));
260     }
261     PetscCall(PetscFree(val));
262   }
263   PetscCall(MatRestoreRowUpperTriangular(Y));
264   PetscCall(MatRestoreRowUpperTriangular(X));
265   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
266   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
267   PetscFunctionReturn(0);
268 }
269 
270 /*@
271    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.
272 
273    Neighbor-wise Collective on Mat
274 
275    Input Parameters:
276 +  Y - the matrices
277 -  a - the PetscScalar
278 
279    Level: intermediate
280 
281    Notes:
282     If Y is a rectangular matrix, the shift is done on the main diagonal Y_{ii} of the matrix (https://en.wikipedia.org/wiki/Main_diagonal)
283 
284     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
285    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
286    entry). No operation is performed when a is zero.
287 
288    To form Y = Y + diag(V) use MatDiagonalSet()
289 
290 .seealso: `MatDiagonalSet()`, `MatScale()`, `MatDiagonalScale()`
291  @*/
292 PetscErrorCode  MatShift(Mat Y,PetscScalar a)
293 {
294   PetscFunctionBegin;
295   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
296   PetscCheck(Y->assembled,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
297   PetscCheck(!Y->factortype,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
298   MatCheckPreallocated(Y,1);
299   if (a == 0.0) PetscFunctionReturn(0);
300 
301   if (Y->ops->shift) PetscCall((*Y->ops->shift)(Y,a));
302   else PetscCall(MatShift_Basic(Y,a));
303 
304   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
305   PetscFunctionReturn(0);
306 }
307 
308 PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
309 {
310   PetscInt          i,start,end;
311   const PetscScalar *v;
312 
313   PetscFunctionBegin;
314   PetscCall(MatGetOwnershipRange(Y,&start,&end));
315   PetscCall(VecGetArrayRead(D,&v));
316   for (i=start; i<end; i++) {
317     PetscCall(MatSetValues(Y,1,&i,1,&i,v+i-start,is));
318   }
319   PetscCall(VecRestoreArrayRead(D,&v));
320   PetscCall(MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY));
321   PetscCall(MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY));
322   PetscFunctionReturn(0);
323 }
324 
325 /*@
326    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
327    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
328    INSERT_VALUES.
329 
330    Neighbor-wise Collective on Mat
331 
332    Input Parameters:
333 +  Y - the input matrix
334 .  D - the diagonal matrix, represented as a vector
335 -  i - INSERT_VALUES or ADD_VALUES
336 
337    Notes:
338     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
339    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
340    entry).
341 
342    Level: intermediate
343 
344 .seealso: `MatShift()`, `MatScale()`, `MatDiagonalScale()`
345 @*/
346 PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
347 {
348   PetscInt       matlocal,veclocal;
349 
350   PetscFunctionBegin;
351   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
352   PetscValidHeaderSpecific(D,VEC_CLASSID,2);
353   PetscCall(MatGetLocalSize(Y,&matlocal,NULL));
354   PetscCall(VecGetLocalSize(D,&veclocal));
355   PetscCheck(matlocal == veclocal,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %" PetscInt_FMT " does not match that of vector for diagonal %" PetscInt_FMT,matlocal,veclocal);
356   if (Y->ops->diagonalset) {
357     PetscCall((*Y->ops->diagonalset)(Y,D,is));
358   } else {
359     PetscCall(MatDiagonalSet_Default(Y,D,is));
360   }
361   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
362   PetscFunctionReturn(0);
363 }
364 
365 /*@
366    MatAYPX - Computes Y = a*Y + X.
367 
368    Logically on Mat
369 
370    Input Parameters:
371 +  a - the PetscScalar multiplier
372 .  Y - the first matrix
373 .  X - the second matrix
374 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
375 
376    Level: intermediate
377 
378 .seealso: `MatAXPY()`
379  @*/
380 PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
381 {
382   PetscFunctionBegin;
383   PetscCall(MatScale(Y,a));
384   PetscCall(MatAXPY(Y,1.0,X,str));
385   PetscFunctionReturn(0);
386 }
387 
388 /*@
389     MatComputeOperator - Computes the explicit matrix
390 
391     Collective on Mat
392 
393     Input Parameters:
394 +   inmat - the matrix
395 -   mattype - the matrix type for the explicit operator
396 
397     Output Parameter:
398 .   mat - the explicit  operator
399 
400     Notes:
401     This computation is done by applying the operators to columns of the identity matrix.
402     This routine is costly in general, and is recommended for use only with relatively small systems.
403     Currently, this routine uses a dense matrix format if mattype == NULL.
404 
405     Level: advanced
406 
407 @*/
408 PetscErrorCode  MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
409 {
410   PetscFunctionBegin;
411   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
412   PetscValidPointer(mat,3);
413   PetscCall(MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat));
414   PetscFunctionReturn(0);
415 }
416 
417 /*@
418     MatComputeOperatorTranspose - Computes the explicit matrix representation of
419         a give matrix that can apply MatMultTranspose()
420 
421     Collective on Mat
422 
423     Input Parameters:
424 +   inmat - the matrix
425 -   mattype - the matrix type for the explicit operator
426 
427     Output Parameter:
428 .   mat - the explicit  operator transposed
429 
430     Notes:
431     This computation is done by applying the transpose of the operator to columns of the identity matrix.
432     This routine is costly in general, and is recommended for use only with relatively small systems.
433     Currently, this routine uses a dense matrix format if mattype == NULL.
434 
435     Level: advanced
436 
437 @*/
438 PetscErrorCode  MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
439 {
440   Mat            A;
441 
442   PetscFunctionBegin;
443   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
444   PetscValidPointer(mat,3);
445   PetscCall(MatCreateTranspose(inmat,&A));
446   PetscCall(MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat));
447   PetscCall(MatDestroy(&A));
448   PetscFunctionReturn(0);
449 }
450 
451 /*@
452   MatChop - Set all values in the matrix less than the tolerance to zero
453 
454   Input Parameters:
455 + A   - The matrix
456 - tol - The zero tolerance
457 
458   Output Parameters:
459 . A - The chopped matrix
460 
461   Level: intermediate
462 
463 .seealso: `MatCreate()`, `MatZeroEntries()`
464  @*/
465 PetscErrorCode MatChop(Mat A, PetscReal tol)
466 {
467   Mat            a;
468   PetscScalar    *newVals;
469   PetscInt       *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
470   PetscBool      flg;
471 
472   PetscFunctionBegin;
473   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, ""));
474   if (flg) {
475     PetscCall(MatDenseGetLocalMatrix(A, &a));
476     PetscCall(MatDenseGetLDA(a, &r));
477     PetscCall(MatGetSize(a, &rStart, &rEnd));
478     PetscCall(MatDenseGetArray(a, &newVals));
479     for (; colMax < rEnd; ++colMax) {
480       for (maxRows = 0; maxRows < rStart; ++maxRows) {
481         newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
482       }
483     }
484     PetscCall(MatDenseRestoreArray(a, &newVals));
485   } else {
486     PetscCall(MatGetOwnershipRange(A, &rStart, &rEnd));
487     PetscCall(MatGetRowUpperTriangular(A));
488     for (r = rStart; r < rEnd; ++r) {
489       PetscInt ncols;
490 
491       PetscCall(MatGetRow(A, r, &ncols, NULL, NULL));
492       colMax = PetscMax(colMax, ncols);
493       PetscCall(MatRestoreRow(A, r, &ncols, NULL, NULL));
494     }
495     numRows = rEnd - rStart;
496     PetscCall(MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A)));
497     PetscCall(PetscMalloc2(colMax, &newCols, colMax, &newVals));
498     PetscCall(MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg)); /* cache user-defined value */
499     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
500     /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd()             */
501     /* that are potentially called many times depending on the distribution of A */
502     for (r = rStart; r < rStart+maxRows; ++r) {
503       const PetscScalar *vals;
504       const PetscInt    *cols;
505       PetscInt           ncols, newcols, c;
506 
507       if (r < rEnd) {
508         PetscCall(MatGetRow(A, r, &ncols, &cols, &vals));
509         for (c = 0; c < ncols; ++c) {
510           newCols[c] = cols[c];
511           newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
512         }
513         newcols = ncols;
514         PetscCall(MatRestoreRow(A, r, &ncols, &cols, &vals));
515         PetscCall(MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES));
516       }
517       PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
518       PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
519     }
520     PetscCall(MatRestoreRowUpperTriangular(A));
521     PetscCall(PetscFree2(newCols, newVals));
522     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg)); /* reset option to its user-defined value */
523   }
524   PetscFunctionReturn(0);
525 }
526