xref: /petsc/src/mat/utils/axpy.c (revision 40badf4fbc550ac1f60bd080eaff6de6d55b946d)
1 
2 #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/
3 
4 static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
5 {
6   Mat            A,F;
7   PetscErrorCode (*f)(Mat,Mat*);
8 
9   PetscFunctionBegin;
10   CHKERRQ(PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f));
11   if (f) {
12     CHKERRQ(MatTransposeGetMat(T,&A));
13     if (T == X) {
14       CHKERRQ(PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
15       CHKERRQ(MatTranspose(A,MAT_INITIAL_MATRIX,&F));
16       A = Y;
17     } else {
18       CHKERRQ(PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
19       CHKERRQ(MatTranspose(X,MAT_INITIAL_MATRIX,&F));
20     }
21   } else {
22     CHKERRQ(MatHermitianTransposeGetMat(T,&A));
23     if (T == X) {
24       CHKERRQ(PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
25       CHKERRQ(MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F));
26       A = Y;
27     } else {
28       CHKERRQ(PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
29       CHKERRQ(MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F));
30     }
31   }
32   CHKERRQ(MatAXPY(A,a,F,str));
33   CHKERRQ(MatDestroy(&F));
34   PetscFunctionReturn(0);
35 }
36 
37 /*@
38    MatAXPY - Computes Y = a*X + Y.
39 
40    Logically  Collective on Mat
41 
42    Input Parameters:
43 +  a - the scalar multiplier
44 .  X - the first matrix
45 .  Y - the second matrix
46 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
47 
48    Notes: No operation is performed when a is zero.
49 
50    Level: intermediate
51 
52 .seealso: MatAYPX()
53  @*/
54 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
55 {
56   PetscInt       M1,M2,N1,N2;
57   PetscInt       m1,m2,n1,n2;
58   MatType        t1,t2;
59   PetscBool      sametype,transpose;
60 
61   PetscFunctionBegin;
62   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
63   PetscValidLogicalCollectiveScalar(Y,a,2);
64   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
65   PetscCheckSameComm(Y,1,X,3);
66   CHKERRQ(MatGetSize(X,&M1,&N1));
67   CHKERRQ(MatGetSize(Y,&M2,&N2));
68   CHKERRQ(MatGetLocalSize(X,&m1,&n1));
69   CHKERRQ(MatGetLocalSize(Y,&m2,&n2));
70   PetscCheck(M1 == M2 && N1 == N2,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT,M1,N1,M2,N2);
71   PetscCheck(m1 == m2 && n1 == n2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT,m1,n1,m2,n2);
72   PetscCheck(Y->assembled,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (Y)");
73   PetscCheck(X->assembled,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (X)");
74   if (a == (PetscScalar)0.0) PetscFunctionReturn(0);
75   if (Y == X) {
76     CHKERRQ(MatScale(Y,1.0 + a));
77     PetscFunctionReturn(0);
78   }
79   CHKERRQ(MatGetType(X,&t1));
80   CHKERRQ(MatGetType(Y,&t2));
81   CHKERRQ(PetscStrcmp(t1,t2,&sametype));
82   CHKERRQ(PetscLogEventBegin(MAT_AXPY,Y,0,0,0));
83   if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
84     CHKERRQ((*Y->ops->axpy)(Y,a,X,str));
85   } else {
86     CHKERRQ(PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose));
87     if (transpose) {
88       CHKERRQ(MatTransposeAXPY_Private(Y,a,X,str,X));
89     } else {
90       CHKERRQ(PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose));
91       if (transpose) {
92         CHKERRQ(MatTransposeAXPY_Private(Y,a,X,str,Y));
93       } else {
94         CHKERRQ(MatAXPY_Basic(Y,a,X,str));
95       }
96     }
97   }
98   CHKERRQ(PetscLogEventEnd(MAT_AXPY,Y,0,0,0));
99   PetscFunctionReturn(0);
100 }
101 
102 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
103 {
104   PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
105 
106   PetscFunctionBegin;
107   /* look for any available faster alternative to the general preallocator */
108   CHKERRQ(PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall));
109   if (preall) {
110     CHKERRQ((*preall)(Y,X,B));
111   } else { /* Use MatPrellocator, assumes same row-col distribution */
112     Mat      preallocator;
113     PetscInt r,rstart,rend;
114     PetscInt m,n,M,N;
115 
116     CHKERRQ(MatGetRowUpperTriangular(Y));
117     CHKERRQ(MatGetRowUpperTriangular(X));
118     CHKERRQ(MatGetSize(Y,&M,&N));
119     CHKERRQ(MatGetLocalSize(Y,&m,&n));
120     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)Y),&preallocator));
121     CHKERRQ(MatSetType(preallocator,MATPREALLOCATOR));
122     CHKERRQ(MatSetLayouts(preallocator,Y->rmap,Y->cmap));
123     CHKERRQ(MatSetUp(preallocator));
124     CHKERRQ(MatGetOwnershipRange(preallocator,&rstart,&rend));
125     for (r = rstart; r < rend; ++r) {
126       PetscInt          ncols;
127       const PetscInt    *row;
128       const PetscScalar *vals;
129 
130       CHKERRQ(MatGetRow(Y,r,&ncols,&row,&vals));
131       CHKERRQ(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
132       CHKERRQ(MatRestoreRow(Y,r,&ncols,&row,&vals));
133       CHKERRQ(MatGetRow(X,r,&ncols,&row,&vals));
134       CHKERRQ(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
135       CHKERRQ(MatRestoreRow(X,r,&ncols,&row,&vals));
136     }
137     CHKERRQ(MatSetOption(preallocator,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
138     CHKERRQ(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY));
139     CHKERRQ(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY));
140     CHKERRQ(MatRestoreRowUpperTriangular(Y));
141     CHKERRQ(MatRestoreRowUpperTriangular(X));
142 
143     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)Y),B));
144     CHKERRQ(MatSetType(*B,((PetscObject)Y)->type_name));
145     CHKERRQ(MatSetLayouts(*B,Y->rmap,Y->cmap));
146     CHKERRQ(MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B));
147     CHKERRQ(MatDestroy(&preallocator));
148   }
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
153 {
154   PetscBool      isshell,isdense,isnest;
155 
156   PetscFunctionBegin;
157   CHKERRQ(MatIsShell(Y,&isshell));
158   if (isshell) { /* MatShell has special support for AXPY */
159     PetscErrorCode (*f)(Mat,PetscScalar,Mat,MatStructure);
160 
161     CHKERRQ(MatGetOperation(Y,MATOP_AXPY,(void (**)(void))&f));
162     if (f) {
163       CHKERRQ((*f)(Y,a,X,str));
164       PetscFunctionReturn(0);
165     }
166   }
167   /* no need to preallocate if Y is dense */
168   CHKERRQ(PetscObjectBaseTypeCompareAny((PetscObject)Y,&isdense,MATSEQDENSE,MATMPIDENSE,""));
169   if (isdense) {
170     CHKERRQ(PetscObjectTypeCompare((PetscObject)X,MATNEST,&isnest));
171     if (isnest) {
172       CHKERRQ(MatAXPY_Dense_Nest(Y,a,X));
173       PetscFunctionReturn(0);
174     }
175     if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
176   }
177   if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
178     PetscInt          i,start,end,j,ncols,m,n;
179     const PetscInt    *row;
180     PetscScalar       *val;
181     const PetscScalar *vals;
182 
183     CHKERRQ(MatGetSize(X,&m,&n));
184     CHKERRQ(MatGetOwnershipRange(X,&start,&end));
185     CHKERRQ(MatGetRowUpperTriangular(X));
186     if (a == 1.0) {
187       for (i = start; i < end; i++) {
188         CHKERRQ(MatGetRow(X,i,&ncols,&row,&vals));
189         CHKERRQ(MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES));
190         CHKERRQ(MatRestoreRow(X,i,&ncols,&row,&vals));
191       }
192     } else {
193       PetscInt vs = 100;
194       /* realloc if needed, as this function may be used in parallel */
195       CHKERRQ(PetscMalloc1(vs,&val));
196       for (i=start; i<end; i++) {
197         CHKERRQ(MatGetRow(X,i,&ncols,&row,&vals));
198         if (vs < ncols) {
199           vs   = PetscMin(2*ncols,n);
200           CHKERRQ(PetscRealloc(vs*sizeof(*val),&val));
201         }
202         for (j=0; j<ncols; j++) val[j] = a*vals[j];
203         CHKERRQ(MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES));
204         CHKERRQ(MatRestoreRow(X,i,&ncols,&row,&vals));
205       }
206       CHKERRQ(PetscFree(val));
207     }
208     CHKERRQ(MatRestoreRowUpperTriangular(X));
209     CHKERRQ(MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY));
210     CHKERRQ(MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY));
211   } else {
212     Mat B;
213 
214     CHKERRQ(MatAXPY_Basic_Preallocate(Y,X,&B));
215     CHKERRQ(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
216     CHKERRQ(MatHeaderMerge(Y,&B));
217   }
218   PetscFunctionReturn(0);
219 }
220 
221 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
222 {
223   PetscInt          i,start,end,j,ncols,m,n;
224   const PetscInt    *row;
225   PetscScalar       *val;
226   const PetscScalar *vals;
227 
228   PetscFunctionBegin;
229   CHKERRQ(MatGetSize(X,&m,&n));
230   CHKERRQ(MatGetOwnershipRange(X,&start,&end));
231   CHKERRQ(MatGetRowUpperTriangular(Y));
232   CHKERRQ(MatGetRowUpperTriangular(X));
233   if (a == 1.0) {
234     for (i = start; i < end; i++) {
235       CHKERRQ(MatGetRow(Y,i,&ncols,&row,&vals));
236       CHKERRQ(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES));
237       CHKERRQ(MatRestoreRow(Y,i,&ncols,&row,&vals));
238 
239       CHKERRQ(MatGetRow(X,i,&ncols,&row,&vals));
240       CHKERRQ(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES));
241       CHKERRQ(MatRestoreRow(X,i,&ncols,&row,&vals));
242     }
243   } else {
244     PetscInt vs = 100;
245     /* realloc if needed, as this function may be used in parallel */
246     CHKERRQ(PetscMalloc1(vs,&val));
247     for (i=start; i<end; i++) {
248       CHKERRQ(MatGetRow(Y,i,&ncols,&row,&vals));
249       CHKERRQ(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES));
250       CHKERRQ(MatRestoreRow(Y,i,&ncols,&row,&vals));
251 
252       CHKERRQ(MatGetRow(X,i,&ncols,&row,&vals));
253       if (vs < ncols) {
254         vs   = PetscMin(2*ncols,n);
255         CHKERRQ(PetscRealloc(vs*sizeof(*val),&val));
256       }
257       for (j=0; j<ncols; j++) val[j] = a*vals[j];
258       CHKERRQ(MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES));
259       CHKERRQ(MatRestoreRow(X,i,&ncols,&row,&vals));
260     }
261     CHKERRQ(PetscFree(val));
262   }
263   CHKERRQ(MatRestoreRowUpperTriangular(Y));
264   CHKERRQ(MatRestoreRowUpperTriangular(X));
265   CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
266   CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
267   PetscFunctionReturn(0);
268 }
269 
270 /*@
271    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.
272 
273    Neighbor-wise Collective on Mat
274 
275    Input Parameters:
276 +  Y - the matrices
277 -  a - the PetscScalar
278 
279    Level: intermediate
280 
281    Notes:
282     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
283    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
284    entry). No operation is performed when a is zero.
285 
286    To form Y = Y + diag(V) use MatDiagonalSet()
287 
288 .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
289  @*/
290 PetscErrorCode  MatShift(Mat Y,PetscScalar a)
291 {
292   PetscFunctionBegin;
293   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
294   PetscCheck(Y->assembled,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
295   PetscCheck(!Y->factortype,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
296   MatCheckPreallocated(Y,1);
297   if (a == 0.0) PetscFunctionReturn(0);
298 
299   if (Y->ops->shift) CHKERRQ((*Y->ops->shift)(Y,a));
300   else CHKERRQ(MatShift_Basic(Y,a));
301 
302   CHKERRQ(PetscObjectStateIncrease((PetscObject)Y));
303   PetscFunctionReturn(0);
304 }
305 
306 PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
307 {
308   PetscInt          i,start,end;
309   const PetscScalar *v;
310 
311   PetscFunctionBegin;
312   CHKERRQ(MatGetOwnershipRange(Y,&start,&end));
313   CHKERRQ(VecGetArrayRead(D,&v));
314   for (i=start; i<end; i++) {
315     CHKERRQ(MatSetValues(Y,1,&i,1,&i,v+i-start,is));
316   }
317   CHKERRQ(VecRestoreArrayRead(D,&v));
318   CHKERRQ(MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY));
319   CHKERRQ(MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY));
320   PetscFunctionReturn(0);
321 }
322 
323 /*@
324    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
325    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
326    INSERT_VALUES.
327 
328    Neighbor-wise Collective on Mat
329 
330    Input Parameters:
331 +  Y - the input matrix
332 .  D - the diagonal matrix, represented as a vector
333 -  i - INSERT_VALUES or ADD_VALUES
334 
335    Notes:
336     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
337    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
338    entry).
339 
340    Level: intermediate
341 
342 .seealso: MatShift(), MatScale(), MatDiagonalScale()
343 @*/
344 PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
345 {
346   PetscInt       matlocal,veclocal;
347 
348   PetscFunctionBegin;
349   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
350   PetscValidHeaderSpecific(D,VEC_CLASSID,2);
351   CHKERRQ(MatGetLocalSize(Y,&matlocal,NULL));
352   CHKERRQ(VecGetLocalSize(D,&veclocal));
353   PetscCheck(matlocal == veclocal,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %" PetscInt_FMT " does not match that of vector for diagonal %" PetscInt_FMT,matlocal,veclocal);
354   if (Y->ops->diagonalset) {
355     CHKERRQ((*Y->ops->diagonalset)(Y,D,is));
356   } else {
357     CHKERRQ(MatDiagonalSet_Default(Y,D,is));
358   }
359   CHKERRQ(PetscObjectStateIncrease((PetscObject)Y));
360   PetscFunctionReturn(0);
361 }
362 
363 /*@
364    MatAYPX - Computes Y = a*Y + X.
365 
366    Logically on Mat
367 
368    Input Parameters:
369 +  a - the PetscScalar multiplier
370 .  Y - the first matrix
371 .  X - the second matrix
372 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
373 
374    Level: intermediate
375 
376 .seealso: MatAXPY()
377  @*/
378 PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
379 {
380   PetscFunctionBegin;
381   CHKERRQ(MatScale(Y,a));
382   CHKERRQ(MatAXPY(Y,1.0,X,str));
383   PetscFunctionReturn(0);
384 }
385 
386 /*@
387     MatComputeOperator - Computes the explicit matrix
388 
389     Collective on Mat
390 
391     Input Parameters:
392 +   inmat - the matrix
393 -   mattype - the matrix type for the explicit operator
394 
395     Output Parameter:
396 .   mat - the explicit  operator
397 
398     Notes:
399     This computation is done by applying the operators to columns of the identity matrix.
400     This routine is costly in general, and is recommended for use only with relatively small systems.
401     Currently, this routine uses a dense matrix format if mattype == NULL.
402 
403     Level: advanced
404 
405 @*/
406 PetscErrorCode  MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
407 {
408   PetscFunctionBegin;
409   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
410   PetscValidPointer(mat,3);
411   CHKERRQ(MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat));
412   PetscFunctionReturn(0);
413 }
414 
415 /*@
416     MatComputeOperatorTranspose - Computes the explicit matrix representation of
417         a give matrix that can apply MatMultTranspose()
418 
419     Collective on Mat
420 
421     Input Parameters:
422 +   inmat - the matrix
423 -   mattype - the matrix type for the explicit operator
424 
425     Output Parameter:
426 .   mat - the explicit  operator transposed
427 
428     Notes:
429     This computation is done by applying the transpose of the operator to columns of the identity matrix.
430     This routine is costly in general, and is recommended for use only with relatively small systems.
431     Currently, this routine uses a dense matrix format if mattype == NULL.
432 
433     Level: advanced
434 
435 @*/
436 PetscErrorCode  MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
437 {
438   Mat            A;
439 
440   PetscFunctionBegin;
441   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
442   PetscValidPointer(mat,3);
443   CHKERRQ(MatCreateTranspose(inmat,&A));
444   CHKERRQ(MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat));
445   CHKERRQ(MatDestroy(&A));
446   PetscFunctionReturn(0);
447 }
448 
449 /*@
450   MatChop - Set all values in the matrix less than the tolerance to zero
451 
452   Input Parameters:
453 + A   - The matrix
454 - tol - The zero tolerance
455 
456   Output Parameters:
457 . A - The chopped matrix
458 
459   Level: intermediate
460 
461 .seealso: MatCreate(), MatZeroEntries()
462  @*/
463 PetscErrorCode MatChop(Mat A, PetscReal tol)
464 {
465   Mat            a;
466   PetscScalar    *newVals;
467   PetscInt       *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
468   PetscBool      flg;
469 
470   PetscFunctionBegin;
471   CHKERRQ(PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, ""));
472   if (flg) {
473     CHKERRQ(MatDenseGetLocalMatrix(A, &a));
474     CHKERRQ(MatDenseGetLDA(a, &r));
475     CHKERRQ(MatGetSize(a, &rStart, &rEnd));
476     CHKERRQ(MatDenseGetArray(a, &newVals));
477     for (; colMax < rEnd; ++colMax) {
478       for (maxRows = 0; maxRows < rStart; ++maxRows) {
479         newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
480       }
481     }
482     CHKERRQ(MatDenseRestoreArray(a, &newVals));
483   } else {
484     CHKERRQ(MatGetOwnershipRange(A, &rStart, &rEnd));
485     CHKERRQ(MatGetRowUpperTriangular(A));
486     for (r = rStart; r < rEnd; ++r) {
487       PetscInt ncols;
488 
489       CHKERRQ(MatGetRow(A, r, &ncols, NULL, NULL));
490       colMax = PetscMax(colMax, ncols);
491       CHKERRQ(MatRestoreRow(A, r, &ncols, NULL, NULL));
492     }
493     numRows = rEnd - rStart;
494     CHKERRMPI(MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A)));
495     CHKERRQ(PetscMalloc2(colMax, &newCols, colMax, &newVals));
496     CHKERRQ(MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg)); /* cache user-defined value */
497     CHKERRQ(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
498     /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd()             */
499     /* that are potentially called many times depending on the distribution of A */
500     for (r = rStart; r < rStart+maxRows; ++r) {
501       const PetscScalar *vals;
502       const PetscInt    *cols;
503       PetscInt           ncols, newcols, c;
504 
505       if (r < rEnd) {
506         CHKERRQ(MatGetRow(A, r, &ncols, &cols, &vals));
507         for (c = 0; c < ncols; ++c) {
508           newCols[c] = cols[c];
509           newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
510         }
511         newcols = ncols;
512         CHKERRQ(MatRestoreRow(A, r, &ncols, &cols, &vals));
513         CHKERRQ(MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES));
514       }
515       CHKERRQ(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
516       CHKERRQ(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
517     }
518     CHKERRQ(MatRestoreRowUpperTriangular(A));
519     CHKERRQ(PetscFree2(newCols, newVals));
520     CHKERRQ(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg)); /* reset option to its user-defined value */
521   }
522   PetscFunctionReturn(0);
523 }
524