xref: /petsc/src/mat/utils/axpy.c (revision 6a5217c03994f2d95bb2e6dbd8bed42381aeb015)
1 
2 #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/
3 
4 static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
5 {
6   Mat            A,F;
7   PetscErrorCode (*f)(Mat,Mat*);
8 
9   PetscFunctionBegin;
10   PetscCall(PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f));
11   if (f) {
12     PetscCall(MatTransposeGetMat(T,&A));
13     if (T == X) {
14       PetscCall(PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
15       PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&F));
16       A = Y;
17     } else {
18       PetscCall(PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
19       PetscCall(MatTranspose(X,MAT_INITIAL_MATRIX,&F));
20     }
21   } else {
22     PetscCall(MatHermitianTransposeGetMat(T,&A));
23     if (T == X) {
24       PetscCall(PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
25       PetscCall(MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F));
26       A = Y;
27     } else {
28       PetscCall(PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n"));
29       PetscCall(MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F));
30     }
31   }
32   PetscCall(MatAXPY(A,a,F,str));
33   PetscCall(MatDestroy(&F));
34   PetscFunctionReturn(0);
35 }
36 
37 /*@
38    MatAXPY - Computes Y = a*X + Y.
39 
40    Logically  Collective on Mat
41 
42    Input Parameters:
43 +  a - the scalar multiplier
44 .  X - the first matrix
45 .  Y - the second matrix
46 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
47 
48    Notes: No operation is performed when a is zero.
49 
50    Level: intermediate
51 
52 .seealso: MatAYPX()
53  @*/
54 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
55 {
56   PetscInt       M1,M2,N1,N2;
57   PetscInt       m1,m2,n1,n2;
58   MatType        t1,t2;
59   PetscBool      sametype,transpose;
60 
61   PetscFunctionBegin;
62   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
63   PetscValidLogicalCollectiveScalar(Y,a,2);
64   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
65   PetscCheckSameComm(Y,1,X,3);
66   PetscCall(MatGetSize(X,&M1,&N1));
67   PetscCall(MatGetSize(Y,&M2,&N2));
68   PetscCall(MatGetLocalSize(X,&m1,&n1));
69   PetscCall(MatGetLocalSize(Y,&m2,&n2));
70   PetscCheck(M1 == M2 && N1 == N2,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT,M1,N1,M2,N2);
71   PetscCheck(m1 == m2 && n1 == n2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT,m1,n1,m2,n2);
72   PetscCheck(Y->assembled,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (Y)");
73   PetscCheck(X->assembled,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (X)");
74   if (a == (PetscScalar)0.0) PetscFunctionReturn(0);
75   if (Y == X) {
76     PetscCall(MatScale(Y,1.0 + a));
77     PetscFunctionReturn(0);
78   }
79   PetscCall(MatGetType(X,&t1));
80   PetscCall(MatGetType(Y,&t2));
81   PetscCall(PetscStrcmp(t1,t2,&sametype));
82   PetscCall(PetscLogEventBegin(MAT_AXPY,Y,0,0,0));
83   if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
84     PetscCall((*Y->ops->axpy)(Y,a,X,str));
85   } else {
86     PetscCall(PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose));
87     if (transpose) {
88       PetscCall(MatTransposeAXPY_Private(Y,a,X,str,X));
89     } else {
90       PetscCall(PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose));
91       if (transpose) {
92         PetscCall(MatTransposeAXPY_Private(Y,a,X,str,Y));
93       } else {
94         PetscCall(MatAXPY_Basic(Y,a,X,str));
95       }
96     }
97   }
98   PetscCall(PetscLogEventEnd(MAT_AXPY,Y,0,0,0));
99   PetscFunctionReturn(0);
100 }
101 
102 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
103 {
104   PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
105 
106   PetscFunctionBegin;
107   /* look for any available faster alternative to the general preallocator */
108   PetscCall(PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall));
109   if (preall) {
110     PetscCall((*preall)(Y,X,B));
111   } else { /* Use MatPrellocator, assumes same row-col distribution */
112     Mat      preallocator;
113     PetscInt r,rstart,rend;
114     PetscInt m,n,M,N;
115 
116     PetscCall(MatGetRowUpperTriangular(Y));
117     PetscCall(MatGetRowUpperTriangular(X));
118     PetscCall(MatGetSize(Y,&M,&N));
119     PetscCall(MatGetLocalSize(Y,&m,&n));
120     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&preallocator));
121     PetscCall(MatSetType(preallocator,MATPREALLOCATOR));
122     PetscCall(MatSetLayouts(preallocator,Y->rmap,Y->cmap));
123     PetscCall(MatSetUp(preallocator));
124     PetscCall(MatGetOwnershipRange(preallocator,&rstart,&rend));
125     for (r = rstart; r < rend; ++r) {
126       PetscInt          ncols;
127       const PetscInt    *row;
128       const PetscScalar *vals;
129 
130       PetscCall(MatGetRow(Y,r,&ncols,&row,&vals));
131       PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
132       PetscCall(MatRestoreRow(Y,r,&ncols,&row,&vals));
133       PetscCall(MatGetRow(X,r,&ncols,&row,&vals));
134       PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
135       PetscCall(MatRestoreRow(X,r,&ncols,&row,&vals));
136     }
137     PetscCall(MatSetOption(preallocator,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
138     PetscCall(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY));
139     PetscCall(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY));
140     PetscCall(MatRestoreRowUpperTriangular(Y));
141     PetscCall(MatRestoreRowUpperTriangular(X));
142 
143     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),B));
144     PetscCall(MatSetType(*B,((PetscObject)Y)->type_name));
145     PetscCall(MatSetLayouts(*B,Y->rmap,Y->cmap));
146     PetscCall(MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B));
147     PetscCall(MatDestroy(&preallocator));
148   }
149   PetscFunctionReturn(0);
150 }
151 
152 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
153 {
154   PetscBool      isshell,isdense,isnest;
155 
156   PetscFunctionBegin;
157   PetscCall(MatIsShell(Y,&isshell));
158   if (isshell) { /* MatShell has special support for AXPY */
159     PetscErrorCode (*f)(Mat,PetscScalar,Mat,MatStructure);
160 
161     PetscCall(MatGetOperation(Y,MATOP_AXPY,(void (**)(void))&f));
162     if (f) {
163       PetscCall((*f)(Y,a,X,str));
164       PetscFunctionReturn(0);
165     }
166   }
167   /* no need to preallocate if Y is dense */
168   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)Y,&isdense,MATSEQDENSE,MATMPIDENSE,""));
169   if (isdense) {
170     PetscCall(PetscObjectTypeCompare((PetscObject)X,MATNEST,&isnest));
171     if (isnest) {
172       PetscCall(MatAXPY_Dense_Nest(Y,a,X));
173       PetscFunctionReturn(0);
174     }
175     if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
176   }
177   if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
178     PetscInt          i,start,end,j,ncols,m,n;
179     const PetscInt    *row;
180     PetscScalar       *val;
181     const PetscScalar *vals;
182 
183     PetscCall(MatGetSize(X,&m,&n));
184     PetscCall(MatGetOwnershipRange(X,&start,&end));
185     PetscCall(MatGetRowUpperTriangular(X));
186     if (a == 1.0) {
187       for (i = start; i < end; i++) {
188         PetscCall(MatGetRow(X,i,&ncols,&row,&vals));
189         PetscCall(MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES));
190         PetscCall(MatRestoreRow(X,i,&ncols,&row,&vals));
191       }
192     } else {
193       PetscInt vs = 100;
194       /* realloc if needed, as this function may be used in parallel */
195       PetscCall(PetscMalloc1(vs,&val));
196       for (i=start; i<end; i++) {
197         PetscCall(MatGetRow(X,i,&ncols,&row,&vals));
198         if (vs < ncols) {
199           vs   = PetscMin(2*ncols,n);
200           PetscCall(PetscRealloc(vs*sizeof(*val),&val));
201         }
202         for (j=0; j<ncols; j++) val[j] = a*vals[j];
203         PetscCall(MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES));
204         PetscCall(MatRestoreRow(X,i,&ncols,&row,&vals));
205       }
206       PetscCall(PetscFree(val));
207     }
208     PetscCall(MatRestoreRowUpperTriangular(X));
209     PetscCall(MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY));
210     PetscCall(MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY));
211   } else {
212     Mat B;
213 
214     PetscCall(MatAXPY_Basic_Preallocate(Y,X,&B));
215     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
216     PetscCall(MatHeaderMerge(Y,&B));
217   }
218   PetscFunctionReturn(0);
219 }
220 
221 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
222 {
223   PetscInt          i,start,end,j,ncols,m,n;
224   const PetscInt    *row;
225   PetscScalar       *val;
226   const PetscScalar *vals;
227 
228   PetscFunctionBegin;
229   PetscCall(MatGetSize(X,&m,&n));
230   PetscCall(MatGetOwnershipRange(X,&start,&end));
231   PetscCall(MatGetRowUpperTriangular(Y));
232   PetscCall(MatGetRowUpperTriangular(X));
233   if (a == 1.0) {
234     for (i = start; i < end; i++) {
235       PetscCall(MatGetRow(Y,i,&ncols,&row,&vals));
236       PetscCall(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES));
237       PetscCall(MatRestoreRow(Y,i,&ncols,&row,&vals));
238 
239       PetscCall(MatGetRow(X,i,&ncols,&row,&vals));
240       PetscCall(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES));
241       PetscCall(MatRestoreRow(X,i,&ncols,&row,&vals));
242     }
243   } else {
244     PetscInt vs = 100;
245     /* realloc if needed, as this function may be used in parallel */
246     PetscCall(PetscMalloc1(vs,&val));
247     for (i=start; i<end; i++) {
248       PetscCall(MatGetRow(Y,i,&ncols,&row,&vals));
249       PetscCall(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES));
250       PetscCall(MatRestoreRow(Y,i,&ncols,&row,&vals));
251 
252       PetscCall(MatGetRow(X,i,&ncols,&row,&vals));
253       if (vs < ncols) {
254         vs   = PetscMin(2*ncols,n);
255         PetscCall(PetscRealloc(vs*sizeof(*val),&val));
256       }
257       for (j=0; j<ncols; j++) val[j] = a*vals[j];
258       PetscCall(MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES));
259       PetscCall(MatRestoreRow(X,i,&ncols,&row,&vals));
260     }
261     PetscCall(PetscFree(val));
262   }
263   PetscCall(MatRestoreRowUpperTriangular(Y));
264   PetscCall(MatRestoreRowUpperTriangular(X));
265   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
266   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
267   PetscFunctionReturn(0);
268 }
269 
270 /*@
271    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.
272 
273    Neighbor-wise Collective on Mat
274 
275    Input Parameters:
276 +  Y - the matrices
277 -  a - the PetscScalar
278 
279    Level: intermediate
280 
281    Notes:
282     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
283    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
284    entry). No operation is performed when a is zero.
285 
286    To form Y = Y + diag(V) use MatDiagonalSet()
287 
288 .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
289  @*/
290 PetscErrorCode  MatShift(Mat Y,PetscScalar a)
291 {
292   PetscFunctionBegin;
293   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
294   PetscCheck(Y->assembled,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
295   PetscCheck(!Y->factortype,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
296   MatCheckPreallocated(Y,1);
297   if (a == 0.0) PetscFunctionReturn(0);
298 
299   if (Y->ops->shift) PetscCall((*Y->ops->shift)(Y,a));
300   else PetscCall(MatShift_Basic(Y,a));
301 
302   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
303   PetscFunctionReturn(0);
304 }
305 
306 PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
307 {
308   PetscInt          i,start,end;
309   const PetscScalar *v;
310 
311   PetscFunctionBegin;
312   PetscCall(MatGetOwnershipRange(Y,&start,&end));
313   PetscCall(VecGetArrayRead(D,&v));
314   for (i=start; i<end; i++) {
315     PetscCall(MatSetValues(Y,1,&i,1,&i,v+i-start,is));
316   }
317   PetscCall(VecRestoreArrayRead(D,&v));
318   PetscCall(MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY));
319   PetscCall(MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY));
320   PetscFunctionReturn(0);
321 }
322 
323 /*@
324    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
325    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
326    INSERT_VALUES.
327 
328    Neighbor-wise Collective on Mat
329 
330    Input Parameters:
331 +  Y - the input matrix
332 .  D - the diagonal matrix, represented as a vector
333 -  i - INSERT_VALUES or ADD_VALUES
334 
335    Notes:
336     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
337    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
338    entry).
339 
340    Level: intermediate
341 
342 .seealso: MatShift(), MatScale(), MatDiagonalScale()
343 @*/
344 PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
345 {
346   PetscInt       matlocal,veclocal;
347 
348   PetscFunctionBegin;
349   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
350   PetscValidHeaderSpecific(D,VEC_CLASSID,2);
351   PetscCall(MatGetLocalSize(Y,&matlocal,NULL));
352   PetscCall(VecGetLocalSize(D,&veclocal));
353   PetscCheck(matlocal == veclocal,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %" PetscInt_FMT " does not match that of vector for diagonal %" PetscInt_FMT,matlocal,veclocal);
354   if (Y->ops->diagonalset) {
355     PetscCall((*Y->ops->diagonalset)(Y,D,is));
356   } else {
357     PetscCall(MatDiagonalSet_Default(Y,D,is));
358   }
359   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
360   PetscFunctionReturn(0);
361 }
362 
363 /*@
364    MatAYPX - Computes Y = a*Y + X.
365 
366    Logically on Mat
367 
368    Input Parameters:
369 +  a - the PetscScalar multiplier
370 .  Y - the first matrix
371 .  X - the second matrix
372 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
373 
374    Level: intermediate
375 
376 .seealso: MatAXPY()
377  @*/
378 PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
379 {
380   PetscFunctionBegin;
381   PetscCall(MatScale(Y,a));
382   PetscCall(MatAXPY(Y,1.0,X,str));
383   PetscFunctionReturn(0);
384 }
385 
386 /*@
387     MatComputeOperator - Computes the explicit matrix
388 
389     Collective on Mat
390 
391     Input Parameters:
392 +   inmat - the matrix
393 -   mattype - the matrix type for the explicit operator
394 
395     Output Parameter:
396 .   mat - the explicit  operator
397 
398     Notes:
399     This computation is done by applying the operators to columns of the identity matrix.
400     This routine is costly in general, and is recommended for use only with relatively small systems.
401     Currently, this routine uses a dense matrix format if mattype == NULL.
402 
403     Level: advanced
404 
405 @*/
406 PetscErrorCode  MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
407 {
408   PetscFunctionBegin;
409   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
410   PetscValidPointer(mat,3);
411   PetscCall(MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat));
412   PetscFunctionReturn(0);
413 }
414 
415 /*@
416     MatComputeOperatorTranspose - Computes the explicit matrix representation of
417         a give matrix that can apply MatMultTranspose()
418 
419     Collective on Mat
420 
421     Input Parameters:
422 +   inmat - the matrix
423 -   mattype - the matrix type for the explicit operator
424 
425     Output Parameter:
426 .   mat - the explicit  operator transposed
427 
428     Notes:
429     This computation is done by applying the transpose of the operator to columns of the identity matrix.
430     This routine is costly in general, and is recommended for use only with relatively small systems.
431     Currently, this routine uses a dense matrix format if mattype == NULL.
432 
433     Level: advanced
434 
435 @*/
436 PetscErrorCode  MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
437 {
438   Mat            A;
439 
440   PetscFunctionBegin;
441   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
442   PetscValidPointer(mat,3);
443   PetscCall(MatCreateTranspose(inmat,&A));
444   PetscCall(MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat));
445   PetscCall(MatDestroy(&A));
446   PetscFunctionReturn(0);
447 }
448 
449 /*@
450   MatChop - Set all values in the matrix less than the tolerance to zero
451 
452   Input Parameters:
453 + A   - The matrix
454 - tol - The zero tolerance
455 
456   Output Parameters:
457 . A - The chopped matrix
458 
459   Level: intermediate
460 
461 .seealso: MatCreate(), MatZeroEntries()
462  @*/
463 PetscErrorCode MatChop(Mat A, PetscReal tol)
464 {
465   Mat            a;
466   PetscScalar    *newVals;
467   PetscInt       *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
468   PetscBool      flg;
469 
470   PetscFunctionBegin;
471   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, ""));
472   if (flg) {
473     PetscCall(MatDenseGetLocalMatrix(A, &a));
474     PetscCall(MatDenseGetLDA(a, &r));
475     PetscCall(MatGetSize(a, &rStart, &rEnd));
476     PetscCall(MatDenseGetArray(a, &newVals));
477     for (; colMax < rEnd; ++colMax) {
478       for (maxRows = 0; maxRows < rStart; ++maxRows) {
479         newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
480       }
481     }
482     PetscCall(MatDenseRestoreArray(a, &newVals));
483   } else {
484     PetscCall(MatGetOwnershipRange(A, &rStart, &rEnd));
485     PetscCall(MatGetRowUpperTriangular(A));
486     for (r = rStart; r < rEnd; ++r) {
487       PetscInt ncols;
488 
489       PetscCall(MatGetRow(A, r, &ncols, NULL, NULL));
490       colMax = PetscMax(colMax, ncols);
491       PetscCall(MatRestoreRow(A, r, &ncols, NULL, NULL));
492     }
493     numRows = rEnd - rStart;
494     PetscCall(MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A)));
495     PetscCall(PetscMalloc2(colMax, &newCols, colMax, &newVals));
496     PetscCall(MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg)); /* cache user-defined value */
497     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
498     /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd()             */
499     /* that are potentially called many times depending on the distribution of A */
500     for (r = rStart; r < rStart+maxRows; ++r) {
501       const PetscScalar *vals;
502       const PetscInt    *cols;
503       PetscInt           ncols, newcols, c;
504 
505       if (r < rEnd) {
506         PetscCall(MatGetRow(A, r, &ncols, &cols, &vals));
507         for (c = 0; c < ncols; ++c) {
508           newCols[c] = cols[c];
509           newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
510         }
511         newcols = ncols;
512         PetscCall(MatRestoreRow(A, r, &ncols, &cols, &vals));
513         PetscCall(MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES));
514       }
515       PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
516       PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
517     }
518     PetscCall(MatRestoreRowUpperTriangular(A));
519     PetscCall(PetscFree2(newCols, newVals));
520     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg)); /* reset option to its user-defined value */
521   }
522   PetscFunctionReturn(0);
523 }
524