xref: /petsc/src/mat/utils/axpy.c (revision 0bacdada41612e60f5faaed79721bc8f857c9767)
1 
2 #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/
3 
4 static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
5 {
6   PetscErrorCode ierr,(*f)(Mat,Mat*);
7   Mat            A,F;
8 
9   PetscFunctionBegin;
10   ierr = PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);CHKERRQ(ierr);
11   if (f) {
12     ierr = MatTransposeGetMat(T,&A);CHKERRQ(ierr);
13     if (T == X) {
14       ierr = PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");CHKERRQ(ierr);
15       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
16       A = Y;
17     } else {
18       ierr = PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");CHKERRQ(ierr);
19       ierr = MatTranspose(X,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
20     }
21   } else {
22     ierr = MatHermitianTransposeGetMat(T,&A);CHKERRQ(ierr);
23     if (T == X) {
24       ierr = PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");CHKERRQ(ierr);
25       ierr = MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
26       A = Y;
27     } else {
28       ierr = PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");CHKERRQ(ierr);
29       ierr = MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);CHKERRQ(ierr);
30     }
31   }
32   ierr = MatAXPY(A,a,F,str);CHKERRQ(ierr);
33   ierr = MatDestroy(&F);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 
37 /*@
38    MatAXPY - Computes Y = a*X + Y.
39 
40    Logically  Collective on Mat
41 
42    Input Parameters:
43 +  a - the scalar multiplier
44 .  X - the first matrix
45 .  Y - the second matrix
46 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
47          or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
48 
49    Level: intermediate
50 
51 .keywords: matrix, add
52 
53 .seealso: MatAYPX()
54  @*/
55 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
56 {
57   PetscErrorCode ierr;
58   PetscInt       M1,M2,N1,N2;
59   PetscInt       m1,m2,n1,n2;
60   MatType        t1,t2;
61   PetscBool      sametype,transpose;
62 
63   PetscFunctionBegin;
64   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
65   PetscValidLogicalCollectiveScalar(Y,a,2);
66   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
67   PetscCheckSameComm(Y,1,X,3);
68   ierr = MatGetSize(X,&M1,&N1);CHKERRQ(ierr);
69   ierr = MatGetSize(Y,&M2,&N2);CHKERRQ(ierr);
70   ierr = MatGetLocalSize(X,&m1,&n1);CHKERRQ(ierr);
71   ierr = MatGetLocalSize(Y,&m2,&n2);CHKERRQ(ierr);
72   if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes %D x %D, %D x %D",M1,M2,N1,N2);
73   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes %D x %D, %D x %D",m1,m2,n1,n2);
74 
75   ierr = MatGetType(X,&t1);CHKERRQ(ierr);
76   ierr = MatGetType(Y,&t2);CHKERRQ(ierr);
77   ierr = PetscStrcmp(t1,t2,&sametype);CHKERRQ(ierr);
78   ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
79   if (Y->ops->axpy && sametype) {
80     ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr);
81   } else {
82     ierr = PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);CHKERRQ(ierr);
83     if (transpose) {
84         ierr = MatTransposeAXPY_Private(Y,a,X,str,X);CHKERRQ(ierr);
85     } else {
86       ierr = PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);CHKERRQ(ierr);
87       if (transpose) {
88         ierr = MatTransposeAXPY_Private(Y,a,X,str,Y);CHKERRQ(ierr);
89       } else {
90         if (str != DIFFERENT_NONZERO_PATTERN) {
91           ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
92         } else {
93           Mat B;
94 
95           ierr = MatAXPY_Basic_Preallocate(Y,X,&B);CHKERRQ(ierr);
96           ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
97           ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
98         }
99       }
100     }
101   }
102   ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
103 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
104   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
105     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
106   }
107 #endif
108   PetscFunctionReturn(0);
109 }
110 
111 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
112 {
113   PetscErrorCode ierr;
114   PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
115 
116   PetscFunctionBegin;
117   /* look for any available faster alternative to the general preallocator */
118   ierr = PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);CHKERRQ(ierr);
119   if (preall) {
120     ierr = (*preall)(Y,X,B);CHKERRQ(ierr);
121   } else { /* Use MatPrellocator, assumes same row-col distribution */
122     Mat      preallocator;
123     PetscInt r,rstart,rend;
124     PetscInt m,n,M,N;
125 
126     ierr = MatGetSize(Y,&M,&N);CHKERRQ(ierr);
127     ierr = MatGetLocalSize(Y,&m,&n);CHKERRQ(ierr);
128     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);CHKERRQ(ierr);
129     ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
130     ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
131     ierr = MatSetUp(preallocator);CHKERRQ(ierr);
132     ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
133     for (r = rstart; r < rend; ++r) {
134       PetscInt          ncols;
135       const PetscInt    *row;
136       const PetscScalar *vals;
137 
138       ierr = MatGetRow(Y,r,&ncols,&row,&vals);CHKERRQ(ierr);
139       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
140       ierr = MatRestoreRow(Y,r,&ncols,&row,&vals);CHKERRQ(ierr);
141       ierr = MatGetRow(X,r,&ncols,&row,&vals);CHKERRQ(ierr);
142       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
143       ierr = MatRestoreRow(X,r,&ncols,&row,&vals);CHKERRQ(ierr);
144     }
145     ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
146     ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
147 
148     ierr = MatCreate(PetscObjectComm((PetscObject)Y),B);CHKERRQ(ierr);
149     ierr = MatSetType(*B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
150     ierr = MatSetSizes(*B,m,n,M,N);CHKERRQ(ierr);
151     ierr = MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);CHKERRQ(ierr);
152     ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
153   }
154   PetscFunctionReturn(0);
155 }
156 
157 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
158 {
159   PetscInt          i,start,end,j,ncols,m,n;
160   PetscErrorCode    ierr;
161   const PetscInt    *row;
162   PetscScalar       *val;
163   const PetscScalar *vals;
164 
165   PetscFunctionBegin;
166   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
167   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
168   if (a == 1.0) {
169     for (i = start; i < end; i++) {
170       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
171       ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
172       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
173     }
174   } else {
175     PetscInt vs = 100;
176     /* realloc if needed, as this function may be used in parallel */
177     ierr = PetscMalloc1(vs,&val);CHKERRQ(ierr);
178     for (i=start; i<end; i++) {
179       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
180       if (vs < ncols) {
181         vs   = PetscMin(2*ncols,n);
182         ierr = PetscRealloc(vs*sizeof(*val),&val);CHKERRQ(ierr);
183       }
184       for (j=0; j<ncols; j++) val[j] = a*vals[j];
185       ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
186       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
187     }
188     ierr = PetscFree(val);CHKERRQ(ierr);
189   }
190   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
191   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
192   PetscFunctionReturn(0);
193 }
194 
195 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
196 {
197   PetscInt          i,start,end,j,ncols,m,n;
198   PetscErrorCode    ierr;
199   const PetscInt    *row;
200   PetscScalar       *val;
201   const PetscScalar *vals;
202 
203   PetscFunctionBegin;
204   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
205   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
206   if (a == 1.0) {
207     for (i = start; i < end; i++) {
208       ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
209       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
210       ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
211 
212       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
213       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
214       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
215     }
216   } else {
217     PetscInt vs = 100;
218     /* realloc if needed, as this function may be used in parallel */
219     ierr = PetscMalloc1(vs,&val);CHKERRQ(ierr);
220     for (i=start; i<end; i++) {
221       ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
222       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
223       ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
224 
225       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
226       if (vs < ncols) {
227         vs   = PetscMin(2*ncols,n);
228         ierr = PetscRealloc(vs*sizeof(*val),&val);CHKERRQ(ierr);
229       }
230       for (j=0; j<ncols; j++) val[j] = a*vals[j];
231       ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
232       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
233     }
234     ierr = PetscFree(val);CHKERRQ(ierr);
235   }
236   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
237   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
238   PetscFunctionReturn(0);
239 }
240 
241 /*@
242    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.
243 
244    Neighbor-wise Collective on Mat
245 
246    Input Parameters:
247 +  Y - the matrices
248 -  a - the PetscScalar
249 
250    Level: intermediate
251 
252    Notes:
253     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
254    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
255    entry).
256 
257    To form Y = Y + diag(V) use MatDiagonalSet()
258 
259    Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
260     preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
261 
262 .keywords: matrix, add, shift
263 
264 .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
265  @*/
266 PetscErrorCode  MatShift(Mat Y,PetscScalar a)
267 {
268   PetscErrorCode ierr;
269 
270   PetscFunctionBegin;
271   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
272   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
273   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
274   MatCheckPreallocated(Y,1);
275 
276   if (Y->ops->shift) {
277     ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr);
278   } else {
279     ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
280   }
281 
282   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
283 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
284   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
285     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
286   }
287 #endif
288   PetscFunctionReturn(0);
289 }
290 
291 PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
292 {
293   PetscErrorCode ierr;
294   PetscInt       i,start,end;
295   PetscScalar    *v;
296 
297   PetscFunctionBegin;
298   ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr);
299   ierr = VecGetArray(D,&v);CHKERRQ(ierr);
300   for (i=start; i<end; i++) {
301     ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr);
302   }
303   ierr = VecRestoreArray(D,&v);CHKERRQ(ierr);
304   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
305   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 /*@
310    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
311    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
312    INSERT_VALUES.
313 
314    Input Parameters:
315 +  Y - the input matrix
316 .  D - the diagonal matrix, represented as a vector
317 -  i - INSERT_VALUES or ADD_VALUES
318 
319    Neighbor-wise Collective on Mat and Vec
320 
321    Notes:
322     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
323    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
324    entry).
325 
326    Level: intermediate
327 
328 .keywords: matrix, add, shift, diagonal
329 
330 .seealso: MatShift(), MatScale(), MatDiagonalScale()
331 @*/
332 PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
333 {
334   PetscErrorCode ierr;
335   PetscInt       matlocal,veclocal;
336 
337   PetscFunctionBegin;
338   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
339   PetscValidHeaderSpecific(D,VEC_CLASSID,2);
340   ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr);
341   ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr);
342   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
343   if (Y->ops->diagonalset) {
344     ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr);
345   } else {
346     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
347   }
348   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 /*@
353    MatAYPX - Computes Y = a*Y + X.
354 
355    Logically on Mat
356 
357    Input Parameters:
358 +  a - the PetscScalar multiplier
359 .  Y - the first matrix
360 .  X - the second matrix
361 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
362 
363    Level: intermediate
364 
365 .keywords: matrix, add
366 
367 .seealso: MatAXPY()
368  @*/
369 PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
370 {
371   PetscScalar    one = 1.0;
372   PetscErrorCode ierr;
373   PetscInt       mX,mY,nX,nY;
374 
375   PetscFunctionBegin;
376   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
377   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
378   PetscValidLogicalCollectiveScalar(Y,a,2);
379   ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr);
380   ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr);
381   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
382   ierr = MatScale(Y,a);CHKERRQ(ierr);
383   ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr);
384   PetscFunctionReturn(0);
385 }
386 
387 /*@
388     MatComputeOperator - Computes the explicit matrix
389 
390     Collective on Mat
391 
392     Input Parameter:
393 +   inmat - the matrix
394 -   mattype - the matrix type for the explicit operator
395 
396     Output Parameter:
397 .   mat - the explict  operator
398 
399     Notes:
400     This computation is done by applying the operators to columns of the identity matrix.
401     This routine is costly in general, and is recommended for use only with relatively small systems.
402     Currently, this routine uses a dense matrix format if mattype == NULL.
403 
404     Level: advanced
405 
406 .keywords: Mat, compute, explicit, operator
407 @*/
408 PetscErrorCode  MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
409 {
410   PetscErrorCode ierr;
411 
412   PetscFunctionBegin;
413   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
414   PetscValidPointer(mat,3);
415   ierr = MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 /*@
420     MatComputeOperatorTranspose - Computes the explicit matrix representation of
421         a give matrix that can apply MatMultTranspose()
422 
423     Collective on Mat
424 
425     Input Parameter:
426 .   inmat - the matrix
427 
428     Output Parameter:
429 .   mat - the explict  operator transposed
430 
431     Notes:
432     This computation is done by applying the transpose of the operator to columns of the identity matrix.
433     This routine is costly in general, and is recommended for use only with relatively small systems.
434     Currently, this routine uses a dense matrix format if mattype == NULL.
435 
436     Level: advanced
437 
438 .keywords: Mat, compute, explicit, operator
439 @*/
440 PetscErrorCode  MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
441 {
442   Mat            A;
443   PetscErrorCode ierr;
444 
445   PetscFunctionBegin;
446   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
447   PetscValidPointer(mat,3);
448   ierr = MatCreateTranspose(inmat,&A);CHKERRQ(ierr);
449   ierr = MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);CHKERRQ(ierr);
450   ierr = MatDestroy(&A);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 /*@
455   MatChop - Set all values in the matrix less than the tolerance to zero
456 
457   Input Parameters:
458 + A   - The matrix
459 - tol - The zero tolerance
460 
461   Output Parameters:
462 . A - The chopped matrix
463 
464   Level: intermediate
465 
466 .seealso: MatCreate(), MatZeroEntries()
467  @*/
468 PetscErrorCode MatChop(Mat A, PetscReal tol)
469 {
470   PetscScalar    *newVals;
471   PetscInt       *newCols;
472   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;
473   PetscErrorCode ierr;
474 
475   PetscFunctionBegin;
476   ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr);
477   for (r = rStart; r < rEnd; ++r) {
478     PetscInt ncols;
479 
480     ierr   = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
481     colMax = PetscMax(colMax, ncols);CHKERRQ(ierr);
482     ierr   = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
483   }
484   numRows = rEnd - rStart;
485   ierr    = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
486   ierr    = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr);
487   for (r = rStart; r < rStart+maxRows; ++r) {
488     const PetscScalar *vals;
489     const PetscInt    *cols;
490     PetscInt           ncols, newcols, c;
491 
492     if (r < rEnd) {
493       ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
494       for (c = 0; c < ncols; ++c) {
495         newCols[c] = cols[c];
496         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
497       }
498       newcols = ncols;
499       ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
500       ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr);
501     }
502     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
503     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
504   }
505   ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr);
506   PetscFunctionReturn(0);
507 }
508