xref: /petsc/src/mat/utils/axpy.c (revision 95cbbfd359aa2cc7e96afc77b64e4c0540dc7b23)
1 
2 #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/
3 
/*@
   MatAXPY - Computes Y = a*X + Y.

   Logically Collective on Mat

   Input Parameters:
+  Y - the matrix that is added to (also holds the result)
.  a - the scalar multiplier
.  X - the matrix added to Y
-  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
         or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)

   Level: intermediate

.keywords: matrix, add

.seealso: MatAYPX()
 @*/
PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       M1,M2,N1,N2;   /* global sizes of X and Y */
  PetscInt       m1,m2,n1,n2;   /* local sizes of X and Y */
  PetscBool      sametype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
  PetscValidLogicalCollectiveScalar(Y,a,2);
  PetscValidHeaderSpecific(X,MAT_CLASSID,3);
  PetscCheckSameComm(Y,1,X,3);
  /* X and Y must conform both globally and locally */
  ierr = MatGetSize(X,&M1,&N1);CHKERRQ(ierr);
  ierr = MatGetSize(Y,&M2,&N2);CHKERRQ(ierr);
  ierr = MatGetLocalSize(X,&m1,&n1);CHKERRQ(ierr);
  ierr = MatGetLocalSize(Y,&m2,&n2);CHKERRQ(ierr);
  if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes %D x %D, %D x %D",M1,M2,N1,N2);
  if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes %D x %D, %D x %D",m1,m2,n1,n2);

  /* a type-specific axpy is only usable when X and Y have the same type */
  ierr = PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
  if (Y->ops->axpy && sametype) {
    ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr);
  } else {
    if (str != DIFFERENT_NONZERO_PATTERN) {
      /* Y's nonzero pattern already covers X's: add in place */
      ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    } else {
      Mat B;

      /* build B preallocated for the union of the two patterns, compute the
         sum there, then make Y take over B's data */
      ierr = MatAXPY_Basic_Preallocate(Y,X,&B);CHKERRQ(ierr);
      ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
      ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    }
  }
  ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  /* the CPU copy was just modified; mark it as the valid one */
  if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
  }
#endif
  PetscFunctionReturn(0);
}
64 
/* Create B with the same type and parallel layout as Y, preallocated for the
   union of the nonzero patterns of Y and X (used by MatAXPY() when the
   patterns differ). B is returned without values set. */
PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
{
  PetscErrorCode ierr;
  PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;

  PetscFunctionBegin;
  /* look for any available faster alternative to the general preallocator */
  ierr = PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);CHKERRQ(ierr);
  if (preall) {
    ierr = (*preall)(Y,X,B);CHKERRQ(ierr);
  } else { /* Use MatPreallocator, assumes same row-col distribution */
    Mat      preallocator;
    PetscInt r,rstart,rend;
    PetscInt m,n,M,N;

    ierr = MatGetSize(Y,&M,&N);CHKERRQ(ierr);
    ierr = MatGetLocalSize(Y,&m,&n);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);CHKERRQ(ierr);
    ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
    ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetUp(preallocator);CHKERRQ(ierr);
    ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
    /* feed the sparsity of Y and X, row by row, into the preallocator so it
       records the union pattern */
    for (r = rstart; r < rend; ++r) {
      PetscInt          ncols;
      const PetscInt    *row;
      const PetscScalar *vals;

      ierr = MatGetRow(Y,r,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Y,r,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatGetRow(X,r,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(X,r,&ncols,&row,&vals);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)Y),B);CHKERRQ(ierr);
    ierr = MatSetType(*B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
    ierr = MatSetSizes(*B,m,n,M,N);CHKERRQ(ierr);
    /* PETSC_FALSE is the 'fill' flag of MatPreallocatorPreallocate(); the
       caller is expected to set all values of B itself */
    ierr = MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);CHKERRQ(ierr);
    ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
110 
/* Default implementation of Y = a*X + Y via the generic MatGetRow()/
   MatSetValues() interface; in this file it is reached only when
   str != DIFFERENT_NONZERO_PATTERN, i.e. when Y can already hold X's nonzeros. */
PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt          i,start,end,j,ncols,m,n;
  PetscErrorCode    ierr;
  const PetscInt    *row;
  PetscScalar       *val;   /* scratch buffer for scaled row values */
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
  if (a == 1.0) {
    /* fast path: no scaling buffer needed, add X's rows directly */
    for (i = start; i < end; i++) {
      ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
    }
  } else {
    PetscInt vs = 100;
    /* realloc if needed, as this function may be used in parallel */
    ierr = PetscMalloc1(vs,&val);CHKERRQ(ierr);
    for (i=start; i<end; i++) {
      ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
      if (vs < ncols) {
        vs   = PetscMin(2*ncols,n);   /* grow geometrically, capped at row length n */
        ierr = PetscRealloc(vs*sizeof(*val),&val);CHKERRQ(ierr);
      }
      for (j=0; j<ncols; j++) val[j] = a*vals[j];
      ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
    }
    ierr = PetscFree(val);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
148 
/* Same as MatAXPY_Basic() except the result Y + a*X is accumulated into B,
   which must already be preallocated for the union of the patterns of Y and X
   (see MatAXPY_Basic_Preallocate()). */
PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscInt          i,start,end,j,ncols,m,n;
  PetscErrorCode    ierr;
  const PetscInt    *row;
  PetscScalar       *val;   /* scratch buffer for scaled row values */
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
  if (a == 1.0) {
    /* fast path: no scaling buffer needed */
    for (i = start; i < end; i++) {
      /* B += row of Y */
      ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);

      /* B += row of X */
      ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
    }
  } else {
    PetscInt vs = 100;
    /* realloc if needed, as this function may be used in parallel */
    ierr = PetscMalloc1(vs,&val);CHKERRQ(ierr);
    for (i=start; i<end; i++) {
      /* B += row of Y */
      ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);

      /* B += a * row of X */
      ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
      if (vs < ncols) {
        vs   = PetscMin(2*ncols,n);   /* grow geometrically, capped at row length n */
        ierr = PetscRealloc(vs*sizeof(*val),&val);CHKERRQ(ierr);
      }
      for (j=0; j<ncols; j++) val[j] = a*vals[j];
      ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
    }
    ierr = PetscFree(val);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
194 
/*@
   MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.

   Neighbor-wise Collective on Mat

   Input Parameters:
+  Y - the matrix
-  a - the PetscScalar shift value

   Level: intermediate

   Notes:
    If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
   entry).

   To form Y = Y + diag(V) use MatDiagonalSet()

   Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
    preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.

.keywords: matrix, add, shift

.seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
 @*/
PetscErrorCode  MatShift(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
  if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  MatCheckPreallocated(Y,1);

  /* prefer the type-specific implementation when the type provides one */
  if (Y->ops->shift) {
    ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr);
  } else {
    ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  }

  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  /* the CPU copy was just modified; mark it as the valid one */
  if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
  }
#endif
  PetscFunctionReturn(0);
}
244 
245 PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
246 {
247   PetscErrorCode ierr;
248   PetscInt       i,start,end;
249   PetscScalar    *v;
250 
251   PetscFunctionBegin;
252   ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr);
253   ierr = VecGetArray(D,&v);CHKERRQ(ierr);
254   for (i=start; i<end; i++) {
255     ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr);
256   }
257   ierr = VecRestoreArray(D,&v);CHKERRQ(ierr);
258   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
259   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
260   PetscFunctionReturn(0);
261 }
262 
/*@
   MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
   that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
   INSERT_VALUES.

   Input Parameters:
+  Y - the input matrix
.  D - the diagonal matrix, represented as a vector
-  is - INSERT_VALUES or ADD_VALUES

   Neighbor-wise Collective on Mat and Vec

   Notes:
    If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
   entry).

   Level: intermediate

.keywords: matrix, add, shift, diagonal

.seealso: MatShift(), MatScale(), MatDiagonalScale()
@*/
PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  PetscInt       matlocal,veclocal;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
  PetscValidHeaderSpecific(D,VEC_CLASSID,2);
  /* D must be distributed like the rows of Y */
  ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr);
  ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr);
  if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
  /* prefer the type-specific implementation when the type provides one */
  if (Y->ops->diagonalset) {
    ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
305 
306 /*@
307    MatAYPX - Computes Y = a*Y + X.
308 
309    Logically on Mat
310 
311    Input Parameters:
312 +  a - the PetscScalar multiplier
313 .  Y - the first matrix
314 .  X - the second matrix
315 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
316 
317    Level: intermediate
318 
319 .keywords: matrix, add
320 
321 .seealso: MatAXPY()
322  @*/
323 PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
324 {
325   PetscScalar    one = 1.0;
326   PetscErrorCode ierr;
327   PetscInt       mX,mY,nX,nY;
328 
329   PetscFunctionBegin;
330   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
331   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
332   PetscValidLogicalCollectiveScalar(Y,a,2);
333   ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr);
334   ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr);
335   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
336 
337   ierr = MatScale(Y,a);CHKERRQ(ierr);
338   ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr);
339   PetscFunctionReturn(0);
340 }
341 
/*@
    MatComputeExplicitOperator - Computes the explicit matrix

    Collective on Mat

    Input Parameter:
.   inmat - the matrix

    Output Parameter:
.   mat - the explicit operator

    Notes:
    This computation is done by applying the operators to columns of the
    identity matrix.

    Currently, this routine uses a dense matrix format when 1 processor
    is used and a sparse format otherwise.  This routine is costly in general,
    and is recommended for use only with relatively small systems.

    Level: advanced

.keywords: Mat, compute, explicit, operator
@*/
PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
  PetscValidPointer(mat,2);

  ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* delegate to the shell conversion: dense output in serial, AIJ in parallel */
  ierr = MatConvert_Shell(inmat,size == 1 ? MATSEQDENSE : MATAIJ,MAT_INITIAL_MATRIX,mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
380 
381 /*@
382     MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
383         a give matrix that can apply MatMultTranspose()
384 
385     Collective on Mat
386 
387     Input Parameter:
388 .   inmat - the matrix
389 
390     Output Parameter:
391 .   mat - the explict  operator transposed
392 
393     Notes:
394     This computation is done by applying the transpose of the operator to columns of the
395     identity matrix.
396 
397     Currently, this routine uses a dense matrix format when 1 processor
398     is used and a sparse format otherwise.  This routine is costly in general,
399     and is recommended for use only with relatively small systems.
400 
401     Level: advanced
402 
403 .keywords: Mat, compute, explicit, operator
404 @*/
405 PetscErrorCode  MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
406 {
407   Vec            in,out;
408   PetscErrorCode ierr;
409   PetscInt       i,m,n,M,N,*rows,start,end;
410   MPI_Comm       comm;
411   PetscScalar    *array,zero = 0.0,one = 1.0;
412   PetscMPIInt    size;
413 
414   PetscFunctionBegin;
415   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
416   PetscValidPointer(mat,2);
417 
418   ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
419   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
420 
421   ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr);
422   ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr);
423   ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr);
424   ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
425   ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr);
426   ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr);
427   for (i=0; i<m; i++) rows[i] = start + i;
428 
429   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
430   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
431   if (size == 1) {
432     ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr);
433     ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr);
434   } else {
435     ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
436     ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr);
437   }
438 
439   for (i=0; i<N; i++) {
440 
441     ierr = VecSet(in,zero);CHKERRQ(ierr);
442     ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
443     ierr = VecAssemblyBegin(in);CHKERRQ(ierr);
444     ierr = VecAssemblyEnd(in);CHKERRQ(ierr);
445 
446     ierr = MatMultTranspose(inmat,in,out);CHKERRQ(ierr);
447 
448     ierr = VecGetArray(out,&array);CHKERRQ(ierr);
449     ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
450     ierr = VecRestoreArray(out,&array);CHKERRQ(ierr);
451 
452   }
453   ierr = PetscFree(rows);CHKERRQ(ierr);
454   ierr = VecDestroy(&out);CHKERRQ(ierr);
455   ierr = VecDestroy(&in);CHKERRQ(ierr);
456   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
457   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 
461 /*@
462   MatChop - Set all values in the matrix less than the tolerance to zero
463 
464   Input Parameters:
465 + A   - The matrix
466 - tol - The zero tolerance
467 
468   Output Parameters:
469 . A - The chopped matrix
470 
471   Level: intermediate
472 
473 .seealso: MatCreate(), MatZeroEntries()
474  @*/
475 PetscErrorCode MatChop(Mat A, PetscReal tol)
476 {
477   PetscScalar    *newVals;
478   PetscInt       *newCols;
479   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;
480   PetscErrorCode ierr;
481 
482   PetscFunctionBegin;
483   ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr);
484   for (r = rStart; r < rEnd; ++r) {
485     PetscInt ncols;
486 
487     ierr   = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
488     colMax = PetscMax(colMax, ncols);CHKERRQ(ierr);
489     ierr   = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
490   }
491   numRows = rEnd - rStart;
492   ierr    = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
493   ierr    = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr);
494   for (r = rStart; r < rStart+maxRows; ++r) {
495     const PetscScalar *vals;
496     const PetscInt    *cols;
497     PetscInt           ncols, newcols, c;
498 
499     if (r < rEnd) {
500       ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
501       for (c = 0; c < ncols; ++c) {
502         newCols[c] = cols[c];
503         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
504       }
505       newcols = ncols;
506       ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
507       ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr);
508     }
509     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
510     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
511   }
512   ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr);
513   PetscFunctionReturn(0);
514 }
515