xref: /petsc/src/mat/utils/axpy.c (revision 4e97f8ebb84dd841ee7d0d48926a4de5d5170225)
1 
2 #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/
3 
4 /*@
5    MatAXPY - Computes Y = a*X + Y.
6 
7    Logically  Collective on Mat
8 
9    Input Parameters:
10 +  a - the scalar multiplier
11 .  X - the first matrix
12 .  Y - the second matrix
13 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
14          or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
15 
16    Level: intermediate
17 
18 .keywords: matrix, add
19 
20 .seealso: MatAYPX()
21  @*/
22 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
23 {
24   PetscErrorCode ierr;
25   PetscInt       M1,M2,N1,N2;
26   PetscInt       m1,m2,n1,n2;
27   PetscBool      sametype;
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
31   PetscValidLogicalCollectiveScalar(Y,a,2);
32   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
33   PetscCheckSameComm(Y,1,X,3);
34   ierr = MatGetSize(X,&M1,&N1);CHKERRQ(ierr);
35   ierr = MatGetSize(Y,&M2,&N2);CHKERRQ(ierr);
36   ierr = MatGetLocalSize(X,&m1,&n1);CHKERRQ(ierr);
37   ierr = MatGetLocalSize(Y,&m2,&n2);CHKERRQ(ierr);
38   if (M1 != M2 || N1 != N2) SETERRQ4(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes %D x %D, %D x %D",M1,M2,N1,N2);
39   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes %D x %D, %D x %D",m1,m2,n1,n2);
40 
41   ierr = PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);CHKERRQ(ierr);
42   ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
43   if (Y->ops->axpy && sametype) {
44     ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr);
45   } else {
46     if (str != DIFFERENT_NONZERO_PATTERN) {
47       ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
48     } else {
49       Mat B;
50 
51       ierr = MatAXPY_Basic_Preallocate(Y,X,&B);CHKERRQ(ierr);
52       ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
53       ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
54     }
55   }
56   ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
57 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
58   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
59     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
60   }
61 #endif
62   PetscFunctionReturn(0);
63 }
64 
65 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
66 {
67   PetscErrorCode ierr;
68   PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
69 
70   PetscFunctionBegin;
71   /* look for any available faster alternative to the general preallocator */
72   ierr = PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);CHKERRQ(ierr);
73   if (preall) {
74     ierr = (*preall)(Y,X,B);CHKERRQ(ierr);
75   } else { /* Use MatPrellocator, assumes same row-col distribution */
76     Mat      preallocator;
77     PetscInt r,rstart,rend;
78     PetscInt m,n,M,N;
79 
80     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
81     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
82     ierr = MatGetSize(Y,&M,&N);CHKERRQ(ierr);
83     ierr = MatGetLocalSize(Y,&m,&n);CHKERRQ(ierr);
84     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);CHKERRQ(ierr);
85     ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
86     ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
87     ierr = MatSetUp(preallocator);CHKERRQ(ierr);
88     ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
89     for (r = rstart; r < rend; ++r) {
90       PetscInt          ncols;
91       const PetscInt    *row;
92       const PetscScalar *vals;
93 
94       ierr = MatGetRow(Y,r,&ncols,&row,&vals);CHKERRQ(ierr);
95       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
96       ierr = MatRestoreRow(Y,r,&ncols,&row,&vals);CHKERRQ(ierr);
97       ierr = MatGetRow(X,r,&ncols,&row,&vals);CHKERRQ(ierr);
98       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
99       ierr = MatRestoreRow(X,r,&ncols,&row,&vals);CHKERRQ(ierr);
100     }
101     ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
102     ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
103     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
104     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
105 
106     ierr = MatCreate(PetscObjectComm((PetscObject)Y),B);CHKERRQ(ierr);
107     ierr = MatSetType(*B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
108     ierr = MatSetSizes(*B,m,n,M,N);CHKERRQ(ierr);
109     ierr = MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);CHKERRQ(ierr);
110     ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
111   }
112   PetscFunctionReturn(0);
113 }
114 
115 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
116 {
117   PetscInt          i,start,end,j,ncols,m,n;
118   PetscErrorCode    ierr;
119   const PetscInt    *row;
120   PetscScalar       *val;
121   const PetscScalar *vals;
122 
123   PetscFunctionBegin;
124   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
125   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
126   ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
127   if (a == 1.0) {
128     for (i = start; i < end; i++) {
129       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
130       ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
131       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
132     }
133   } else {
134     PetscInt vs = 100;
135     /* realloc if needed, as this function may be used in parallel */
136     ierr = PetscMalloc1(vs,&val);CHKERRQ(ierr);
137     for (i=start; i<end; i++) {
138       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
139       if (vs < ncols) {
140         vs   = PetscMin(2*ncols,n);
141         ierr = PetscRealloc(vs*sizeof(*val),&val);CHKERRQ(ierr);
142       }
143       for (j=0; j<ncols; j++) val[j] = a*vals[j];
144       ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
145       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
146     }
147     ierr = PetscFree(val);CHKERRQ(ierr);
148   }
149   ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
150   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
151   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
152   PetscFunctionReturn(0);
153 }
154 
155 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
156 {
157   PetscInt          i,start,end,j,ncols,m,n;
158   PetscErrorCode    ierr;
159   const PetscInt    *row;
160   PetscScalar       *val;
161   const PetscScalar *vals;
162 
163   PetscFunctionBegin;
164   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
165   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
166   ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
167   ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
168   if (a == 1.0) {
169     for (i = start; i < end; i++) {
170       ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
171       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
172       ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
173 
174       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
175       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
176       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
177     }
178   } else {
179     PetscInt vs = 100;
180     /* realloc if needed, as this function may be used in parallel */
181     ierr = PetscMalloc1(vs,&val);CHKERRQ(ierr);
182     for (i=start; i<end; i++) {
183       ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
184       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
185       ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
186 
187       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
188       if (vs < ncols) {
189         vs   = PetscMin(2*ncols,n);
190         ierr = PetscRealloc(vs*sizeof(*val),&val);CHKERRQ(ierr);
191       }
192       for (j=0; j<ncols; j++) val[j] = a*vals[j];
193       ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
194       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
195     }
196     ierr = PetscFree(val);CHKERRQ(ierr);
197   }
198   ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
199   ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
200   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
201   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
202   PetscFunctionReturn(0);
203 }
204 
205 /*@
206    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.
207 
208    Neighbor-wise Collective on Mat
209 
210    Input Parameters:
211 +  Y - the matrices
212 -  a - the PetscScalar
213 
214    Level: intermediate
215 
216    Notes:
217     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
218    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
219    entry).
220 
221    To form Y = Y + diag(V) use MatDiagonalSet()
222 
223    Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
224     preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
225 
226 .keywords: matrix, add, shift
227 
228 .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
229  @*/
230 PetscErrorCode  MatShift(Mat Y,PetscScalar a)
231 {
232   PetscErrorCode ierr;
233 
234   PetscFunctionBegin;
235   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
236   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
237   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
238   MatCheckPreallocated(Y,1);
239 
240   if (Y->ops->shift) {
241     ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr);
242   } else {
243     ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
244   }
245 
246   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
247 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
248   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
249     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
250   }
251 #endif
252   PetscFunctionReturn(0);
253 }
254 
255 PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
256 {
257   PetscErrorCode ierr;
258   PetscInt       i,start,end;
259   PetscScalar    *v;
260 
261   PetscFunctionBegin;
262   ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr);
263   ierr = VecGetArray(D,&v);CHKERRQ(ierr);
264   for (i=start; i<end; i++) {
265     ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr);
266   }
267   ierr = VecRestoreArray(D,&v);CHKERRQ(ierr);
268   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
269   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
270   PetscFunctionReturn(0);
271 }
272 
273 /*@
274    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
275    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
276    INSERT_VALUES.
277 
278    Input Parameters:
279 +  Y - the input matrix
280 .  D - the diagonal matrix, represented as a vector
281 -  i - INSERT_VALUES or ADD_VALUES
282 
283    Neighbor-wise Collective on Mat and Vec
284 
285    Notes:
286     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
287    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
288    entry).
289 
290    Level: intermediate
291 
292 .keywords: matrix, add, shift, diagonal
293 
294 .seealso: MatShift(), MatScale(), MatDiagonalScale()
295 @*/
296 PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
297 {
298   PetscErrorCode ierr;
299   PetscInt       matlocal,veclocal;
300 
301   PetscFunctionBegin;
302   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
303   PetscValidHeaderSpecific(D,VEC_CLASSID,2);
304   ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr);
305   ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr);
306   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
307   if (Y->ops->diagonalset) {
308     ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr);
309   } else {
310     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
311   }
312   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 /*@
317    MatAYPX - Computes Y = a*Y + X.
318 
319    Logically on Mat
320 
321    Input Parameters:
322 +  a - the PetscScalar multiplier
323 .  Y - the first matrix
324 .  X - the second matrix
325 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
326 
327    Level: intermediate
328 
329 .keywords: matrix, add
330 
331 .seealso: MatAXPY()
332  @*/
333 PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
334 {
335   PetscScalar    one = 1.0;
336   PetscErrorCode ierr;
337   PetscInt       mX,mY,nX,nY;
338 
339   PetscFunctionBegin;
340   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
341   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
342   PetscValidLogicalCollectiveScalar(Y,a,2);
343   ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr);
344   ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr);
345   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
346 
347   ierr = MatScale(Y,a);CHKERRQ(ierr);
348   ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 /*@
353     MatComputeExplicitOperator - Computes the explicit matrix
354 
355     Collective on Mat
356 
357     Input Parameter:
358 .   inmat - the matrix
359 
360     Output Parameter:
361 .   mat - the explict  operator
362 
363     Notes:
364     This computation is done by applying the operators to columns of the
365     identity matrix.
366 
367     Currently, this routine uses a dense matrix format when 1 processor
368     is used and a sparse format otherwise.  This routine is costly in general,
369     and is recommended for use only with relatively small systems.
370 
371     Level: advanced
372 
373 .keywords: Mat, compute, explicit, operator
374 @*/
375 PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
376 {
377   PetscErrorCode ierr;
378   MPI_Comm       comm;
379   PetscMPIInt    size;
380 
381   PetscFunctionBegin;
382   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
383   PetscValidPointer(mat,2);
384 
385   ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
386   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
387   ierr = MatConvert_Shell(inmat,size == 1 ? MATSEQDENSE : MATAIJ,MAT_INITIAL_MATRIX,mat);CHKERRQ(ierr);
388   PetscFunctionReturn(0);
389 }
390 
391 /*@
392     MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
393         a give matrix that can apply MatMultTranspose()
394 
395     Collective on Mat
396 
397     Input Parameter:
398 .   inmat - the matrix
399 
400     Output Parameter:
401 .   mat - the explict  operator transposed
402 
403     Notes:
404     This computation is done by applying the transpose of the operator to columns of the
405     identity matrix.
406 
407     Currently, this routine uses a dense matrix format when 1 processor
408     is used and a sparse format otherwise.  This routine is costly in general,
409     and is recommended for use only with relatively small systems.
410 
411     Level: advanced
412 
413 .keywords: Mat, compute, explicit, operator
414 @*/
415 PetscErrorCode  MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
416 {
417   Vec            in,out;
418   PetscErrorCode ierr;
419   PetscInt       i,m,n,M,N,*rows,start,end;
420   MPI_Comm       comm;
421   PetscScalar    *array,zero = 0.0,one = 1.0;
422   PetscMPIInt    size;
423 
424   PetscFunctionBegin;
425   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
426   PetscValidPointer(mat,2);
427 
428   ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
429   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
430 
431   ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr);
432   ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr);
433   ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr);
434   ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
435   ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr);
436   ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr);
437   for (i=0; i<m; i++) rows[i] = start + i;
438 
439   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
440   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
441   if (size == 1) {
442     ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr);
443     ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr);
444   } else {
445     ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
446     ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr);
447   }
448 
449   for (i=0; i<N; i++) {
450 
451     ierr = VecSet(in,zero);CHKERRQ(ierr);
452     ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
453     ierr = VecAssemblyBegin(in);CHKERRQ(ierr);
454     ierr = VecAssemblyEnd(in);CHKERRQ(ierr);
455 
456     ierr = MatMultTranspose(inmat,in,out);CHKERRQ(ierr);
457 
458     ierr = VecGetArray(out,&array);CHKERRQ(ierr);
459     ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
460     ierr = VecRestoreArray(out,&array);CHKERRQ(ierr);
461 
462   }
463   ierr = PetscFree(rows);CHKERRQ(ierr);
464   ierr = VecDestroy(&out);CHKERRQ(ierr);
465   ierr = VecDestroy(&in);CHKERRQ(ierr);
466   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 
471 /*@
472   MatChop - Set all values in the matrix less than the tolerance to zero
473 
474   Input Parameters:
475 + A   - The matrix
476 - tol - The zero tolerance
477 
478   Output Parameters:
479 . A - The chopped matrix
480 
481   Level: intermediate
482 
483 .seealso: MatCreate(), MatZeroEntries()
484  @*/
485 PetscErrorCode MatChop(Mat A, PetscReal tol)
486 {
487   PetscScalar    *newVals;
488   PetscInt       *newCols;
489   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;
490   PetscErrorCode ierr;
491 
492   PetscFunctionBegin;
493   ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr);
494   for (r = rStart; r < rEnd; ++r) {
495     PetscInt ncols;
496 
497     ierr   = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
498     colMax = PetscMax(colMax, ncols);CHKERRQ(ierr);
499     ierr   = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
500   }
501   numRows = rEnd - rStart;
502   ierr    = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
503   ierr    = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr);
504   for (r = rStart; r < rStart+maxRows; ++r) {
505     const PetscScalar *vals;
506     const PetscInt    *cols;
507     PetscInt           ncols, newcols, c;
508 
509     if (r < rEnd) {
510       ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
511       for (c = 0; c < ncols; ++c) {
512         newCols[c] = cols[c];
513         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
514       }
515       newcols = ncols;
516       ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
517       ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr);
518     }
519     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
520     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
521   }
522   ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525