xref: /petsc/src/mat/utils/axpy.c (revision df4cd43f92eaa320656440c40edb1046daee8f75)
1 
2 #include <petsc/private/matimpl.h> /*I   "petscmat.h"  I*/
3 
4 static PetscErrorCode MatTransposeAXPY_Private(Mat Y, PetscScalar a, Mat X, MatStructure str, Mat T)
5 {
6   Mat A, F;
7   PetscErrorCode (*f)(Mat, Mat *);
8 
9   PetscFunctionBegin;
10   PetscCall(PetscObjectQueryFunction((PetscObject)T, "MatTransposeGetMat_C", &f));
11   if (f) {
12     PetscCall(MatTransposeGetMat(T, &A));
13     if (T == X) {
14       PetscCall(PetscInfo(NULL, "Explicitly transposing X of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
15       PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &F));
16       A = Y;
17     } else {
18       PetscCall(PetscInfo(NULL, "Transposing X because Y of type MATTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
19       PetscCall(MatTranspose(X, MAT_INITIAL_MATRIX, &F));
20     }
21   } else {
22     PetscCall(MatHermitianTransposeGetMat(T, &A));
23     if (T == X) {
24       PetscCall(PetscInfo(NULL, "Explicitly Hermitian transposing X of type MATHERITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
25       PetscCall(MatHermitianTranspose(A, MAT_INITIAL_MATRIX, &F));
26       A = Y;
27     } else {
28       PetscCall(PetscInfo(NULL, "Hermitian transposing X because Y of type MATHERITIANTRANSPOSEVIRTUAL to perform MatAXPY()\n"));
29       PetscCall(MatHermitianTranspose(X, MAT_INITIAL_MATRIX, &F));
30     }
31   }
32   PetscCall(MatAXPY(A, a, F, str));
33   PetscCall(MatDestroy(&F));
34   PetscFunctionReturn(PETSC_SUCCESS);
35 }
36 
37 /*@
38    MatAXPY - Computes Y = a*X + Y.
39 
40    Logically Collective
41 
42    Input Parameters:
43 +  a - the scalar multiplier
44 .  X - the first matrix
45 .  Y - the second matrix
46 -  str - either `SAME_NONZERO_PATTERN`, `DIFFERENT_NONZERO_PATTERN`, `UNKNOWN_NONZERO_PATTERN`, or `SUBSET_NONZERO_PATTERN` (nonzeros of `X` is a subset of `Y`'s)
47 
48    Level: intermediate
49 
50 .seealso: [](chapter_matrices), `Mat`, `MatAYPX()`
51  @*/
52 PetscErrorCode MatAXPY(Mat Y, PetscScalar a, Mat X, MatStructure str)
53 {
54   PetscInt  M1, M2, N1, N2;
55   PetscInt  m1, m2, n1, n2;
56   PetscBool sametype, transpose;
57 
58   PetscFunctionBegin;
59   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
60   PetscValidLogicalCollectiveScalar(Y, a, 2);
61   PetscValidHeaderSpecific(X, MAT_CLASSID, 3);
62   PetscCheckSameComm(Y, 1, X, 3);
63   PetscCall(MatGetSize(X, &M1, &N1));
64   PetscCall(MatGetSize(Y, &M2, &N2));
65   PetscCall(MatGetLocalSize(X, &m1, &n1));
66   PetscCall(MatGetLocalSize(Y, &m2, &n2));
67   PetscCheck(M1 == M2 && N1 == N2, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_SIZ, "Non conforming matrix add: global sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT, M1, N1, M2, N2);
68   PetscCheck(m1 == m2 && n1 == n2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Non conforming matrix add: local sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT, m1, n1, m2, n2);
69   PetscCheck(Y->assembled, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix (Y)");
70   PetscCheck(X->assembled, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix (X)");
71   if (a == (PetscScalar)0.0) PetscFunctionReturn(PETSC_SUCCESS);
72   if (Y == X) {
73     PetscCall(MatScale(Y, 1.0 + a));
74     PetscFunctionReturn(PETSC_SUCCESS);
75   }
76   PetscCall(PetscObjectObjectTypeCompare((PetscObject)X, (PetscObject)Y, &sametype));
77   PetscCall(PetscLogEventBegin(MAT_AXPY, Y, 0, 0, 0));
78   if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
79     PetscUseTypeMethod(Y, axpy, a, X, str);
80   } else {
81     PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
82     if (transpose) {
83       PetscCall(MatTransposeAXPY_Private(Y, a, X, str, X));
84     } else {
85       PetscCall(PetscObjectTypeCompareAny((PetscObject)Y, &transpose, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
86       if (transpose) {
87         PetscCall(MatTransposeAXPY_Private(Y, a, X, str, Y));
88       } else {
89         PetscCall(MatAXPY_Basic(Y, a, X, str));
90       }
91     }
92   }
93   PetscCall(PetscLogEventEnd(MAT_AXPY, Y, 0, 0, 0));
94   PetscFunctionReturn(PETSC_SUCCESS);
95 }
96 
97 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
98 {
99   PetscErrorCode (*preall)(Mat, Mat, Mat *) = NULL;
100 
101   PetscFunctionBegin;
102   /* look for any available faster alternative to the general preallocator */
103   PetscCall(PetscObjectQueryFunction((PetscObject)Y, "MatAXPYGetPreallocation_C", &preall));
104   if (preall) {
105     PetscCall((*preall)(Y, X, B));
106   } else { /* Use MatPrellocator, assumes same row-col distribution */
107     Mat      preallocator;
108     PetscInt r, rstart, rend;
109     PetscInt m, n, M, N;
110 
111     PetscCall(MatGetRowUpperTriangular(Y));
112     PetscCall(MatGetRowUpperTriangular(X));
113     PetscCall(MatGetSize(Y, &M, &N));
114     PetscCall(MatGetLocalSize(Y, &m, &n));
115     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &preallocator));
116     PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
117     PetscCall(MatSetLayouts(preallocator, Y->rmap, Y->cmap));
118     PetscCall(MatSetUp(preallocator));
119     PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
120     for (r = rstart; r < rend; ++r) {
121       PetscInt           ncols;
122       const PetscInt    *row;
123       const PetscScalar *vals;
124 
125       PetscCall(MatGetRow(Y, r, &ncols, &row, &vals));
126       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
127       PetscCall(MatRestoreRow(Y, r, &ncols, &row, &vals));
128       PetscCall(MatGetRow(X, r, &ncols, &row, &vals));
129       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
130       PetscCall(MatRestoreRow(X, r, &ncols, &row, &vals));
131     }
132     PetscCall(MatSetOption(preallocator, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
133     PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
134     PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
135     PetscCall(MatRestoreRowUpperTriangular(Y));
136     PetscCall(MatRestoreRowUpperTriangular(X));
137 
138     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), B));
139     PetscCall(MatSetType(*B, ((PetscObject)Y)->type_name));
140     PetscCall(MatSetLayouts(*B, Y->rmap, Y->cmap));
141     PetscCall(MatPreallocatorPreallocate(preallocator, PETSC_FALSE, *B));
142     PetscCall(MatDestroy(&preallocator));
143   }
144   PetscFunctionReturn(PETSC_SUCCESS);
145 }
146 
147 PetscErrorCode MatAXPY_Basic(Mat Y, PetscScalar a, Mat X, MatStructure str)
148 {
149   PetscBool isshell, isdense, isnest;
150 
151   PetscFunctionBegin;
152   PetscCall(MatIsShell(Y, &isshell));
153   if (isshell) { /* MatShell has special support for AXPY */
154     PetscErrorCode (*f)(Mat, PetscScalar, Mat, MatStructure);
155 
156     PetscCall(MatGetOperation(Y, MATOP_AXPY, (void (**)(void)) & f));
157     if (f) {
158       PetscCall((*f)(Y, a, X, str));
159       PetscFunctionReturn(PETSC_SUCCESS);
160     }
161   }
162   /* no need to preallocate if Y is dense */
163   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)Y, &isdense, MATSEQDENSE, MATMPIDENSE, ""));
164   if (isdense) {
165     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATNEST, &isnest));
166     if (isnest) {
167       PetscCall(MatAXPY_Dense_Nest(Y, a, X));
168       PetscFunctionReturn(PETSC_SUCCESS);
169     }
170     if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
171   }
172   if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
173     PetscInt           i, start, end, j, ncols, m, n;
174     const PetscInt    *row;
175     PetscScalar       *val;
176     const PetscScalar *vals;
177 
178     PetscCall(MatGetSize(X, &m, &n));
179     PetscCall(MatGetOwnershipRange(X, &start, &end));
180     PetscCall(MatGetRowUpperTriangular(X));
181     if (a == 1.0) {
182       for (i = start; i < end; i++) {
183         PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
184         PetscCall(MatSetValues(Y, 1, &i, ncols, row, vals, ADD_VALUES));
185         PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
186       }
187     } else {
188       PetscInt vs = 100;
189       /* realloc if needed, as this function may be used in parallel */
190       PetscCall(PetscMalloc1(vs, &val));
191       for (i = start; i < end; i++) {
192         PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
193         if (vs < ncols) {
194           vs = PetscMin(2 * ncols, n);
195           PetscCall(PetscRealloc(vs * sizeof(*val), &val));
196         }
197         for (j = 0; j < ncols; j++) val[j] = a * vals[j];
198         PetscCall(MatSetValues(Y, 1, &i, ncols, row, val, ADD_VALUES));
199         PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
200       }
201       PetscCall(PetscFree(val));
202     }
203     PetscCall(MatRestoreRowUpperTriangular(X));
204     PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
205     PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
206   } else {
207     Mat B;
208 
209     PetscCall(MatAXPY_Basic_Preallocate(Y, X, &B));
210     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
211     PetscCall(MatHeaderMerge(Y, &B));
212   }
213   PetscFunctionReturn(PETSC_SUCCESS);
214 }
215 
216 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B, Mat Y, PetscScalar a, Mat X, MatStructure str)
217 {
218   PetscInt           i, start, end, j, ncols, m, n;
219   const PetscInt    *row;
220   PetscScalar       *val;
221   const PetscScalar *vals;
222 
223   PetscFunctionBegin;
224   PetscCall(MatGetSize(X, &m, &n));
225   PetscCall(MatGetOwnershipRange(X, &start, &end));
226   PetscCall(MatGetRowUpperTriangular(Y));
227   PetscCall(MatGetRowUpperTriangular(X));
228   if (a == 1.0) {
229     for (i = start; i < end; i++) {
230       PetscCall(MatGetRow(Y, i, &ncols, &row, &vals));
231       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
232       PetscCall(MatRestoreRow(Y, i, &ncols, &row, &vals));
233 
234       PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
235       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
236       PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
237     }
238   } else {
239     PetscInt vs = 100;
240     /* realloc if needed, as this function may be used in parallel */
241     PetscCall(PetscMalloc1(vs, &val));
242     for (i = start; i < end; i++) {
243       PetscCall(MatGetRow(Y, i, &ncols, &row, &vals));
244       PetscCall(MatSetValues(B, 1, &i, ncols, row, vals, ADD_VALUES));
245       PetscCall(MatRestoreRow(Y, i, &ncols, &row, &vals));
246 
247       PetscCall(MatGetRow(X, i, &ncols, &row, &vals));
248       if (vs < ncols) {
249         vs = PetscMin(2 * ncols, n);
250         PetscCall(PetscRealloc(vs * sizeof(*val), &val));
251       }
252       for (j = 0; j < ncols; j++) val[j] = a * vals[j];
253       PetscCall(MatSetValues(B, 1, &i, ncols, row, val, ADD_VALUES));
254       PetscCall(MatRestoreRow(X, i, &ncols, &row, &vals));
255     }
256     PetscCall(PetscFree(val));
257   }
258   PetscCall(MatRestoreRowUpperTriangular(Y));
259   PetscCall(MatRestoreRowUpperTriangular(X));
260   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
261   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 /*@
266    MatShift - Computes `Y =  Y + a I`, where `a` is a `PetscScalar`
267 
268    Neighbor-wise Collective
269 
270    Input Parameters:
271 +  Y - the matrix
272 -  a - the `PetscScalar`
273 
274    Level: intermediate
275 
276    Notes:
277     If `Y` is a rectangular matrix, the shift is done on the main diagonal of the matrix (https://en.wikipedia.org/wiki/Main_diagonal)
278 
279     If the matrix `Y` is missing some diagonal entries this routine can be very slow. To make it fast one should initially
280    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
281    entry). No operation is performed when a is zero.
282 
283    To form Y = Y + diag(V) use `MatDiagonalSet()`
284 
285 .seealso: [](chapter_matrices), `Mat`, `MatDiagonalSet()`, `MatScale()`, `MatDiagonalScale()`
286  @*/
287 PetscErrorCode MatShift(Mat Y, PetscScalar a)
288 {
289   PetscFunctionBegin;
290   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
291   PetscCheck(Y->assembled, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for unassembled matrix");
292   PetscCheck(!Y->factortype, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
293   MatCheckPreallocated(Y, 1);
294   if (a == 0.0) PetscFunctionReturn(PETSC_SUCCESS);
295 
296   if (Y->ops->shift) PetscUseTypeMethod(Y, shift, a);
297   else PetscCall(MatShift_Basic(Y, a));
298 
299   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
300   PetscFunctionReturn(PETSC_SUCCESS);
301 }
302 
303 PetscErrorCode MatDiagonalSet_Default(Mat Y, Vec D, InsertMode is)
304 {
305   PetscInt           i, start, end;
306   const PetscScalar *v;
307 
308   PetscFunctionBegin;
309   PetscCall(MatGetOwnershipRange(Y, &start, &end));
310   PetscCall(VecGetArrayRead(D, &v));
311   for (i = start; i < end; i++) PetscCall(MatSetValues(Y, 1, &i, 1, &i, v + i - start, is));
312   PetscCall(VecRestoreArrayRead(D, &v));
313   PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
314   PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
315   PetscFunctionReturn(PETSC_SUCCESS);
316 }
317 
318 /*@
319    MatDiagonalSet - Computes `Y` = `Y` + `D`, where `D` is a diagonal matrix
320    that is represented as a vector. Or Y[i,i] = D[i] if `InsertMode` is
321    `INSERT_VALUES`.
322 
323    Neighbor-wise Collective
324 
325    Input Parameters:
326 +  Y - the input matrix
327 .  D - the diagonal matrix, represented as a vector
328 -  i - `INSERT_VALUES` or `ADD_VALUES`
329 
330    Level: intermediate
331 
332    Note:
333     If the matrix `Y` is missing some diagonal entries this routine can be very slow. To make it fast one should initially
334    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
335    entry).
336 
337 .seealso: [](chapter_matrices), `Mat`, `MatShift()`, `MatScale()`, `MatDiagonalScale()`
338 @*/
339 PetscErrorCode MatDiagonalSet(Mat Y, Vec D, InsertMode is)
340 {
341   PetscInt matlocal, veclocal;
342 
343   PetscFunctionBegin;
344   PetscValidHeaderSpecific(Y, MAT_CLASSID, 1);
345   PetscValidHeaderSpecific(D, VEC_CLASSID, 2);
346   PetscCall(MatGetLocalSize(Y, &matlocal, NULL));
347   PetscCall(VecGetLocalSize(D, &veclocal));
348   PetscCheck(matlocal == veclocal, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number local rows of matrix %" PetscInt_FMT " does not match that of vector for diagonal %" PetscInt_FMT, matlocal, veclocal);
349   if (Y->ops->diagonalset) PetscUseTypeMethod(Y, diagonalset, D, is);
350   else PetscCall(MatDiagonalSet_Default(Y, D, is));
351   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
352   PetscFunctionReturn(PETSC_SUCCESS);
353 }
354 
355 /*@
356    MatAYPX - Computes Y = a*Y + X.
357 
358    Logically Collective
359 
360    Input Parameters:
361 +  a - the `PetscScalar` multiplier
362 .  Y - the first matrix
363 .  X - the second matrix
364 -  str - either `SAME_NONZERO_PATTERN`, `DIFFERENT_NONZERO_PATTERN`, `UNKNOWN_NONZERO_PATTERN`, or `SUBSET_NONZERO_PATTERN` (nonzeros of X is a subset of Y's)
365 
366    Level: intermediate
367 
368 .seealso: [](chapter_matrices), `Mat`, `MatAXPY()`
369  @*/
370 PetscErrorCode MatAYPX(Mat Y, PetscScalar a, Mat X, MatStructure str)
371 {
372   PetscFunctionBegin;
373   PetscCall(MatScale(Y, a));
374   PetscCall(MatAXPY(Y, 1.0, X, str));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 /*@
379     MatComputeOperator - Computes the explicit matrix
380 
381     Collective
382 
383     Input Parameters:
384 +   inmat - the matrix
385 -   mattype - the matrix type for the explicit operator
386 
387     Output Parameter:
388 .   mat - the explicit  operator
389 
390     Level: advanced
391 
392     Note:
393     This computation is done by applying the operators to columns of the identity matrix.
394     This routine is costly in general, and is recommended for use only with relatively small systems.
395     Currently, this routine uses a dense matrix format if `mattype` == `NULL`.
396 
397 .seealso: [](chapter_matrices), `Mat`, `MatConvert()`, `MatMult()`, `MatComputeOperatorTranspose()`
398 @*/
399 PetscErrorCode MatComputeOperator(Mat inmat, MatType mattype, Mat *mat)
400 {
401   PetscFunctionBegin;
402   PetscValidHeaderSpecific(inmat, MAT_CLASSID, 1);
403   PetscValidPointer(mat, 3);
404   PetscCall(MatConvert_Shell(inmat, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat));
405   PetscFunctionReturn(PETSC_SUCCESS);
406 }
407 
408 /*@
409     MatComputeOperatorTranspose - Computes the explicit matrix representation of
410         a give matrix that can apply `MatMultTranspose()`
411 
412     Collective
413 
414     Input Parameters:
415 +   inmat - the matrix
416 -   mattype - the matrix type for the explicit operator
417 
418     Output Parameter:
419 .   mat - the explicit  operator transposed
420 
421     Level: advanced
422 
423     Note:
424     This computation is done by applying the transpose of the operator to columns of the identity matrix.
425     This routine is costly in general, and is recommended for use only with relatively small systems.
426     Currently, this routine uses a dense matrix format if `mattype` == `NULL`.
427 
428 .seealso: [](chapter_matrices), `Mat`, `MatConvert()`, `MatMult()`, `MatComputeOperator()`
429 @*/
430 PetscErrorCode MatComputeOperatorTranspose(Mat inmat, MatType mattype, Mat *mat)
431 {
432   Mat A;
433 
434   PetscFunctionBegin;
435   PetscValidHeaderSpecific(inmat, MAT_CLASSID, 1);
436   PetscValidPointer(mat, 3);
437   PetscCall(MatCreateTranspose(inmat, &A));
438   PetscCall(MatConvert_Shell(A, mattype ? mattype : MATDENSE, MAT_INITIAL_MATRIX, mat));
439   PetscCall(MatDestroy(&A));
440   PetscFunctionReturn(PETSC_SUCCESS);
441 }
442 
443 /*@
444   MatChop - Set all values in the matrix less than the tolerance to zero
445 
446   Input Parameters:
447 + A   - The matrix
448 - tol - The zero tolerance
449 
450   Output Parameters:
451 . A - The chopped matrix
452 
453   Level: intermediate
454 
455 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatZeroEntries()`
456  @*/
457 PetscErrorCode MatChop(Mat A, PetscReal tol)
458 {
459   Mat          a;
460   PetscScalar *newVals;
461   PetscInt    *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
462   PetscBool    flg;
463 
464   PetscFunctionBegin;
465   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, ""));
466   if (flg) {
467     PetscCall(MatDenseGetLocalMatrix(A, &a));
468     PetscCall(MatDenseGetLDA(a, &r));
469     PetscCall(MatGetSize(a, &rStart, &rEnd));
470     PetscCall(MatDenseGetArray(a, &newVals));
471     for (; colMax < rEnd; ++colMax) {
472       for (maxRows = 0; maxRows < rStart; ++maxRows) newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
473     }
474     PetscCall(MatDenseRestoreArray(a, &newVals));
475   } else {
476     PetscCall(MatGetOwnershipRange(A, &rStart, &rEnd));
477     PetscCall(MatGetRowUpperTriangular(A));
478     for (r = rStart; r < rEnd; ++r) {
479       PetscInt ncols;
480 
481       PetscCall(MatGetRow(A, r, &ncols, NULL, NULL));
482       colMax = PetscMax(colMax, ncols);
483       PetscCall(MatRestoreRow(A, r, &ncols, NULL, NULL));
484     }
485     numRows = rEnd - rStart;
486     PetscCall(MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A)));
487     PetscCall(PetscMalloc2(colMax, &newCols, colMax, &newVals));
488     PetscCall(MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg)); /* cache user-defined value */
489     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
490     /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd()             */
491     /* that are potentially called many times depending on the distribution of A */
492     for (r = rStart; r < rStart + maxRows; ++r) {
493       const PetscScalar *vals;
494       const PetscInt    *cols;
495       PetscInt           ncols, newcols, c;
496 
497       if (r < rEnd) {
498         PetscCall(MatGetRow(A, r, &ncols, &cols, &vals));
499         for (c = 0; c < ncols; ++c) {
500           newCols[c] = cols[c];
501           newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
502         }
503         newcols = ncols;
504         PetscCall(MatRestoreRow(A, r, &ncols, &cols, &vals));
505         PetscCall(MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES));
506       }
507       PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
508       PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
509     }
510     PetscCall(MatRestoreRowUpperTriangular(A));
511     PetscCall(PetscFree2(newCols, newVals));
512     PetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg)); /* reset option to its user-defined value */
513   }
514   PetscFunctionReturn(PETSC_SUCCESS);
515 }
516