xref: /petsc/src/mat/utils/axpy.c (revision 70baa948e42e6bb1660071283af10f413c9269b6)
1 
2 #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/
3 
4 #undef __FUNCT__
5 #define __FUNCT__ "MatAXPY"
6 /*@
7    MatAXPY - Computes Y = a*X + Y.
8 
9    Logically  Collective on Mat
10 
11    Input Parameters:
12 +  a - the scalar multiplier
13 .  X - the first matrix
14 .  Y - the second matrix
15 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
16          or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
17 
18    Level: intermediate
19 
20 .keywords: matrix, add
21 
22 .seealso: MatAYPX()
23  @*/
24 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
25 {
26   PetscErrorCode ierr;
27   PetscInt       m1,m2,n1,n2;
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
31   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
32   PetscValidLogicalCollectiveScalar(Y,a,2);
33   ierr = MatGetSize(X,&m1,&n1);CHKERRQ(ierr);
34   ierr = MatGetSize(Y,&m2,&n2);CHKERRQ(ierr);
35   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);
36 
37   ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
38   if (Y->ops->axpy) {
39     ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr);
40   } else {
41     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
42   }
43   ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
44 #if defined(PETSC_HAVE_CUSP)
45   if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
46     Y->valid_GPU_matrix = PETSC_CUSP_CPU;
47   }
48 #elif defined(PETSC_HAVE_VIENNACL)
49   if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
50     Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
51   }
52 #elif defined(PETSC_HAVE_VECCUDA)
53   if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
54     Y->valid_GPU_matrix = PETSC_CUDA_CPU;
55   }
56 #endif
57   PetscFunctionReturn(0);
58 }
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "MatAXPY_Basic"
62 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
63 {
64   PetscInt          i,start,end,j,ncols,m,n;
65   PetscErrorCode    ierr;
66   const PetscInt    *row;
67   PetscScalar       *val;
68   const PetscScalar *vals;
69 
70   PetscFunctionBegin;
71   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
72   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
73   if (a == 1.0) {
74     for (i = start; i < end; i++) {
75       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
76       ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
77       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
78     }
79   } else {
80     ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr);
81     for (i=start; i<end; i++) {
82       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
83       for (j=0; j<ncols; j++) {
84         val[j] = a*vals[j];
85       }
86       ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
87       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
88     }
89     ierr = PetscFree(val);CHKERRQ(ierr);
90   }
91   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
92   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 
96 #undef __FUNCT__
97 #define __FUNCT__ "MatAXPY_BasicWithPreallocation"
98 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
99 {
100   PetscInt          i,start,end,j,ncols,m,n;
101   PetscErrorCode    ierr;
102   const PetscInt    *row;
103   PetscScalar       *val;
104   const PetscScalar *vals;
105 
106   PetscFunctionBegin;
107   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
108   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
109   if (a == 1.0) {
110     for (i = start; i < end; i++) {
111       ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
112       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
113       ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
114 
115       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
116       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
117       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
118     }
119   } else {
120     ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr);
121     for (i=start; i<end; i++) {
122       ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
123       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
124       ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
125 
126       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
127       for (j=0; j<ncols; j++) {
128         val[j] = a*vals[j];
129       }
130       ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
131       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
132     }
133     ierr = PetscFree(val);CHKERRQ(ierr);
134   }
135   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
136   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
#undef __FUNCT__
#define __FUNCT__ "MatShift"
/*@
   MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.

   Neighbor-wise Collective on Mat

   Input Parameters:
+  Y - the matrix
-  a - the PetscScalar

   Level: intermediate

   Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
   entry).

   Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
    preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.

.keywords: matrix, add, shift

.seealso: MatDiagonalSet()
 @*/
PetscErrorCode  MatShift(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
  if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  MatCheckPreallocated(Y,1);

  /* use the matrix-type-specific implementation when available, otherwise
     fall back to the generic basic implementation */
  if (Y->ops->shift) {
    ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr);
  } else {
    ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  }

  /* the host copy of Y changed: invalidate any existing GPU copy so it is
     refreshed from the host before next use */
#if defined(PETSC_HAVE_CUSP)
  if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_CUSP_CPU;
  }
#elif defined(PETSC_HAVE_VIENNACL)
  if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
  }
#elif defined(PETSC_HAVE_VECCUDA)
  if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
    Y->valid_GPU_matrix = PETSC_CUDA_CPU;
  }
#endif
  PetscFunctionReturn(0);
}
195 
196 #undef __FUNCT__
197 #define __FUNCT__ "MatDiagonalSet_Default"
198 PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
199 {
200   PetscErrorCode ierr;
201   PetscInt       i,start,end;
202   PetscScalar    *v;
203 
204   PetscFunctionBegin;
205   ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr);
206   ierr = VecGetArray(D,&v);CHKERRQ(ierr);
207   for (i=start; i<end; i++) {
208     ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr);
209   }
210   ierr = VecRestoreArray(D,&v);CHKERRQ(ierr);
211   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
212   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
213   PetscFunctionReturn(0);
214 }
215 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet"
/*@
   MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
   that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
   INSERT_VALUES.

   Input Parameters:
+  Y - the input matrix
.  D - the diagonal matrix, represented as a vector
-  is - INSERT_VALUES or ADD_VALUES

   Neighbor-wise Collective on Mat and Vec

   Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
   fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
   entry).

   Level: intermediate

.keywords: matrix, add, shift, diagonal

.seealso: MatShift()
@*/
PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  PetscInt       matlocal,veclocal;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
  PetscValidHeaderSpecific(D,VEC_CLASSID,2);
  /* the local row layout of Y must match the local layout of D */
  ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr);
  ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr);
  if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
  /* use the matrix-type-specific implementation when available, otherwise
     fall back to the generic row-by-row default */
  if (Y->ops->diagonalset) {
    ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatAYPX"
261 /*@
262    MatAYPX - Computes Y = a*Y + X.
263 
264    Logically on Mat
265 
266    Input Parameters:
267 +  a - the PetscScalar multiplier
268 .  Y - the first matrix
269 .  X - the second matrix
270 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
271 
272    Level: intermediate
273 
274 .keywords: matrix, add
275 
276 .seealso: MatAXPY()
277  @*/
278 PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
279 {
280   PetscScalar    one = 1.0;
281   PetscErrorCode ierr;
282   PetscInt       mX,mY,nX,nY;
283 
284   PetscFunctionBegin;
285   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
286   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
287   PetscValidLogicalCollectiveScalar(Y,a,2);
288   ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr);
289   ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr);
290   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
291 
292   ierr = MatScale(Y,a);CHKERRQ(ierr);
293   ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr);
294   PetscFunctionReturn(0);
295 }
296 
#undef __FUNCT__
#define __FUNCT__ "MatComputeExplicitOperator"
/*@
    MatComputeExplicitOperator - Computes the explicit matrix

    Collective on Mat

    Input Parameter:
.   inmat - the matrix

    Output Parameter:
.   mat - the explict preconditioned operator

    Notes:
    This computation is done by applying the operators to columns of the
    identity matrix.

    Currently, this routine uses a dense matrix format when 1 processor
    is used and a sparse format otherwise.  This routine is costly in general,
    and is recommended for use only with relatively small systems.

    Level: advanced

.keywords: Mat, compute, explicit, operator
@*/
PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
{
  Vec            in,out;
  PetscErrorCode ierr;
  PetscInt       i,m,n,M,N,*rows,start,end;
  MPI_Comm       comm;
  PetscScalar    *array,zero = 0.0,one = 1.0;
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
  PetscValidPointer(mat,2);

  ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* work vectors compatible with inmat: in holds a column of the identity,
     out receives the corresponding column of the explicit operator */
  ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr);
  ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr);
  ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr);
  ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  /* global row indices owned by this process, used for MatSetValues() below */
  ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr);
  for (i=0; i<m; i++) rows[i] = start + i;

  /* dense on one process, sparse (AIJ) in parallel; the AIJ preallocation
     assumes each column of the result may be fully dense */
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr);
  }

  /* all processes loop over every global column i collectively: set in = e_i,
     apply the operator, and scatter the local part of the result into column i */
  for (i=0; i<N; i++) {

    ierr = VecSet(in,zero);CHKERRQ(ierr);
    ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecAssemblyBegin(in);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(in);CHKERRQ(ierr);

    ierr = MatMult(inmat,in,out);CHKERRQ(ierr);

    ierr = VecGetArray(out,&array);CHKERRQ(ierr);
    ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecRestoreArray(out,&array);CHKERRQ(ierr);

  }
  ierr = PetscFree(rows);CHKERRQ(ierr);
  ierr = VecDestroy(&out);CHKERRQ(ierr);
  ierr = VecDestroy(&in);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
377 
378 #undef __FUNCT__
379 #define __FUNCT__ "MatChop"
380 /*@
381   MatChop - Set all values in the matrix less than the tolerance to zero
382 
383   Input Parameters:
384 + A   - The matrix
385 - tol - The zero tolerance
386 
387   Output Parameters:
388 . A - The chopped matrix
389 
390   Level: intermediate
391 
392 .seealso: MatCreate(), MatZeroEntries()
393  @*/
394 PetscErrorCode MatChop(Mat A, PetscReal tol)
395 {
396   PetscScalar    *newVals;
397   PetscInt       *newCols;
398   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;
399   PetscErrorCode ierr;
400 
401   PetscFunctionBegin;
402   ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr);
403   for (r = rStart; r < rEnd; ++r) {
404     PetscInt ncols;
405 
406     ierr   = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
407     colMax = PetscMax(colMax, ncols);CHKERRQ(ierr);
408     ierr   = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
409   }
410   numRows = rEnd - rStart;
411   ierr    = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
412   ierr    = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr);
413   for (r = rStart; r < rStart+maxRows; ++r) {
414     const PetscScalar *vals;
415     const PetscInt    *cols;
416     PetscInt           ncols, newcols, c;
417 
418     if (r < rEnd) {
419       ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
420       for (c = 0; c < ncols; ++c) {
421         newCols[c] = cols[c];
422         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
423       }
424       newcols = ncols;
425       ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
426       ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr);
427     }
428     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
429     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
430   }
431   ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr);
432   PetscFunctionReturn(0);
433 }
434