xref: /petsc/src/mat/utils/axpy.c (revision 609bdbee21ea3be08735c64dbe00a9ab27759925)
1 
2 #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/
3 
4 /*@
5    MatAXPY - Computes Y = a*X + Y.
6 
7    Logically  Collective on Mat
8 
9    Input Parameters:
10 +  a - the scalar multiplier
11 .  X - the first matrix
12 .  Y - the second matrix
13 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
14          or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
15 
16    Level: intermediate
17 
18 .keywords: matrix, add
19 
20 .seealso: MatAYPX()
21  @*/
22 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
23 {
24   PetscErrorCode ierr;
25   PetscInt       m1,m2,n1,n2;
26 
27   PetscFunctionBegin;
28   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
29   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
30   PetscValidLogicalCollectiveScalar(Y,a,2);
31   ierr = MatGetSize(X,&m1,&n1);CHKERRQ(ierr);
32   ierr = MatGetSize(Y,&m2,&n2);CHKERRQ(ierr);
33   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);
34 
35   ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
36   if (Y->ops->axpy) {
37     ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr);
38   } else {
39     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
40   }
41   ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
42 #if defined(PETSC_HAVE_CUSP)
43   if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
44     Y->valid_GPU_matrix = PETSC_CUSP_CPU;
45   }
46 #elif defined(PETSC_HAVE_VIENNACL)
47   if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
48     Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
49   }
50 #elif defined(PETSC_HAVE_VECCUDA)
51   if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
52     Y->valid_GPU_matrix = PETSC_CUDA_CPU;
53   }
54 #endif
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
59 {
60   PetscInt          i,start,end,j,ncols,m,n;
61   PetscErrorCode    ierr;
62   const PetscInt    *row;
63   PetscScalar       *val;
64   const PetscScalar *vals;
65 
66   PetscFunctionBegin;
67   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
68   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
69   if (a == 1.0) {
70     for (i = start; i < end; i++) {
71       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
72       ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
73       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
74     }
75   } else {
76     ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr);
77     for (i=start; i<end; i++) {
78       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
79       for (j=0; j<ncols; j++) {
80         val[j] = a*vals[j];
81       }
82       ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
83       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
84     }
85     ierr = PetscFree(val);CHKERRQ(ierr);
86   }
87   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
88   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
89   PetscFunctionReturn(0);
90 }
91 
92 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
93 {
94   PetscInt          i,start,end,j,ncols,m,n;
95   PetscErrorCode    ierr;
96   const PetscInt    *row;
97   PetscScalar       *val;
98   const PetscScalar *vals;
99 
100   PetscFunctionBegin;
101   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
102   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
103   if (a == 1.0) {
104     for (i = start; i < end; i++) {
105       ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
106       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
107       ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
108 
109       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
110       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
111       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
112     }
113   } else {
114     ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr);
115     for (i=start; i<end; i++) {
116       ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
117       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
118       ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
119 
120       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
121       for (j=0; j<ncols; j++) {
122         val[j] = a*vals[j];
123       }
124       ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
125       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
126     }
127     ierr = PetscFree(val);CHKERRQ(ierr);
128   }
129   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
130   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
131   PetscFunctionReturn(0);
132 }
133 
134 /*@
135    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.
136 
137    Neighbor-wise Collective on Mat
138 
139    Input Parameters:
140 +  Y - the matrices
141 -  a - the PetscScalar
142 
143    Level: intermediate
144 
145    Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
146    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
147    entry).
148 
149    Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
150     preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
151 
152 .keywords: matrix, add, shift
153 
154 .seealso: MatDiagonalSet()
155  @*/
156 PetscErrorCode  MatShift(Mat Y,PetscScalar a)
157 {
158   PetscErrorCode ierr;
159 
160   PetscFunctionBegin;
161   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
162   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
163   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
164   MatCheckPreallocated(Y,1);
165 
166   if (Y->ops->shift) {
167     ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr);
168   } else {
169     ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
170   }
171 
172 #if defined(PETSC_HAVE_CUSP)
173   if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
174     Y->valid_GPU_matrix = PETSC_CUSP_CPU;
175   }
176 #elif defined(PETSC_HAVE_VIENNACL)
177   if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
178     Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
179   }
180 #elif defined(PETSC_HAVE_VECCUDA)
181   if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
182     Y->valid_GPU_matrix = PETSC_CUDA_CPU;
183   }
184 #endif
185   PetscFunctionReturn(0);
186 }
187 
188 PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
189 {
190   PetscErrorCode ierr;
191   PetscInt       i,start,end;
192   PetscScalar    *v;
193 
194   PetscFunctionBegin;
195   ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr);
196   ierr = VecGetArray(D,&v);CHKERRQ(ierr);
197   for (i=start; i<end; i++) {
198     ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr);
199   }
200   ierr = VecRestoreArray(D,&v);CHKERRQ(ierr);
201   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
202   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
203   PetscFunctionReturn(0);
204 }
205 
206 /*@
207    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
208    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
209    INSERT_VALUES.
210 
211    Input Parameters:
212 +  Y - the input matrix
213 .  D - the diagonal matrix, represented as a vector
214 -  i - INSERT_VALUES or ADD_VALUES
215 
216    Neighbor-wise Collective on Mat and Vec
217 
218    Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
219    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
220    entry).
221 
222    Level: intermediate
223 
224 .keywords: matrix, add, shift, diagonal
225 
226 .seealso: MatShift()
227 @*/
228 PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
229 {
230   PetscErrorCode ierr;
231   PetscInt       matlocal,veclocal;
232 
233   PetscFunctionBegin;
234   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
235   PetscValidHeaderSpecific(D,VEC_CLASSID,2);
236   ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr);
237   ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr);
238   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
239   if (Y->ops->diagonalset) {
240     ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr);
241   } else {
242     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
243   }
244   PetscFunctionReturn(0);
245 }
246 
247 /*@
248    MatAYPX - Computes Y = a*Y + X.
249 
250    Logically on Mat
251 
252    Input Parameters:
253 +  a - the PetscScalar multiplier
254 .  Y - the first matrix
255 .  X - the second matrix
256 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
257 
258    Level: intermediate
259 
260 .keywords: matrix, add
261 
262 .seealso: MatAXPY()
263  @*/
264 PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
265 {
266   PetscScalar    one = 1.0;
267   PetscErrorCode ierr;
268   PetscInt       mX,mY,nX,nY;
269 
270   PetscFunctionBegin;
271   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
272   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
273   PetscValidLogicalCollectiveScalar(Y,a,2);
274   ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr);
275   ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr);
276   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
277 
278   ierr = MatScale(Y,a);CHKERRQ(ierr);
279   ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr);
280   PetscFunctionReturn(0);
281 }
282 
283 /*@
284     MatComputeExplicitOperator - Computes the explicit matrix
285 
286     Collective on Mat
287 
288     Input Parameter:
289 .   inmat - the matrix
290 
291     Output Parameter:
292 .   mat - the explict preconditioned operator
293 
294     Notes:
295     This computation is done by applying the operators to columns of the
296     identity matrix.
297 
298     Currently, this routine uses a dense matrix format when 1 processor
299     is used and a sparse format otherwise.  This routine is costly in general,
300     and is recommended for use only with relatively small systems.
301 
302     Level: advanced
303 
304 .keywords: Mat, compute, explicit, operator
305 @*/
306 PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
307 {
308   Vec            in,out;
309   PetscErrorCode ierr;
310   PetscInt       i,m,n,M,N,*rows,start,end;
311   MPI_Comm       comm;
312   PetscScalar    *array,zero = 0.0,one = 1.0;
313   PetscMPIInt    size;
314 
315   PetscFunctionBegin;
316   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
317   PetscValidPointer(mat,2);
318 
319   ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
320   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
321 
322   ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr);
323   ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr);
324   ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr);
325   ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
326   ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr);
327   ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr);
328   for (i=0; i<m; i++) rows[i] = start + i;
329 
330   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
331   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
332   if (size == 1) {
333     ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr);
334     ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr);
335   } else {
336     ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
337     ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr);
338   }
339 
340   for (i=0; i<N; i++) {
341 
342     ierr = VecSet(in,zero);CHKERRQ(ierr);
343     ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
344     ierr = VecAssemblyBegin(in);CHKERRQ(ierr);
345     ierr = VecAssemblyEnd(in);CHKERRQ(ierr);
346 
347     ierr = MatMult(inmat,in,out);CHKERRQ(ierr);
348 
349     ierr = VecGetArray(out,&array);CHKERRQ(ierr);
350     ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
351     ierr = VecRestoreArray(out,&array);CHKERRQ(ierr);
352 
353   }
354   ierr = PetscFree(rows);CHKERRQ(ierr);
355   ierr = VecDestroy(&out);CHKERRQ(ierr);
356   ierr = VecDestroy(&in);CHKERRQ(ierr);
357   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
358   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 /*@
363   MatChop - Set all values in the matrix less than the tolerance to zero
364 
365   Input Parameters:
366 + A   - The matrix
367 - tol - The zero tolerance
368 
369   Output Parameters:
370 . A - The chopped matrix
371 
372   Level: intermediate
373 
374 .seealso: MatCreate(), MatZeroEntries()
375  @*/
376 PetscErrorCode MatChop(Mat A, PetscReal tol)
377 {
378   PetscScalar    *newVals;
379   PetscInt       *newCols;
380   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;
381   PetscErrorCode ierr;
382 
383   PetscFunctionBegin;
384   ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr);
385   for (r = rStart; r < rEnd; ++r) {
386     PetscInt ncols;
387 
388     ierr   = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
389     colMax = PetscMax(colMax, ncols);CHKERRQ(ierr);
390     ierr   = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
391   }
392   numRows = rEnd - rStart;
393   ierr    = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
394   ierr    = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr);
395   for (r = rStart; r < rStart+maxRows; ++r) {
396     const PetscScalar *vals;
397     const PetscInt    *cols;
398     PetscInt           ncols, newcols, c;
399 
400     if (r < rEnd) {
401       ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
402       for (c = 0; c < ncols; ++c) {
403         newCols[c] = cols[c];
404         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
405       }
406       newcols = ncols;
407       ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
408       ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr);
409     }
410     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
411     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
412   }
413   ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416