xref: /petsc/src/mat/utils/axpy.c (revision 285fb4e2b69b3de46a0633bd0adc6a7f684caa1e)
1 
2 #include <petsc/private/matimpl.h>  /*I   "petscmat.h"  I*/
3 
4 /*@
5    MatAXPY - Computes Y = a*X + Y.
6 
7    Logically  Collective on Mat
8 
9    Input Parameters:
10 +  a - the scalar multiplier
11 .  X - the first matrix
12 .  Y - the second matrix
13 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
14          or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
15 
16    Level: intermediate
17 
18 .keywords: matrix, add
19 
20 .seealso: MatAYPX()
21  @*/
22 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
23 {
24   PetscErrorCode ierr;
25   PetscInt       m1,m2,n1,n2;
26   PetscBool      sametype;
27 
28   PetscFunctionBegin;
29   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
30   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
31   PetscValidLogicalCollectiveScalar(Y,a,2);
32   ierr = MatGetSize(X,&m1,&n1);CHKERRQ(ierr);
33   ierr = MatGetSize(Y,&m2,&n2);CHKERRQ(ierr);
34   if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);
35 
36   ierr = PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);CHKERRQ(ierr);
37   ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
38   if (Y->ops->axpy && sametype) {
39     ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr);
40   } else {
41     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
42   }
43   ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr);
44 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA)
45   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
46     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
47   }
48 #endif
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
53 {
54   PetscInt          i,start,end,j,ncols,m,n;
55   PetscErrorCode    ierr;
56   const PetscInt    *row;
57   PetscScalar       *val;
58   const PetscScalar *vals;
59 
60   PetscFunctionBegin;
61   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
62   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
63   if (a == 1.0) {
64     for (i = start; i < end; i++) {
65       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
66       ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
67       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
68     }
69   } else {
70     ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr);
71     for (i=start; i<end; i++) {
72       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
73       for (j=0; j<ncols; j++) {
74         val[j] = a*vals[j];
75       }
76       ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
77       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
78     }
79     ierr = PetscFree(val);CHKERRQ(ierr);
80   }
81   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
82   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
83   PetscFunctionReturn(0);
84 }
85 
86 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
87 {
88   PetscInt          i,start,end,j,ncols,m,n;
89   PetscErrorCode    ierr;
90   const PetscInt    *row;
91   PetscScalar       *val;
92   const PetscScalar *vals;
93 
94   PetscFunctionBegin;
95   ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr);
96   ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr);
97   if (a == 1.0) {
98     for (i = start; i < end; i++) {
99       ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
100       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
101       ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
102 
103       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
104       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
105       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
106     }
107   } else {
108     ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr);
109     for (i=start; i<end; i++) {
110       ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
111       ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr);
112       ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr);
113 
114       ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
115       for (j=0; j<ncols; j++) {
116         val[j] = a*vals[j];
117       }
118       ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr);
119       ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr);
120     }
121     ierr = PetscFree(val);CHKERRQ(ierr);
122   }
123   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
124   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
125   PetscFunctionReturn(0);
126 }
127 
128 /*@
129    MatShift - Computes Y =  Y + a I, where a is a PetscScalar and I is the identity matrix.
130 
131    Neighbor-wise Collective on Mat
132 
133    Input Parameters:
134 +  Y - the matrices
135 -  a - the PetscScalar
136 
137    Level: intermediate
138 
139    Notes:
140     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
141    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
142    entry).
143 
144    To form Y = Y + diag(V) use MatDiagonalSet()
145 
146    Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
147     preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
148 
149 .keywords: matrix, add, shift
150 
151 .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
152  @*/
153 PetscErrorCode  MatShift(Mat Y,PetscScalar a)
154 {
155   PetscErrorCode ierr;
156 
157   PetscFunctionBegin;
158   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
159   if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
160   if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
161   MatCheckPreallocated(Y,1);
162 
163   if (Y->ops->shift) {
164     ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr);
165   } else {
166     ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
167   }
168 
169 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA)
170   if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) {
171     Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU;
172   }
173 #endif
174   PetscFunctionReturn(0);
175 }
176 
177 PetscErrorCode  MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
178 {
179   PetscErrorCode ierr;
180   PetscInt       i,start,end;
181   PetscScalar    *v;
182 
183   PetscFunctionBegin;
184   ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr);
185   ierr = VecGetArray(D,&v);CHKERRQ(ierr);
186   for (i=start; i<end; i++) {
187     ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr);
188   }
189   ierr = VecRestoreArray(D,&v);CHKERRQ(ierr);
190   ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
191   ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
192   PetscFunctionReturn(0);
193 }
194 
195 /*@
196    MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
197    that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
198    INSERT_VALUES.
199 
200    Input Parameters:
201 +  Y - the input matrix
202 .  D - the diagonal matrix, represented as a vector
203 -  i - INSERT_VALUES or ADD_VALUES
204 
205    Neighbor-wise Collective on Mat and Vec
206 
207    Notes:
208     If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
209    fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
210    entry).
211 
212    Level: intermediate
213 
214 .keywords: matrix, add, shift, diagonal
215 
216 .seealso: MatShift(), MatScale(), MatDiagonalScale()
217 @*/
218 PetscErrorCode  MatDiagonalSet(Mat Y,Vec D,InsertMode is)
219 {
220   PetscErrorCode ierr;
221   PetscInt       matlocal,veclocal;
222 
223   PetscFunctionBegin;
224   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
225   PetscValidHeaderSpecific(D,VEC_CLASSID,2);
226   ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr);
227   ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr);
228   if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
229   if (Y->ops->diagonalset) {
230     ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr);
231   } else {
232     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
233   }
234   PetscFunctionReturn(0);
235 }
236 
237 /*@
238    MatAYPX - Computes Y = a*Y + X.
239 
240    Logically on Mat
241 
242    Input Parameters:
243 +  a - the PetscScalar multiplier
244 .  Y - the first matrix
245 .  X - the second matrix
246 -  str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
247 
248    Level: intermediate
249 
250 .keywords: matrix, add
251 
252 .seealso: MatAXPY()
253  @*/
254 PetscErrorCode  MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
255 {
256   PetscScalar    one = 1.0;
257   PetscErrorCode ierr;
258   PetscInt       mX,mY,nX,nY;
259 
260   PetscFunctionBegin;
261   PetscValidHeaderSpecific(X,MAT_CLASSID,3);
262   PetscValidHeaderSpecific(Y,MAT_CLASSID,1);
263   PetscValidLogicalCollectiveScalar(Y,a,2);
264   ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr);
265   ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr);
266   if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
267 
268   ierr = MatScale(Y,a);CHKERRQ(ierr);
269   ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr);
270   PetscFunctionReturn(0);
271 }
272 
273 /*@
274     MatComputeExplicitOperator - Computes the explicit matrix
275 
276     Collective on Mat
277 
278     Input Parameter:
279 .   inmat - the matrix
280 
281     Output Parameter:
282 .   mat - the explict  operator
283 
284     Notes:
285     This computation is done by applying the operators to columns of the
286     identity matrix.
287 
288     Currently, this routine uses a dense matrix format when 1 processor
289     is used and a sparse format otherwise.  This routine is costly in general,
290     and is recommended for use only with relatively small systems.
291 
292     Level: advanced
293 
294 .keywords: Mat, compute, explicit, operator
295 @*/
296 PetscErrorCode  MatComputeExplicitOperator(Mat inmat,Mat *mat)
297 {
298   Vec            in,out;
299   PetscErrorCode ierr;
300   PetscInt       i,m,n,M,N,*rows,start,end;
301   MPI_Comm       comm;
302   PetscScalar    *array,zero = 0.0,one = 1.0;
303   PetscMPIInt    size;
304 
305   PetscFunctionBegin;
306   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
307   PetscValidPointer(mat,2);
308 
309   ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
310   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
311 
312   ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr);
313   ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr);
314   ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr);
315   ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
316   ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr);
317   ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr);
318   for (i=0; i<m; i++) rows[i] = start + i;
319 
320   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
321   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
322   if (size == 1) {
323     ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr);
324     ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr);
325   } else {
326     ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
327     ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr);
328   }
329 
330   for (i=0; i<N; i++) {
331 
332     ierr = VecSet(in,zero);CHKERRQ(ierr);
333     ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
334     ierr = VecAssemblyBegin(in);CHKERRQ(ierr);
335     ierr = VecAssemblyEnd(in);CHKERRQ(ierr);
336 
337     ierr = MatMult(inmat,in,out);CHKERRQ(ierr);
338 
339     ierr = VecGetArray(out,&array);CHKERRQ(ierr);
340     ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
341     ierr = VecRestoreArray(out,&array);CHKERRQ(ierr);
342 
343   }
344   ierr = PetscFree(rows);CHKERRQ(ierr);
345   ierr = VecDestroy(&out);CHKERRQ(ierr);
346   ierr = VecDestroy(&in);CHKERRQ(ierr);
347   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 /*@
353     MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of
354         a give matrix that can apply MatMultTranspose()
355 
356     Collective on Mat
357 
358     Input Parameter:
359 .   inmat - the matrix
360 
361     Output Parameter:
362 .   mat - the explict  operator transposed
363 
364     Notes:
365     This computation is done by applying the transpose of the operator to columns of the
366     identity matrix.
367 
368     Currently, this routine uses a dense matrix format when 1 processor
369     is used and a sparse format otherwise.  This routine is costly in general,
370     and is recommended for use only with relatively small systems.
371 
372     Level: advanced
373 
374 .keywords: Mat, compute, explicit, operator
375 @*/
376 PetscErrorCode  MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat)
377 {
378   Vec            in,out;
379   PetscErrorCode ierr;
380   PetscInt       i,m,n,M,N,*rows,start,end;
381   MPI_Comm       comm;
382   PetscScalar    *array,zero = 0.0,one = 1.0;
383   PetscMPIInt    size;
384 
385   PetscFunctionBegin;
386   PetscValidHeaderSpecific(inmat,MAT_CLASSID,1);
387   PetscValidPointer(mat,2);
388 
389   ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr);
390   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
391 
392   ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr);
393   ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr);
394   ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr);
395   ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
396   ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr);
397   ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr);
398   for (i=0; i<m; i++) rows[i] = start + i;
399 
400   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
401   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
402   if (size == 1) {
403     ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr);
404     ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr);
405   } else {
406     ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
407     ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr);
408   }
409 
410   for (i=0; i<N; i++) {
411 
412     ierr = VecSet(in,zero);CHKERRQ(ierr);
413     ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
414     ierr = VecAssemblyBegin(in);CHKERRQ(ierr);
415     ierr = VecAssemblyEnd(in);CHKERRQ(ierr);
416 
417     ierr = MatMultTranspose(inmat,in,out);CHKERRQ(ierr);
418 
419     ierr = VecGetArray(out,&array);CHKERRQ(ierr);
420     ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
421     ierr = VecRestoreArray(out,&array);CHKERRQ(ierr);
422 
423   }
424   ierr = PetscFree(rows);CHKERRQ(ierr);
425   ierr = VecDestroy(&out);CHKERRQ(ierr);
426   ierr = VecDestroy(&in);CHKERRQ(ierr);
427   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
428   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 /*@
433   MatChop - Set all values in the matrix less than the tolerance to zero
434 
435   Input Parameters:
436 + A   - The matrix
437 - tol - The zero tolerance
438 
439   Output Parameters:
440 . A - The chopped matrix
441 
442   Level: intermediate
443 
444 .seealso: MatCreate(), MatZeroEntries()
445  @*/
446 PetscErrorCode MatChop(Mat A, PetscReal tol)
447 {
448   PetscScalar    *newVals;
449   PetscInt       *newCols;
450   PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;
451   PetscErrorCode ierr;
452 
453   PetscFunctionBegin;
454   ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr);
455   for (r = rStart; r < rEnd; ++r) {
456     PetscInt ncols;
457 
458     ierr   = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
459     colMax = PetscMax(colMax, ncols);CHKERRQ(ierr);
460     ierr   = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
461   }
462   numRows = rEnd - rStart;
463   ierr    = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
464   ierr    = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr);
465   for (r = rStart; r < rStart+maxRows; ++r) {
466     const PetscScalar *vals;
467     const PetscInt    *cols;
468     PetscInt           ncols, newcols, c;
469 
470     if (r < rEnd) {
471       ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
472       for (c = 0; c < ncols; ++c) {
473         newCols[c] = cols[c];
474         newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
475       }
476       newcols = ncols;
477       ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
478       ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr);
479     }
480     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
481     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
482   }
483   ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486