xref: /petsc/src/vec/vec/impls/seq/bvec1.c (revision 55cda6f51c86a483abe5781578f3bbdf986e1625)
1 /*
2    Defines the BLAS based vector operations. Code shared by parallel
3   and sequential vectors.
4 */
5 
6 #include <../src/vec/vec/impls/dvecimpl.h> /*I "petscvec.h" I*/
7 #include <petscblaslapack.h>
8 
9 #if defined(PETSC_USE_REAL_SINGLE) && defined(PETSC_BLASLAPACK_SNRM2_RETURNS_DOUBLE) && !defined(PETSC_USE_COMPLEX)
VecXDot_Seq_Private(Vec xin,Vec yin,PetscScalar * z,double (* const BLASfn)(const PetscBLASInt *,const PetscScalar *,const PetscBLASInt *,const PetscScalar *,const PetscBLASInt *))10 static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, double (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
11 #else
12 static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, PetscScalar (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
13 #endif
14 {
15   const PetscInt     n   = xin->map->n;
16   const PetscBLASInt one = 1;
17   const PetscScalar *ya, *xa;
18   PetscBLASInt       bn;
19 
20   PetscFunctionBegin;
21   PetscCall(PetscBLASIntCast(n, &bn));
22   if (n > 0) PetscCall(PetscLogFlops(2.0 * n - 1));
23   PetscCall(VecGetArrayRead(xin, &xa));
24   PetscCall(VecGetArrayRead(yin, &ya));
25   /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc
26      the second */
27   PetscCallBLAS("BLASdot", *z = BLASfn(&bn, ya, &one, xa, &one));
28   PetscCall(VecRestoreArrayRead(xin, &xa));
29   PetscCall(VecRestoreArrayRead(yin, &ya));
30   PetscFunctionReturn(PETSC_SUCCESS);
31 }
32 
VecDot_Seq(Vec xin,Vec yin,PetscScalar * z)33 PetscErrorCode VecDot_Seq(Vec xin, Vec yin, PetscScalar *z)
34 {
35   PetscFunctionBegin;
36   PetscCall(VecXDot_Seq_Private(xin, yin, z, BLASdot_));
37   PetscFunctionReturn(PETSC_SUCCESS);
38 }
39 
VecTDot_Seq(Vec xin,Vec yin,PetscScalar * z)40 PetscErrorCode VecTDot_Seq(Vec xin, Vec yin, PetscScalar *z)
41 {
42   PetscFunctionBegin;
43   /*
44     pay close attention!!! xin and yin are SWAPPED here so that the eventual BLAS call is
45     dot(&bn, xa, &one, ya, &one)
46   */
47   PetscCall(VecXDot_Seq_Private(yin, xin, z, BLASdotu_));
48   PetscFunctionReturn(PETSC_SUCCESS);
49 }
50 
VecScale_Seq(Vec xin,PetscScalar alpha)51 PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
52 {
53   PetscFunctionBegin;
54   if (alpha == (PetscScalar)0.0) {
55     PetscCall(VecSet_Seq(xin, alpha));
56   } else if (alpha != (PetscScalar)1.0) {
57     const PetscBLASInt one = 1;
58     PetscBLASInt       bn;
59     PetscScalar       *xarray;
60 
61     PetscCall(PetscBLASIntCast(xin->map->n, &bn));
62     PetscCall(PetscLogFlops(bn));
63     PetscCall(VecGetArray(xin, &xarray));
64     PetscCallBLAS("BLASscal", BLASscal_(&bn, &alpha, xarray, &one));
65     PetscCall(VecRestoreArray(xin, &xarray));
66   }
67   PetscFunctionReturn(PETSC_SUCCESS);
68 }
69 
VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)70 PetscErrorCode VecAXPY_Seq(Vec yin, PetscScalar alpha, Vec xin)
71 {
72   PetscFunctionBegin;
73   /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
74   if (alpha != (PetscScalar)0.0) {
75     const PetscScalar *xarray;
76     PetscScalar       *yarray;
77     const PetscBLASInt one = 1;
78     PetscBLASInt       bn;
79 
80     PetscCall(PetscBLASIntCast(yin->map->n, &bn));
81     PetscCall(PetscLogFlops(2.0 * bn));
82     PetscCall(VecGetArrayRead(xin, &xarray));
83     PetscCall(VecGetArray(yin, &yarray));
84     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bn, &alpha, xarray, &one, yarray, &one));
85     PetscCall(VecRestoreArrayRead(xin, &xarray));
86     PetscCall(VecRestoreArray(yin, &yarray));
87   }
88   PetscFunctionReturn(PETSC_SUCCESS);
89 }
90 
VecAXPBY_Seq(Vec yin,PetscScalar a,PetscScalar b,Vec xin)91 PetscErrorCode VecAXPBY_Seq(Vec yin, PetscScalar a, PetscScalar b, Vec xin)
92 {
93   PetscFunctionBegin;
94   if (a == (PetscScalar)0.0) {
95     PetscCall(VecScale_Seq(yin, b));
96   } else if (b == (PetscScalar)1.0) {
97     PetscCall(VecAXPY_Seq(yin, a, xin));
98   } else if (a == (PetscScalar)1.0) {
99     PetscCall(VecAYPX_Seq(yin, b, xin));
100   } else {
101     const PetscInt     n = yin->map->n;
102     const PetscScalar *xx;
103     PetscScalar       *yy;
104 
105     PetscCall(VecGetArrayRead(xin, &xx));
106     PetscCall(VecGetArray(yin, &yy));
107     if (b == (PetscScalar)0.0) {
108       for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i];
109       PetscCall(PetscLogFlops(n));
110     } else {
111       for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i] + b * yy[i];
112       PetscCall(PetscLogFlops(3.0 * n));
113     }
114     PetscCall(VecRestoreArrayRead(xin, &xx));
115     PetscCall(VecRestoreArray(yin, &yy));
116   }
117   PetscFunctionReturn(PETSC_SUCCESS);
118 }
119 
VecAXPBYPCZ_Seq(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)120 PetscErrorCode VecAXPBYPCZ_Seq(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
121 {
122   const PetscInt     n = zin->map->n;
123   const PetscScalar *yy, *xx;
124   PetscInt           flops = 4 * n; // common case
125   PetscScalar       *zz;
126 
127   PetscFunctionBegin;
128   PetscCall(VecGetArrayRead(xin, &xx));
129   PetscCall(VecGetArrayRead(yin, &yy));
130   PetscCall(VecGetArray(zin, &zz));
131   if (alpha == (PetscScalar)1.0) {
132     for (PetscInt i = 0; i < n; ++i) zz[i] = xx[i] + beta * yy[i] + gamma * zz[i];
133   } else if (gamma == (PetscScalar)1.0) {
134     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + zz[i];
135   } else if (gamma == (PetscScalar)0.0) {
136     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i];
137     flops -= n;
138   } else {
139     for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + gamma * zz[i];
140     flops += n;
141   }
142   PetscCall(VecRestoreArrayRead(xin, &xx));
143   PetscCall(VecRestoreArrayRead(yin, &yy));
144   PetscCall(VecRestoreArray(zin, &zz));
145   PetscCall(PetscLogFlops(flops));
146   PetscFunctionReturn(PETSC_SUCCESS);
147 }
148