1 /*
2 Defines the BLAS based vector operations. Code shared by parallel
3 and sequential vectors.
4 */
5
6 #include <../src/vec/vec/impls/dvecimpl.h> /*I "petscvec.h" I*/
7 #include <petscblaslapack.h>
8
9 #if defined(PETSC_USE_REAL_SINGLE) && defined(PETSC_BLASLAPACK_SNRM2_RETURNS_DOUBLE) && !defined(PETSC_USE_COMPLEX)
VecXDot_Seq_Private(Vec xin,Vec yin,PetscScalar * z,double (* const BLASfn)(const PetscBLASInt *,const PetscScalar *,const PetscBLASInt *,const PetscScalar *,const PetscBLASInt *))10 static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, double (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
11 #else
12 static PetscErrorCode VecXDot_Seq_Private(Vec xin, Vec yin, PetscScalar *z, PetscScalar (*const BLASfn)(const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *, const PetscScalar *, const PetscBLASInt *))
13 #endif
14 {
15 const PetscInt n = xin->map->n;
16 const PetscBLASInt one = 1;
17 const PetscScalar *ya, *xa;
18 PetscBLASInt bn;
19
20 PetscFunctionBegin;
21 PetscCall(PetscBLASIntCast(n, &bn));
22 if (n > 0) PetscCall(PetscLogFlops(2.0 * n - 1));
23 PetscCall(VecGetArrayRead(xin, &xa));
24 PetscCall(VecGetArrayRead(yin, &ya));
25 /* arguments ya, xa are reversed because BLAS complex conjugates the first argument, PETSc
26 the second */
27 PetscCallBLAS("BLASdot", *z = BLASfn(&bn, ya, &one, xa, &one));
28 PetscCall(VecRestoreArrayRead(xin, &xa));
29 PetscCall(VecRestoreArrayRead(yin, &ya));
30 PetscFunctionReturn(PETSC_SUCCESS);
31 }
32
VecDot_Seq(Vec xin,Vec yin,PetscScalar * z)33 PetscErrorCode VecDot_Seq(Vec xin, Vec yin, PetscScalar *z)
34 {
35 PetscFunctionBegin;
36 PetscCall(VecXDot_Seq_Private(xin, yin, z, BLASdot_));
37 PetscFunctionReturn(PETSC_SUCCESS);
38 }
39
VecTDot_Seq(Vec xin,Vec yin,PetscScalar * z)40 PetscErrorCode VecTDot_Seq(Vec xin, Vec yin, PetscScalar *z)
41 {
42 PetscFunctionBegin;
43 /*
44 pay close attention!!! xin and yin are SWAPPED here so that the eventual BLAS call is
45 dot(&bn, xa, &one, ya, &one)
46 */
47 PetscCall(VecXDot_Seq_Private(yin, xin, z, BLASdotu_));
48 PetscFunctionReturn(PETSC_SUCCESS);
49 }
50
VecScale_Seq(Vec xin,PetscScalar alpha)51 PetscErrorCode VecScale_Seq(Vec xin, PetscScalar alpha)
52 {
53 PetscFunctionBegin;
54 if (alpha == (PetscScalar)0.0) {
55 PetscCall(VecSet_Seq(xin, alpha));
56 } else if (alpha != (PetscScalar)1.0) {
57 const PetscBLASInt one = 1;
58 PetscBLASInt bn;
59 PetscScalar *xarray;
60
61 PetscCall(PetscBLASIntCast(xin->map->n, &bn));
62 PetscCall(PetscLogFlops(bn));
63 PetscCall(VecGetArray(xin, &xarray));
64 PetscCallBLAS("BLASscal", BLASscal_(&bn, &alpha, xarray, &one));
65 PetscCall(VecRestoreArray(xin, &xarray));
66 }
67 PetscFunctionReturn(PETSC_SUCCESS);
68 }
69
VecAXPY_Seq(Vec yin,PetscScalar alpha,Vec xin)70 PetscErrorCode VecAXPY_Seq(Vec yin, PetscScalar alpha, Vec xin)
71 {
72 PetscFunctionBegin;
73 /* assume that the BLAS handles alpha == 1.0 efficiently since we have no fast code for it */
74 if (alpha != (PetscScalar)0.0) {
75 const PetscScalar *xarray;
76 PetscScalar *yarray;
77 const PetscBLASInt one = 1;
78 PetscBLASInt bn;
79
80 PetscCall(PetscBLASIntCast(yin->map->n, &bn));
81 PetscCall(PetscLogFlops(2.0 * bn));
82 PetscCall(VecGetArrayRead(xin, &xarray));
83 PetscCall(VecGetArray(yin, &yarray));
84 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bn, &alpha, xarray, &one, yarray, &one));
85 PetscCall(VecRestoreArrayRead(xin, &xarray));
86 PetscCall(VecRestoreArray(yin, &yarray));
87 }
88 PetscFunctionReturn(PETSC_SUCCESS);
89 }
90
VecAXPBY_Seq(Vec yin,PetscScalar a,PetscScalar b,Vec xin)91 PetscErrorCode VecAXPBY_Seq(Vec yin, PetscScalar a, PetscScalar b, Vec xin)
92 {
93 PetscFunctionBegin;
94 if (a == (PetscScalar)0.0) {
95 PetscCall(VecScale_Seq(yin, b));
96 } else if (b == (PetscScalar)1.0) {
97 PetscCall(VecAXPY_Seq(yin, a, xin));
98 } else if (a == (PetscScalar)1.0) {
99 PetscCall(VecAYPX_Seq(yin, b, xin));
100 } else {
101 const PetscInt n = yin->map->n;
102 const PetscScalar *xx;
103 PetscScalar *yy;
104
105 PetscCall(VecGetArrayRead(xin, &xx));
106 PetscCall(VecGetArray(yin, &yy));
107 if (b == (PetscScalar)0.0) {
108 for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i];
109 PetscCall(PetscLogFlops(n));
110 } else {
111 for (PetscInt i = 0; i < n; ++i) yy[i] = a * xx[i] + b * yy[i];
112 PetscCall(PetscLogFlops(3.0 * n));
113 }
114 PetscCall(VecRestoreArrayRead(xin, &xx));
115 PetscCall(VecRestoreArray(yin, &yy));
116 }
117 PetscFunctionReturn(PETSC_SUCCESS);
118 }
119
VecAXPBYPCZ_Seq(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)120 PetscErrorCode VecAXPBYPCZ_Seq(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
121 {
122 const PetscInt n = zin->map->n;
123 const PetscScalar *yy, *xx;
124 PetscInt flops = 4 * n; // common case
125 PetscScalar *zz;
126
127 PetscFunctionBegin;
128 PetscCall(VecGetArrayRead(xin, &xx));
129 PetscCall(VecGetArrayRead(yin, &yy));
130 PetscCall(VecGetArray(zin, &zz));
131 if (alpha == (PetscScalar)1.0) {
132 for (PetscInt i = 0; i < n; ++i) zz[i] = xx[i] + beta * yy[i] + gamma * zz[i];
133 } else if (gamma == (PetscScalar)1.0) {
134 for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + zz[i];
135 } else if (gamma == (PetscScalar)0.0) {
136 for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i];
137 flops -= n;
138 } else {
139 for (PetscInt i = 0; i < n; ++i) zz[i] = alpha * xx[i] + beta * yy[i] + gamma * zz[i];
140 flops += n;
141 }
142 PetscCall(VecRestoreArrayRead(xin, &xx));
143 PetscCall(VecRestoreArrayRead(yin, &yy));
144 PetscCall(VecRestoreArray(zin, &zz));
145 PetscCall(PetscLogFlops(flops));
146 PetscFunctionReturn(PETSC_SUCCESS);
147 }
148