xref: /petsc/src/vec/vec/impls/seq/seqviennacl/vecviennacl.cxx (revision 6d8694c4fbab79f9439f1ad13c0386ba7ee1ca4b)
1 /*
2    Implements the sequential ViennaCL vectors.
3 */
4 
5 #include <petscconf.h>
6 #include <petsc/private/vecimpl.h> /*I "petscvec.h" I*/
7 #include <../src/vec/vec/impls/dvecimpl.h>
8 #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
9 
10 #include "viennacl/linalg/inner_prod.hpp"
11 #include "viennacl/linalg/norm_1.hpp"
12 #include "viennacl/linalg/norm_2.hpp"
13 #include "viennacl/linalg/norm_inf.hpp"
14 
15 #ifdef VIENNACL_WITH_OPENCL
16   #include "viennacl/ocl/backend.hpp"
17 #endif
18 
VecViennaCLGetArray(Vec v,ViennaCLVector ** a)19 PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
20 {
21   PetscFunctionBegin;
22   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
23   *a = 0;
24   PetscCall(VecViennaCLCopyToGPU(v));
25   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
26   ViennaCLWaitForGPU();
27   PetscFunctionReturn(PETSC_SUCCESS);
28 }
29 
VecViennaCLRestoreArray(Vec v,ViennaCLVector ** a)30 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
31 {
32   PetscFunctionBegin;
33   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
34   v->offloadmask = PETSC_OFFLOAD_GPU;
35 
36   PetscCall(PetscObjectStateIncrease((PetscObject)v));
37   PetscFunctionReturn(PETSC_SUCCESS);
38 }
39 
VecViennaCLGetArrayRead(Vec v,const ViennaCLVector ** a)40 PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
41 {
42   PetscFunctionBegin;
43   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
44   *a = 0;
45   PetscCall(VecViennaCLCopyToGPU(v));
46   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
47   ViennaCLWaitForGPU();
48   PetscFunctionReturn(PETSC_SUCCESS);
49 }
50 
VecViennaCLRestoreArrayRead(Vec v,const ViennaCLVector ** a)51 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
52 {
53   PetscFunctionBegin;
54   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
55   PetscFunctionReturn(PETSC_SUCCESS);
56 }
57 
VecViennaCLGetArrayWrite(Vec v,ViennaCLVector ** a)58 PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
59 {
60   PetscFunctionBegin;
61   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
62   *a = 0;
63   PetscCall(VecViennaCLAllocateCheck(v));
64   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
65   ViennaCLWaitForGPU();
66   PetscFunctionReturn(PETSC_SUCCESS);
67 }
68 
VecViennaCLRestoreArrayWrite(Vec v,ViennaCLVector ** a)69 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
70 {
71   PetscFunctionBegin;
72   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
73   v->offloadmask = PETSC_OFFLOAD_GPU;
74 
75   PetscCall(PetscObjectStateIncrease((PetscObject)v));
76   PetscFunctionReturn(PETSC_SUCCESS);
77 }
78 
PetscViennaCLInit()79 PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
80 {
81   char      string[20];
82   PetscBool flg, flg_cuda, flg_opencl, flg_openmp;
83 
84   PetscFunctionBegin;
85   /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
86   PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_backend", string, sizeof(string), &flg));
87   if (flg) {
88     try {
89       PetscCall(PetscStrcasecmp(string, "cuda", &flg_cuda));
90       PetscCall(PetscStrcasecmp(string, "opencl", &flg_opencl));
91       PetscCall(PetscStrcasecmp(string, "openmp", &flg_openmp));
92 
93       /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
94       if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
95 #if defined(PETSC_HAVE_CUDA)
96       else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
97 #endif
98 #if defined(PETSC_HAVE_OPENCL)
99       else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
100 #endif
101       else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.", string);
102     } catch (std::exception const &ex) {
103       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
104     }
105   }
106 
107 #if defined(PETSC_HAVE_OPENCL)
108   /* ViennaCL OpenCL device type configuration */
109   PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_opencl_device_type", string, sizeof(string), &flg));
110   if (flg) {
111     try {
112       PetscCall(PetscStrcasecmp(string, "cpu", &flg));
113       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
114 
115       PetscCall(PetscStrcasecmp(string, "gpu", &flg));
116       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
117 
118       PetscCall(PetscStrcasecmp(string, "accelerator", &flg));
119       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
120     } catch (std::exception const &ex) {
121       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
122     }
123   }
124 #endif
125 
126   /* Print available backends */
127   PetscCall(PetscOptionsHasName(NULL, NULL, "-viennacl_view", &flg));
128   if (flg) {
129     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: "));
130 #if defined(PETSC_HAVE_CUDA)
131     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA, "));
132 #endif
133 #if defined(PETSC_HAVE_OPENCL)
134     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL, "));
135 #endif
136 #if defined(PETSC_HAVE_OPENMP)
137     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
138 #else
139     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) "));
140 #endif
141     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
142 
143     /* Print selected backends */
144     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend  selected: "));
145 #if defined(PETSC_HAVE_CUDA)
146     if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA "));
147 #endif
148 #if defined(PETSC_HAVE_OPENCL)
149     if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL "));
150 #endif
151 #if defined(PETSC_HAVE_OPENMP)
152     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
153 #else
154     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) "));
155 #endif
156     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
157   }
158   PetscFunctionReturn(PETSC_SUCCESS);
159 }
160 
161 /*
162     Allocates space for the vector array on the Host if it does not exist.
163     Does NOT change the PetscViennaCLFlag for the vector
164     Does NOT zero the ViennaCL array
165  */
VecViennaCLAllocateCheckHost(Vec v)166 PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
167 {
168   PetscScalar *array;
169   Vec_Seq     *s;
170   PetscInt     n = v->map->n;
171 
172   PetscFunctionBegin;
173   s = (Vec_Seq *)v->data;
174   PetscCall(VecViennaCLAllocateCheck(v));
175   if (s->array == 0) {
176     PetscCall(PetscMalloc1(n, &array));
177     s->array           = array;
178     s->array_allocated = array;
179   }
180   PetscFunctionReturn(PETSC_SUCCESS);
181 }
182 
183 /*
184     Allocates space for the vector array on the GPU if it does not exist.
185     Does NOT change the PetscViennaCLFlag for the vector
186     Does NOT zero the ViennaCL array
187 
188  */
VecViennaCLAllocateCheck(Vec v)189 PetscErrorCode VecViennaCLAllocateCheck(Vec v)
190 {
191   PetscFunctionBegin;
192   if (!v->spptr) {
193     try {
194       v->spptr                                       = new Vec_ViennaCL;
195       ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
196       ((Vec_ViennaCL *)v->spptr)->GPUarray           = ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
197 
198     } catch (std::exception const &ex) {
199       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
200     }
201   }
202   PetscFunctionReturn(PETSC_SUCCESS);
203 }
204 
205 /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
VecViennaCLCopyToGPU(Vec v)206 PetscErrorCode VecViennaCLCopyToGPU(Vec v)
207 {
208   PetscFunctionBegin;
209   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
210   PetscCall(VecViennaCLAllocateCheck(v));
211   if (v->map->n > 0) {
212     if (v->offloadmask == PETSC_OFFLOAD_CPU) {
213       PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
214       try {
215         ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
216         viennacl::fast_copy(*(PetscScalar **)v->data, *(PetscScalar **)v->data + v->map->n, vec->begin());
217         ViennaCLWaitForGPU();
218       } catch (std::exception const &ex) {
219         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
220       }
221       PetscCall(PetscLogCpuToGpu(v->map->n * sizeof(PetscScalar)));
222       PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
223       v->offloadmask = PETSC_OFFLOAD_BOTH;
224     }
225   }
226   PetscFunctionReturn(PETSC_SUCCESS);
227 }
228 
229 /*
230      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
231 */
VecViennaCLCopyFromGPU(Vec v)232 PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
233 {
234   PetscFunctionBegin;
235   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
236   PetscCall(VecViennaCLAllocateCheckHost(v));
237   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
238     PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
239     try {
240       ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
241       viennacl::fast_copy(vec->begin(), vec->end(), *(PetscScalar **)v->data);
242       ViennaCLWaitForGPU();
243     } catch (std::exception const &ex) {
244       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
245     }
246     PetscCall(PetscLogGpuToCpu(v->map->n * sizeof(PetscScalar)));
247     PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
248     v->offloadmask = PETSC_OFFLOAD_BOTH;
249   }
250   PetscFunctionReturn(PETSC_SUCCESS);
251 }
252 
253 /* Copy on CPU */
VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)254 static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin, Vec yin)
255 {
256   PetscScalar       *ya;
257   const PetscScalar *xa;
258 
259   PetscFunctionBegin;
260   PetscCall(VecViennaCLAllocateCheckHost(xin));
261   PetscCall(VecViennaCLAllocateCheckHost(yin));
262   if (xin != yin) {
263     PetscCall(VecGetArrayRead(xin, &xa));
264     PetscCall(VecGetArray(yin, &ya));
265     PetscCall(PetscArraycpy(ya, xa, xin->map->n));
266     PetscCall(VecRestoreArrayRead(xin, &xa));
267     PetscCall(VecRestoreArray(yin, &ya));
268   }
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)272 static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin, PetscRandom r)
273 {
274   PetscInt     n = xin->map->n, i;
275   PetscScalar *xx;
276 
277   PetscFunctionBegin;
278   PetscCall(VecGetArray(xin, &xx));
279   for (i = 0; i < n; i++) PetscCall(PetscRandomGetValue(r, &xx[i]));
280   PetscCall(VecRestoreArray(xin, &xx));
281   PetscFunctionReturn(PETSC_SUCCESS);
282 }
283 
VecDestroy_SeqViennaCL_Private(Vec v)284 static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
285 {
286   Vec_Seq *vs = (Vec_Seq *)v->data;
287 
288   PetscFunctionBegin;
289   PetscCall(PetscObjectSAWsViewOff(v));
290   PetscCall(PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->n));
291   if (vs->array_allocated) PetscCall(PetscFree(vs->array_allocated));
292   PetscCall(PetscFree(vs));
293   PetscFunctionReturn(PETSC_SUCCESS);
294 }
295 
VecResetArray_SeqViennaCL_Private(Vec vin)296 static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
297 {
298   Vec_Seq *v = (Vec_Seq *)vin->data;
299 
300   PetscFunctionBegin;
301   v->array         = v->unplacedarray;
302   v->unplacedarray = 0;
303   PetscFunctionReturn(PETSC_SUCCESS);
304 }
305 
306 /*MC
307    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
308 
309    Options Database Keys:
310 . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
311 
312   Level: beginner
313 
314 .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateSeqWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`, `VecCreateSeq()`
315 M*/
316 
VecAYPX_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)317 PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
318 {
319   const ViennaCLVector *xgpu;
320   ViennaCLVector       *ygpu;
321 
322   PetscFunctionBegin;
323   PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
324   PetscCall(VecViennaCLGetArray(yin, &ygpu));
325   PetscCall(PetscLogGpuTimeBegin());
326   try {
327     if (alpha != 0.0 && xin->map->n > 0) {
328       *ygpu = *xgpu + alpha * *ygpu;
329       PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
330     } else {
331       *ygpu = *xgpu;
332     }
333     ViennaCLWaitForGPU();
334   } catch (std::exception const &ex) {
335     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
336   }
337   PetscCall(PetscLogGpuTimeEnd());
338   PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
339   PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
340   PetscFunctionReturn(PETSC_SUCCESS);
341 }
342 
VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)343 PetscErrorCode VecAXPY_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
344 {
345   const ViennaCLVector *xgpu;
346   ViennaCLVector       *ygpu;
347 
348   PetscFunctionBegin;
349   if (alpha != 0.0 && xin->map->n > 0) {
350     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
351     PetscCall(VecViennaCLGetArray(yin, &ygpu));
352     PetscCall(PetscLogGpuTimeBegin());
353     try {
354       *ygpu += alpha * *xgpu;
355       ViennaCLWaitForGPU();
356     } catch (std::exception const &ex) {
357       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
358     }
359     PetscCall(PetscLogGpuTimeEnd());
360     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
361     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
362     PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
363   }
364   PetscFunctionReturn(PETSC_SUCCESS);
365 }
366 
VecPointwiseDivide_SeqViennaCL(Vec win,Vec xin,Vec yin)367 PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
368 {
369   const ViennaCLVector *xgpu, *ygpu;
370   ViennaCLVector       *wgpu;
371 
372   PetscFunctionBegin;
373   if (xin->map->n > 0) {
374     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
375     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
376     PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
377     PetscCall(PetscLogGpuTimeBegin());
378     try {
379       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
380       ViennaCLWaitForGPU();
381     } catch (std::exception const &ex) {
382       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
383     }
384     PetscCall(PetscLogGpuTimeEnd());
385     PetscCall(PetscLogGpuFlops(win->map->n));
386     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
387     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
388     PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
389   }
390   PetscFunctionReturn(PETSC_SUCCESS);
391 }
392 
VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin,Vec yin)393 PetscErrorCode VecWAXPY_SeqViennaCL(Vec win, PetscScalar alpha, Vec xin, Vec yin)
394 {
395   const ViennaCLVector *xgpu, *ygpu;
396   ViennaCLVector       *wgpu;
397 
398   PetscFunctionBegin;
399   if (alpha == 0.0 && xin->map->n > 0) {
400     PetscCall(VecCopy_SeqViennaCL(yin, win));
401   } else {
402     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
403     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
404     PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
405     PetscCall(PetscLogGpuTimeBegin());
406     if (alpha == 1.0) {
407       try {
408         *wgpu = *ygpu + *xgpu;
409       } catch (std::exception const &ex) {
410         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
411       }
412       PetscCall(PetscLogGpuFlops(win->map->n));
413     } else if (alpha == -1.0) {
414       try {
415         *wgpu = *ygpu - *xgpu;
416       } catch (std::exception const &ex) {
417         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
418       }
419       PetscCall(PetscLogGpuFlops(win->map->n));
420     } else {
421       try {
422         *wgpu = *ygpu + alpha * *xgpu;
423       } catch (std::exception const &ex) {
424         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
425       }
426       PetscCall(PetscLogGpuFlops(2 * win->map->n));
427     }
428     ViennaCLWaitForGPU();
429     PetscCall(PetscLogGpuTimeEnd());
430     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
431     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
432     PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
433   }
434   PetscFunctionReturn(PETSC_SUCCESS);
435 }
436 
437 /*
438  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
439  *
440  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
441  * hence there is an iterated application of these until the final result is obtained
442  */
VecMAXPY_SeqViennaCL(Vec xin,PetscInt nv,const PetscScalar * alpha,Vec * y)443 PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
444 {
445   PetscInt j;
446 
447   PetscFunctionBegin;
448   for (j = 0; j < nv; ++j) {
449     if (j + 1 < nv) {
450       PetscCall(VecAXPBYPCZ_SeqViennaCL(xin, alpha[j], alpha[j + 1], 1.0, y[j], y[j + 1]));
451       ++j;
452     } else {
453       PetscCall(VecAXPY_SeqViennaCL(xin, alpha[j], y[j]));
454     }
455   }
456   ViennaCLWaitForGPU();
457   PetscFunctionReturn(PETSC_SUCCESS);
458 }
459 
VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar * z)460 PetscErrorCode VecDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
461 {
462   const ViennaCLVector *xgpu, *ygpu;
463 
464   PetscFunctionBegin;
465   if (xin->map->n > 0) {
466     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
467     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
468     PetscCall(PetscLogGpuTimeBegin());
469     try {
470       *z = viennacl::linalg::inner_prod(*xgpu, *ygpu);
471     } catch (std::exception const &ex) {
472       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
473     }
474     ViennaCLWaitForGPU();
475     PetscCall(PetscLogGpuTimeEnd());
476     if (xin->map->n > 0) PetscCall(PetscLogGpuFlops(2.0 * xin->map->n - 1));
477     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
478     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
479   } else *z = 0.0;
480   PetscFunctionReturn(PETSC_SUCCESS);
481 }
482 
483 /*
484  * Operation z[j] = dot(x, y[j])
485  *
486  * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
487  */
VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar * z)488 PetscErrorCode VecMDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
489 {
490   PetscInt                                                n = xin->map->n, i;
491   const ViennaCLVector                                   *xgpu, *ygpu;
492   Vec                                                    *yyin = (Vec *)yin;
493   std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
494 
495   PetscFunctionBegin;
496   if (xin->map->n > 0) {
497     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
498     for (i = 0; i < nv; i++) {
499       PetscCall(VecViennaCLGetArrayRead(yyin[i], &ygpu));
500       ygpu_array[i] = ygpu;
501     }
502     PetscCall(PetscLogGpuTimeBegin());
503     viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
504     ViennaCLVector                      result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
505     viennacl::copy(result.begin(), result.end(), z);
506     for (i = 0; i < nv; i++) PetscCall(VecViennaCLRestoreArrayRead(yyin[i], &ygpu));
507     ViennaCLWaitForGPU();
508     PetscCall(PetscLogGpuTimeEnd());
509     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
510     PetscCall(PetscLogGpuFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
511   } else {
512     for (i = 0; i < nv; i++) z[i] = 0.0;
513   }
514   PetscFunctionReturn(PETSC_SUCCESS);
515 }
516 
VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar * z)517 static PetscErrorCode VecMTDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
518 {
519   PetscFunctionBegin;
520   /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
521   PetscCall(VecMDot_SeqViennaCL(xin, nv, yin, z));
522   ViennaCLWaitForGPU();
523   PetscFunctionReturn(PETSC_SUCCESS);
524 }
525 
VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)526 PetscErrorCode VecSet_SeqViennaCL(Vec xin, PetscScalar alpha)
527 {
528   ViennaCLVector *xgpu;
529 
530   PetscFunctionBegin;
531   if (xin->map->n > 0) {
532     PetscCall(VecViennaCLGetArrayWrite(xin, &xgpu));
533     PetscCall(PetscLogGpuTimeBegin());
534     try {
535       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
536       ViennaCLWaitForGPU();
537     } catch (std::exception const &ex) {
538       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
539     }
540     PetscCall(PetscLogGpuTimeEnd());
541     PetscCall(VecViennaCLRestoreArrayWrite(xin, &xgpu));
542   }
543   PetscFunctionReturn(PETSC_SUCCESS);
544 }
545 
VecScale_SeqViennaCL(Vec xin,PetscScalar alpha)546 PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
547 {
548   ViennaCLVector *xgpu;
549 
550   PetscFunctionBegin;
551   if (alpha == 0.0 && xin->map->n > 0) {
552     PetscCall(VecSet_SeqViennaCL(xin, alpha));
553     PetscCall(PetscLogGpuFlops(xin->map->n));
554   } else if (alpha != 1.0 && xin->map->n > 0) {
555     PetscCall(VecViennaCLGetArray(xin, &xgpu));
556     PetscCall(PetscLogGpuTimeBegin());
557     try {
558       *xgpu *= alpha;
559       ViennaCLWaitForGPU();
560     } catch (std::exception const &ex) {
561       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
562     }
563     PetscCall(PetscLogGpuTimeEnd());
564     PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
565     PetscCall(PetscLogGpuFlops(xin->map->n));
566   }
567   PetscFunctionReturn(PETSC_SUCCESS);
568 }
569 
VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar * z)570 PetscErrorCode VecTDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
571 {
572   PetscFunctionBegin;
573   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
574   PetscCall(VecDot_SeqViennaCL(xin, yin, z));
575   ViennaCLWaitForGPU();
576   PetscFunctionReturn(PETSC_SUCCESS);
577 }
578 
VecCopy_SeqViennaCL(Vec xin,Vec yin)579 PetscErrorCode VecCopy_SeqViennaCL(Vec xin, Vec yin)
580 {
581   const ViennaCLVector *xgpu;
582   ViennaCLVector       *ygpu;
583 
584   PetscFunctionBegin;
585   if (xin != yin && xin->map->n > 0) {
586     if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
587       PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
588       PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
589       PetscCall(PetscLogGpuTimeBegin());
590       try {
591         *ygpu = *xgpu;
592         ViennaCLWaitForGPU();
593       } catch (std::exception const &ex) {
594         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
595       }
596       PetscCall(PetscLogGpuTimeEnd());
597       PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
598       PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
599 
600     } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
601       /* copy in CPU if we are on the CPU*/
602       PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
603       ViennaCLWaitForGPU();
604     } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
605       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
606       if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
607         /* copy in CPU */
608         PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
609         ViennaCLWaitForGPU();
610       } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
611         /* copy in GPU */
612         PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
613         PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
614         PetscCall(PetscLogGpuTimeBegin());
615         try {
616           *ygpu = *xgpu;
617           ViennaCLWaitForGPU();
618         } catch (std::exception const &ex) {
619           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
620         }
621         PetscCall(PetscLogGpuTimeEnd());
622         PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
623         PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
624       } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
625         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
626            default to copy in GPU (this is an arbitrary choice) */
627         PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
628         PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
629         PetscCall(PetscLogGpuTimeBegin());
630         try {
631           *ygpu = *xgpu;
632           ViennaCLWaitForGPU();
633         } catch (std::exception const &ex) {
634           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
635         }
636         PetscCall(PetscLogGpuTimeEnd());
637         PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
638         PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
639       } else {
640         PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
641         ViennaCLWaitForGPU();
642       }
643     }
644   }
645   PetscFunctionReturn(PETSC_SUCCESS);
646 }
647 
VecSwap_SeqViennaCL(Vec xin,Vec yin)648 PetscErrorCode VecSwap_SeqViennaCL(Vec xin, Vec yin)
649 {
650   ViennaCLVector *xgpu, *ygpu;
651 
652   PetscFunctionBegin;
653   if (xin != yin && xin->map->n > 0) {
654     PetscCall(VecViennaCLGetArray(xin, &xgpu));
655     PetscCall(VecViennaCLGetArray(yin, &ygpu));
656     PetscCall(PetscLogGpuTimeBegin());
657     try {
658       viennacl::swap(*xgpu, *ygpu);
659       ViennaCLWaitForGPU();
660     } catch (std::exception const &ex) {
661       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
662     }
663     PetscCall(PetscLogGpuTimeEnd());
664     PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
665     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
666   }
667   PetscFunctionReturn(PETSC_SUCCESS);
668 }
669 
670 // y = alpha * x + beta * y
VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)671 PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin, PetscScalar alpha, PetscScalar beta, Vec xin)
672 {
673   PetscScalar           a = alpha, b = beta;
674   const ViennaCLVector *xgpu;
675   ViennaCLVector       *ygpu;
676 
677   PetscFunctionBegin;
678   if (a == 0.0 && xin->map->n > 0) {
679     PetscCall(VecScale_SeqViennaCL(yin, beta));
680   } else if (b == 1.0 && xin->map->n > 0) {
681     PetscCall(VecAXPY_SeqViennaCL(yin, alpha, xin));
682   } else if (a == 1.0 && xin->map->n > 0) {
683     PetscCall(VecAYPX_SeqViennaCL(yin, beta, xin));
684   } else if (b == 0.0 && xin->map->n > 0) {
685     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
686     PetscCall(VecViennaCLGetArray(yin, &ygpu));
687     PetscCall(PetscLogGpuTimeBegin());
688     try {
689       *ygpu = *xgpu * alpha;
690       ViennaCLWaitForGPU();
691     } catch (std::exception const &ex) {
692       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
693     }
694     PetscCall(PetscLogGpuTimeEnd());
695     PetscCall(PetscLogGpuFlops(xin->map->n));
696     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
697     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
698   } else if (xin->map->n > 0) {
699     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
700     PetscCall(VecViennaCLGetArray(yin, &ygpu));
701     PetscCall(PetscLogGpuTimeBegin());
702     try {
703       *ygpu = *xgpu * alpha + *ygpu * beta;
704       ViennaCLWaitForGPU();
705     } catch (std::exception const &ex) {
706       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
707     }
708     PetscCall(PetscLogGpuTimeEnd());
709     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
710     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
711     PetscCall(PetscLogGpuFlops(3.0 * xin->map->n));
712   }
713   PetscFunctionReturn(PETSC_SUCCESS);
714 }
715 
716 /* operation  z = alpha * x + beta *y + gamma *z*/
VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)717 PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
718 {
719   PetscInt              n = zin->map->n;
720   const ViennaCLVector *xgpu, *ygpu;
721   ViennaCLVector       *zgpu;
722 
723   PetscFunctionBegin;
724   PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
725   PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
726   PetscCall(VecViennaCLGetArray(zin, &zgpu));
727   if (alpha == 0.0 && xin->map->n > 0) {
728     PetscCall(PetscLogGpuTimeBegin());
729     try {
730       if (beta == 0.0) {
731         *zgpu = gamma * *zgpu;
732         ViennaCLWaitForGPU();
733         PetscCall(PetscLogGpuFlops(1.0 * n));
734       } else if (gamma == 0.0) {
735         *zgpu = beta * *ygpu;
736         ViennaCLWaitForGPU();
737         PetscCall(PetscLogGpuFlops(1.0 * n));
738       } else {
739         *zgpu = beta * *ygpu + gamma * *zgpu;
740         ViennaCLWaitForGPU();
741         PetscCall(PetscLogGpuFlops(3.0 * n));
742       }
743     } catch (std::exception const &ex) {
744       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
745     }
746     PetscCall(PetscLogGpuTimeEnd());
747     PetscCall(PetscLogGpuFlops(3.0 * n));
748   } else if (beta == 0.0 && xin->map->n > 0) {
749     PetscCall(PetscLogGpuTimeBegin());
750     try {
751       if (gamma == 0.0) {
752         *zgpu = alpha * *xgpu;
753         ViennaCLWaitForGPU();
754         PetscCall(PetscLogGpuFlops(1.0 * n));
755       } else {
756         *zgpu = alpha * *xgpu + gamma * *zgpu;
757         ViennaCLWaitForGPU();
758         PetscCall(PetscLogGpuFlops(3.0 * n));
759       }
760     } catch (std::exception const &ex) {
761       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
762     }
763     PetscCall(PetscLogGpuTimeEnd());
764   } else if (gamma == 0.0 && xin->map->n > 0) {
765     PetscCall(PetscLogGpuTimeBegin());
766     try {
767       *zgpu = alpha * *xgpu + beta * *ygpu;
768       ViennaCLWaitForGPU();
769     } catch (std::exception const &ex) {
770       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
771     }
772     PetscCall(PetscLogGpuTimeEnd());
773     PetscCall(PetscLogGpuFlops(3.0 * n));
774   } else if (xin->map->n > 0) {
775     PetscCall(PetscLogGpuTimeBegin());
776     try {
777       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
778       if (gamma != 1.0) *zgpu *= gamma;
779       *zgpu += alpha * *xgpu + beta * *ygpu;
780       ViennaCLWaitForGPU();
781     } catch (std::exception const &ex) {
782       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
783     }
784     PetscCall(PetscLogGpuTimeEnd());
785     PetscCall(VecViennaCLRestoreArray(zin, &zgpu));
786     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
787     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
788     PetscCall(PetscLogGpuFlops(5.0 * n));
789   }
790   PetscFunctionReturn(PETSC_SUCCESS);
791 }
792 
VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)793 PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win, Vec xin, Vec yin)
794 {
795   PetscInt              n = win->map->n;
796   const ViennaCLVector *xgpu, *ygpu;
797   ViennaCLVector       *wgpu;
798 
799   PetscFunctionBegin;
800   if (xin->map->n > 0) {
801     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
802     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
803     PetscCall(VecViennaCLGetArray(win, &wgpu));
804     PetscCall(PetscLogGpuTimeBegin());
805     try {
806       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
807       ViennaCLWaitForGPU();
808     } catch (std::exception const &ex) {
809       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
810     }
811     PetscCall(PetscLogGpuTimeEnd());
812     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
813     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
814     PetscCall(VecViennaCLRestoreArray(win, &wgpu));
815     PetscCall(PetscLogGpuFlops(n));
816   }
817   PetscFunctionReturn(PETSC_SUCCESS);
818 }
819 
VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal * z)820 PetscErrorCode VecNorm_SeqViennaCL(Vec xin, NormType type, PetscReal *z)
821 {
822   PetscInt              n = xin->map->n;
823   PetscBLASInt          bn;
824   const ViennaCLVector *xgpu;
825 
826   PetscFunctionBegin;
827   if (xin->map->n > 0) {
828     PetscCall(PetscBLASIntCast(n, &bn));
829     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
830     if (type == NORM_2 || type == NORM_FROBENIUS) {
831       PetscCall(PetscLogGpuTimeBegin());
832       try {
833         *z = viennacl::linalg::norm_2(*xgpu);
834         ViennaCLWaitForGPU();
835       } catch (std::exception const &ex) {
836         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
837       }
838       PetscCall(PetscLogGpuTimeEnd());
839       PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
840     } else if (type == NORM_INFINITY) {
841       PetscCall(PetscLogGpuTimeBegin());
842       try {
843         *z = viennacl::linalg::norm_inf(*xgpu);
844         ViennaCLWaitForGPU();
845       } catch (std::exception const &ex) {
846         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
847       }
848       PetscCall(PetscLogGpuTimeEnd());
849     } else if (type == NORM_1) {
850       PetscCall(PetscLogGpuTimeBegin());
851       try {
852         *z = viennacl::linalg::norm_1(*xgpu);
853         ViennaCLWaitForGPU();
854       } catch (std::exception const &ex) {
855         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
856       }
857       PetscCall(PetscLogGpuTimeEnd());
858       PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
859     } else if (type == NORM_1_AND_2) {
860       PetscCall(PetscLogGpuTimeBegin());
861       try {
862         *z       = viennacl::linalg::norm_1(*xgpu);
863         *(z + 1) = viennacl::linalg::norm_2(*xgpu);
864         ViennaCLWaitForGPU();
865       } catch (std::exception const &ex) {
866         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
867       }
868       PetscCall(PetscLogGpuTimeEnd());
869       PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
870       PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
871     }
872     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
873   } else if (type == NORM_1_AND_2) {
874     *z       = 0.0;
875     *(z + 1) = 0.0;
876   } else *z = 0.0;
877   PetscFunctionReturn(PETSC_SUCCESS);
878 }
879 
VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)880 PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin, PetscRandom r)
881 {
882   PetscFunctionBegin;
883   PetscCall(VecSetRandom_SeqViennaCL_Private(xin, r));
884   xin->offloadmask = PETSC_OFFLOAD_CPU;
885   PetscFunctionReturn(PETSC_SUCCESS);
886 }
887 
VecResetArray_SeqViennaCL(Vec vin)888 PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
889 {
890   PetscFunctionBegin;
891   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
892   PetscCall(VecViennaCLCopyFromGPU(vin));
893   PetscCall(VecResetArray_SeqViennaCL_Private(vin));
894   vin->offloadmask = PETSC_OFFLOAD_CPU;
895   PetscFunctionReturn(PETSC_SUCCESS);
896 }
897 
VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar * a)898 PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
899 {
900   PetscFunctionBegin;
901   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
902   PetscCall(VecViennaCLCopyFromGPU(vin));
903   PetscCall(VecPlaceArray_Seq(vin, a));
904   vin->offloadmask = PETSC_OFFLOAD_CPU;
905   PetscFunctionReturn(PETSC_SUCCESS);
906 }
907 
VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar * a)908 PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
909 {
910   PetscFunctionBegin;
911   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
912   PetscCall(VecViennaCLCopyFromGPU(vin));
913   PetscCall(VecReplaceArray_Seq(vin, a));
914   vin->offloadmask = PETSC_OFFLOAD_CPU;
915   PetscFunctionReturn(PETSC_SUCCESS);
916 }
917 
918 /*@C
919   VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
920 
921   Collective
922 
923   Input Parameters:
924 + comm - the communicator, should be PETSC_COMM_SELF
925 - n    - the vector length
926 
927   Output Parameter:
928 . v - the vector
929 
930   Notes:
931   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
932   same type as an existing vector.
933 
934   Level: intermediate
935 
936 .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
937 @*/
VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec * v)938 PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm, PetscInt n, Vec *v)
939 {
940   PetscFunctionBegin;
941   PetscCall(VecCreate(comm, v));
942   PetscCall(VecSetSizes(*v, n, n));
943   PetscCall(VecSetType(*v, VECSEQVIENNACL));
944   PetscFunctionReturn(PETSC_SUCCESS);
945 }
946 
947 /*@C
948   VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
949   where the user provides the array space to store the vector values.
950 
951   Collective
952 
953   Input Parameters:
954 + comm  - the communicator, should be PETSC_COMM_SELF
955 . bs    - the block size
956 . n     - the vector length
957 - array - viennacl array where the vector elements are to be stored.
958 
959   Output Parameter:
960 . V - the vector
961 
962   Notes:
963   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
964   same type as an existing vector.
965 
966   If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
967   at a later stage to SET the array for storing the vector values.
968 
969   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
970   The user should not free the array until the vector is destroyed.
971 
972   Level: intermediate
973 
974 .seealso: `VecCreateMPIViennaCLWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
975           `VecCreateGhost()`, `VecCreateSeq()`, `VecCUDAPlaceArray()`, `VecCreateSeqWithArray()`,
976           `VecCreateMPIWithArray()`
977 @*/
VecCreateSeqViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const ViennaCLVector * array,Vec * V)978 PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const ViennaCLVector *array, Vec *V) PeNS
979 {
980   PetscMPIInt size;
981 
982   PetscFunctionBegin;
983   PetscCall(VecCreate(comm, V));
984   PetscCall(VecSetSizes(*V, n, n));
985   PetscCall(VecSetBlockSize(*V, bs));
986   PetscCallMPI(MPI_Comm_size(comm, &size));
987   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");
988   PetscCall(VecCreate_SeqViennaCL_Private(*V, array));
989   PetscFunctionReturn(PETSC_SUCCESS);
990 }
991 
992 /*@C
993   VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
994   the user provides the array space to store the vector values.
995 
996   Collective
997 
998   Input Parameters:
999 + comm        - the communicator, should be PETSC_COMM_SELF
1000 . bs          - the block size
1001 . n           - the vector length
1002 . cpuarray    - CPU memory where the vector elements are to be stored.
1003 - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.
1004 
1005   Output Parameter:
1006 . V - the vector
1007 
1008   Notes:
1009   If both cpuarray and viennaclvec are provided, the caller must ensure that
1010   the provided arrays have identical values.
1011 
1012   PETSc does NOT free the provided arrays when the vector is destroyed via
1013   VecDestroy(). The user should not free the array until the vector is
1014   destroyed.
1015 
1016   Level: intermediate
1017 
1018 .seealso: `VecCreateMPIViennaCLWithArrays()`, `VecCreate()`, `VecCreateSeqWithArray()`,
1019           `VecViennaCLPlaceArray()`, `VecPlaceArray()`, `VecCreateSeqCUDAWithArrays()`,
1020           `VecViennaCLAllocateCheckHost()`
1021 @*/
VecCreateSeqViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const ViennaCLVector * viennaclvec,Vec * V)1022 PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *V) PeNS
1023 {
1024   PetscMPIInt size;
1025 
1026   PetscFunctionBegin;
1027   PetscCallMPI(MPI_Comm_size(comm, &size));
1028   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");
1029 
1030   // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1031   PetscCall(VecCreateSeqViennaCLWithArray(comm, bs, n, viennaclvec, V));
1032 
1033   if (cpuarray && viennaclvec) {
1034     Vec_Seq *s        = (Vec_Seq *)((*V)->data);
1035     s->array          = (PetscScalar *)cpuarray;
1036     (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1037   } else if (cpuarray) {
1038     Vec_Seq *s        = (Vec_Seq *)((*V)->data);
1039     s->array          = (PetscScalar *)cpuarray;
1040     (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1041   } else if (viennaclvec) {
1042     (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1043   } else {
1044     (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1045   }
1046   PetscFunctionReturn(PETSC_SUCCESS);
1047 }
1048 
1049 /*@C
1050   VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1051   the one provided by the user. This is useful to avoid a copy.
1052 
1053   Not Collective
1054 
1055   Input Parameters:
1056 + vin - the vector
1057 - a   - the ViennaCL vector
1058 
1059   Notes:
1060   You can return to the original viennacl vector with a call to `VecViennaCLResetArray()`.
1061   It is not possible to use `VecViennaCLPlaceArray()` and `VecPlaceArray()` at the same time on
1062   the same vector.
1063 
1064   Level: intermediate
1065 
1066 .seealso: `VecPlaceArray()`, `VecSetValues()`, `VecViennaCLResetArray()`,
1067           `VecCUDAPlaceArray()`,
1068 
1069 @*/
VecViennaCLPlaceArray(Vec vin,const ViennaCLVector * a)1070 PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin, const ViennaCLVector *a)
1071 {
1072   PetscFunctionBegin;
1073   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1074   PetscCall(VecViennaCLCopyToGPU(vin));
1075   PetscCheck(!((Vec_Seq *)vin->data)->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1076   ((Vec_Seq *)vin->data)->unplacedarray  = (PetscScalar *)((Vec_ViennaCL *)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1077   ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)a;
1078   vin->offloadmask                       = PETSC_OFFLOAD_GPU;
1079   PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1080   PetscFunctionReturn(PETSC_SUCCESS);
1081 }
1082 
1083 /*@C
1084   VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1085   after the use of VecViennaCLPlaceArray().
1086 
1087   Not Collective
1088 
1089   Input Parameters:
1090 . vin - the vector
1091 
1092   Level: developer
1093 
1094 .seealso: `VecViennaCLPlaceArray()`, `VecResetArray()`, `VecCUDAResetArray()`, `VecPlaceArray()`
1095 @*/
VecViennaCLResetArray(Vec vin)1096 PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1097 {
1098   PetscFunctionBegin;
1099   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1100   PetscCall(VecViennaCLCopyToGPU(vin));
1101   ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)((Vec_Seq *)vin->data)->unplacedarray;
1102   ((Vec_Seq *)vin->data)->unplacedarray  = 0;
1103   vin->offloadmask                       = PETSC_OFFLOAD_GPU;
1104   PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1105   PetscFunctionReturn(PETSC_SUCCESS);
1106 }
1107 
1108 /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1109  *
1110  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1111  */
VecDotNorm2_SeqViennaCL(Vec s,Vec t,PetscScalar * dp,PetscScalar * nm)1112 PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1113 {
1114   PetscFunctionBegin;
1115   PetscCall(VecDot_SeqViennaCL(s, t, dp));
1116   PetscCall(VecNorm_SeqViennaCL(t, NORM_2, nm));
1117   *nm *= *nm; //squared norm required
1118   PetscFunctionReturn(PETSC_SUCCESS);
1119 }
1120 
VecDuplicate_SeqViennaCL(Vec win,Vec * V)1121 PetscErrorCode VecDuplicate_SeqViennaCL(Vec win, Vec *V)
1122 {
1123   PetscFunctionBegin;
1124   PetscCall(VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win), win->map->n, V));
1125   PetscCall(PetscLayoutReference(win->map, &(*V)->map));
1126   PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)*V)->olist));
1127   PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)*V)->qlist));
1128   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1129   PetscFunctionReturn(PETSC_SUCCESS);
1130 }
1131 
VecDestroy_SeqViennaCL(Vec v)1132 PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1133 {
1134   PetscFunctionBegin;
1135   try {
1136     if (v->spptr) {
1137       delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
1138       delete (Vec_ViennaCL *)v->spptr;
1139     }
1140   } catch (char *ex) {
1141     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
1142   }
1143   PetscCall(VecDestroy_SeqViennaCL_Private(v));
1144   PetscFunctionReturn(PETSC_SUCCESS);
1145 }
1146 
VecGetArray_SeqViennaCL(Vec v,PetscScalar ** a)1147 PetscErrorCode VecGetArray_SeqViennaCL(Vec v, PetscScalar **a)
1148 {
1149   PetscFunctionBegin;
1150   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1151     PetscCall(VecViennaCLCopyFromGPU(v));
1152   } else {
1153     PetscCall(VecViennaCLAllocateCheckHost(v));
1154   }
1155   *a = *((PetscScalar **)v->data);
1156   PetscFunctionReturn(PETSC_SUCCESS);
1157 }
1158 
VecRestoreArray_SeqViennaCL(Vec v,PetscScalar ** a)1159 PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v, PetscScalar **a)
1160 {
1161   PetscFunctionBegin;
1162   v->offloadmask = PETSC_OFFLOAD_CPU;
1163   PetscFunctionReturn(PETSC_SUCCESS);
1164 }
1165 
VecGetArrayWrite_SeqViennaCL(Vec v,PetscScalar ** a)1166 PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v, PetscScalar **a)
1167 {
1168   PetscFunctionBegin;
1169   PetscCall(VecViennaCLAllocateCheckHost(v));
1170   *a = *((PetscScalar **)v->data);
1171   PetscFunctionReturn(PETSC_SUCCESS);
1172 }
1173 
VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)1174 static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V, PetscBool flg)
1175 {
1176   PetscFunctionBegin;
1177   V->boundtocpu = flg;
1178   if (flg) {
1179     PetscCall(VecViennaCLCopyFromGPU(V));
1180     V->offloadmask          = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1181     V->ops->dot             = VecDot_Seq;
1182     V->ops->norm            = VecNorm_Seq;
1183     V->ops->tdot            = VecTDot_Seq;
1184     V->ops->scale           = VecScale_Seq;
1185     V->ops->copy            = VecCopy_Seq;
1186     V->ops->set             = VecSet_Seq;
1187     V->ops->swap            = VecSwap_Seq;
1188     V->ops->axpy            = VecAXPY_Seq;
1189     V->ops->axpby           = VecAXPBY_Seq;
1190     V->ops->axpbypcz        = VecAXPBYPCZ_Seq;
1191     V->ops->pointwisemult   = VecPointwiseMult_Seq;
1192     V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1193     V->ops->setrandom       = VecSetRandom_Seq;
1194     V->ops->dot_local       = VecDot_Seq;
1195     V->ops->tdot_local      = VecTDot_Seq;
1196     V->ops->norm_local      = VecNorm_Seq;
1197     V->ops->mdot_local      = VecMDot_Seq;
1198     V->ops->mtdot_local     = VecMTDot_Seq;
1199     V->ops->maxpy           = VecMAXPY_Seq;
1200     V->ops->mdot            = VecMDot_Seq;
1201     V->ops->mtdot           = VecMTDot_Seq;
1202     V->ops->aypx            = VecAYPX_Seq;
1203     V->ops->waxpy           = VecWAXPY_Seq;
1204     V->ops->dotnorm2        = NULL;
1205     V->ops->placearray      = VecPlaceArray_Seq;
1206     V->ops->replacearray    = VecReplaceArray_Seq;
1207     V->ops->resetarray      = VecResetArray_Seq;
1208     V->ops->duplicate       = VecDuplicate_Seq;
1209   } else {
1210     V->ops->dot             = VecDot_SeqViennaCL;
1211     V->ops->norm            = VecNorm_SeqViennaCL;
1212     V->ops->tdot            = VecTDot_SeqViennaCL;
1213     V->ops->scale           = VecScale_SeqViennaCL;
1214     V->ops->copy            = VecCopy_SeqViennaCL;
1215     V->ops->set             = VecSet_SeqViennaCL;
1216     V->ops->swap            = VecSwap_SeqViennaCL;
1217     V->ops->axpy            = VecAXPY_SeqViennaCL;
1218     V->ops->axpby           = VecAXPBY_SeqViennaCL;
1219     V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1220     V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1221     V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1222     V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1223     V->ops->dot_local       = VecDot_SeqViennaCL;
1224     V->ops->tdot_local      = VecTDot_SeqViennaCL;
1225     V->ops->norm_local      = VecNorm_SeqViennaCL;
1226     V->ops->mdot_local      = VecMDot_SeqViennaCL;
1227     V->ops->mtdot_local     = VecMTDot_SeqViennaCL;
1228     V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1229     V->ops->mdot            = VecMDot_SeqViennaCL;
1230     V->ops->mtdot           = VecMTDot_SeqViennaCL;
1231     V->ops->aypx            = VecAYPX_SeqViennaCL;
1232     V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1233     V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1234     V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1235     V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1236     V->ops->resetarray      = VecResetArray_SeqViennaCL;
1237     V->ops->destroy         = VecDestroy_SeqViennaCL;
1238     V->ops->duplicate       = VecDuplicate_SeqViennaCL;
1239     V->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
1240     V->ops->getarray        = VecGetArray_SeqViennaCL;
1241     V->ops->restorearray    = VecRestoreArray_SeqViennaCL;
1242   }
1243   V->ops->duplicatevecs = VecDuplicateVecs_Default;
1244   PetscFunctionReturn(PETSC_SUCCESS);
1245 }
1246 
VecCreate_SeqViennaCL(Vec V)1247 PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1248 {
1249   PetscMPIInt size;
1250 
1251   PetscFunctionBegin;
1252   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1253   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1254   PetscCall(VecCreate_Seq_Private(V, 0));
1255   PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));
1256 
1257   PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1258   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1259 
1260   PetscCall(VecViennaCLAllocateCheck(V));
1261   PetscCall(VecSet_SeqViennaCL(V, 0.0));
1262   PetscFunctionReturn(PETSC_SUCCESS);
1263 }
1264 
1265 /*@C
1266   VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.
1267 
1268   Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1269   invoking clReleaseContext().
1270 
1271   Input Parameter:
1272 . v - the vector
1273 
1274   Output Parameter:
1275 . ctx - pointer to the underlying CL context
1276 
1277   Level: intermediate
1278 
1279 .seealso: `VecViennaCLGetCLQueue()`, `VecViennaCLGetCLMemRead()`
1280 @*/
VecViennaCLGetCLContext(Vec v,PETSC_UINTPTR_T * ctx)1281 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T *ctx)
1282 {
1283 #if !defined(PETSC_HAVE_OPENCL)
1284   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_context.");
1285 #else
1286 
1287   PetscFunctionBegin;
1288   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1289   const ViennaCLVector *v_vcl;
1290   PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1291   try {
1292     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1293     const cl_context       ocl_ctx = vcl_ctx.handle().get();
1294     clRetainContext(ocl_ctx);
1295     *ctx = (PETSC_UINTPTR_T)ocl_ctx;
1296   } catch (std::exception const &ex) {
1297     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1298   }
1299   PetscFunctionReturn(PETSC_SUCCESS);
1300 #endif
1301 }
1302 
1303 /*@C
1304   VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1305   operations of the Vec are enqueued.
1306 
1307   Caller should cast (*queue) to (const cl_command_queue). Caller is
1308   responsible for invoking clReleaseCommandQueue().
1309 
1310   Input Parameter:
1311 . v - the vector
1312 
1313   Output Parameter:
1314 . queue - pointer to the CL command queue
1315 
1316   Level: intermediate
1317 
1318 .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemRead()`
1319 @*/
VecViennaCLGetCLQueue(Vec v,PETSC_UINTPTR_T * queue)1320 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T *queue)
1321 {
1322 #if !defined(PETSC_HAVE_OPENCL)
1323   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1324 #else
1325   PetscFunctionBegin;
1326   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1327   const ViennaCLVector *v_vcl;
1328   PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1329   try {
1330     viennacl::ocl::context              vcl_ctx   = v_vcl->handle().opencl_handle().context();
1331     const viennacl::ocl::command_queue &vcl_queue = vcl_ctx.current_queue();
1332     const cl_command_queue              ocl_queue = vcl_queue.handle().get();
1333     clRetainCommandQueue(ocl_queue);
1334     *queue = (PETSC_UINTPTR_T)ocl_queue;
1335   } catch (std::exception const &ex) {
1336     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1337   }
1338   PetscFunctionReturn(PETSC_SUCCESS);
1339 #endif
1340 }
1341 
1342 /*@C
1343   VecViennaCLGetCLMemRead - Provides access to the CL buffer inside a Vec.
1344 
1345   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1346   invoking clReleaseMemObject().
1347 
1348   Input Parameter:
1349 . v - the vector
1350 
1351   Output Parameter:
1352 . mem - pointer to the device buffer
1353 
1354   Level: intermediate
1355 
1356 .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1357 @*/
VecViennaCLGetCLMemRead(Vec v,PETSC_UINTPTR_T * mem)1358 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T *mem)
1359 {
1360 #if !defined(PETSC_HAVE_OPENCL)
1361   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1362 #else
1363   PetscFunctionBegin;
1364   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1365   const ViennaCLVector *v_vcl;
1366   PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1367   try {
1368     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1369     clRetainMemObject(ocl_mem);
1370     *mem = (PETSC_UINTPTR_T)ocl_mem;
1371   } catch (std::exception const &ex) {
1372     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1373   }
1374   PetscFunctionReturn(PETSC_SUCCESS);
1375 #endif
1376 }
1377 
1378 /*@C
1379   VecViennaCLGetCLMemWrite - Provides access to the CL buffer inside a Vec.
1380 
1381   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1382   invoking clReleaseMemObject().
1383 
1384   The device pointer has to be released by calling
1385   VecViennaCLRestoreCLMemWrite().  Upon restoring the vector data the data on
1386   the host will be marked as out of date.  A subsequent access of the host data
1387   will thus incur a data transfer from the device to the host.
1388 
1389   Input Parameter:
1390 . v - the vector
1391 
1392   Output Parameter:
1393 . mem - pointer to the device buffer
1394 
1395   Level: intermediate
1396 
1397 .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMemWrite()`
1398 @*/
VecViennaCLGetCLMemWrite(Vec v,PETSC_UINTPTR_T * mem)1399 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T *mem)
1400 {
1401 #if !defined(PETSC_HAVE_OPENCL)
1402   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1403 #else
1404   PetscFunctionBegin;
1405   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1406   ViennaCLVector *v_vcl;
1407   PetscCall(VecViennaCLGetArrayWrite(v, &v_vcl));
1408   try {
1409     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1410     clRetainMemObject(ocl_mem);
1411     *mem = (PETSC_UINTPTR_T)ocl_mem;
1412   } catch (std::exception const &ex) {
1413     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1414   }
1415   PetscFunctionReturn(PETSC_SUCCESS);
1416 #endif
1417 }
1418 
1419 /*@C
1420   VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1421   acquired with VecViennaCLGetCLMemWrite().
1422 
1423   This marks the host data as out of date.  Subsequent access to the
1424   vector data on the host side with for instance VecGetArray() incurs a
1425   data transfer.
1426 
1427   Input Parameter:
1428 . v - the vector
1429 
1430   Level: intermediate
1431 
1432 .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1433 @*/
VecViennaCLRestoreCLMemWrite(Vec v)1434 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1435 {
1436 #if !defined(PETSC_HAVE_OPENCL)
1437   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1438 #else
1439   PetscFunctionBegin;
1440   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1441   PetscCall(VecViennaCLRestoreArrayWrite(v, NULL));
1442   PetscFunctionReturn(PETSC_SUCCESS);
1443 #endif
1444 }
1445 
1446 /*@C
1447   VecViennaCLGetCLMem - Provides access to the CL buffer inside a Vec.
1448 
1449   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1450   invoking clReleaseMemObject().
1451 
1452   The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1453   Upon restoring the vector data the data on the host will be marked as out of
1454   date.  A subsequent access of the host data will thus incur a data transfer
1455   from the device to the host.
1456 
1457   Input Parameter:
1458 . v - the vector
1459 
1460   Output Parameter:
1461 . mem - pointer to the device buffer
1462 
1463   Level: intermediate
1464 
1465 .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMem()`
1466 @*/
VecViennaCLGetCLMem(Vec v,PETSC_UINTPTR_T * mem)1467 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T *mem)
1468 {
1469 #if !defined(PETSC_HAVE_OPENCL)
1470   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1471 #else
1472   PetscFunctionBegin;
1473   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1474   ViennaCLVector *v_vcl;
1475   PetscCall(VecViennaCLGetArray(v, &v_vcl));
1476   try {
1477     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1478     clRetainMemObject(ocl_mem);
1479     *mem = (PETSC_UINTPTR_T)ocl_mem;
1480   } catch (std::exception const &ex) {
1481     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1482   }
1483   PetscFunctionReturn(PETSC_SUCCESS);
1484 #endif
1485 }
1486 
1487 /*@C
1488   VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1489   acquired with VecViennaCLGetCLMem().
1490 
1491   This marks the host data as out of date. Subsequent access to the vector
1492   data on the host side with for instance VecGetArray() incurs a data
1493   transfer.
1494 
1495   Input Parameter:
1496 . v - the vector
1497 
1498   Level: intermediate
1499 
1500 .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMem()`
1501 @*/
VecViennaCLRestoreCLMem(Vec v)1502 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1503 {
1504 #if !defined(PETSC_HAVE_OPENCL)
1505   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1506 #else
1507   PetscFunctionBegin;
1508   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1509   PetscCall(VecViennaCLRestoreArray(v, NULL));
1510   PetscFunctionReturn(PETSC_SUCCESS);
1511 #endif
1512 }
1513 
VecCreate_SeqViennaCL_Private(Vec V,const ViennaCLVector * array)1514 PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V, const ViennaCLVector *array)
1515 {
1516   Vec_ViennaCL *vecviennacl;
1517   PetscMPIInt   size;
1518 
1519   PetscFunctionBegin;
1520   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1521   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1522   PetscCall(VecCreate_Seq_Private(V, 0));
1523   PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));
1524   PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1525   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1526 
1527   if (array) {
1528     if (!V->spptr) V->spptr = new Vec_ViennaCL;
1529     vecviennacl                     = (Vec_ViennaCL *)V->spptr;
1530     vecviennacl->GPUarray_allocated = 0;
1531     vecviennacl->GPUarray           = (ViennaCLVector *)array;
1532     V->offloadmask                  = PETSC_OFFLOAD_UNALLOCATED;
1533   }
1534   PetscFunctionReturn(PETSC_SUCCESS);
1535 }
1536