1 /*
2 Implements the sequential ViennaCL vectors.
3 */
4
5 #include <petscconf.h>
6 #include <petsc/private/vecimpl.h> /*I "petscvec.h" I*/
7 #include <../src/vec/vec/impls/dvecimpl.h>
8 #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
9
10 #include "viennacl/linalg/inner_prod.hpp"
11 #include "viennacl/linalg/norm_1.hpp"
12 #include "viennacl/linalg/norm_2.hpp"
13 #include "viennacl/linalg/norm_inf.hpp"
14
15 #ifdef VIENNACL_WITH_OPENCL
16 #include "viennacl/ocl/backend.hpp"
17 #endif
18
VecViennaCLGetArray(Vec v,ViennaCLVector ** a)19 PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
20 {
21 PetscFunctionBegin;
22 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
23 *a = 0;
24 PetscCall(VecViennaCLCopyToGPU(v));
25 *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
26 ViennaCLWaitForGPU();
27 PetscFunctionReturn(PETSC_SUCCESS);
28 }
29
VecViennaCLRestoreArray(Vec v,ViennaCLVector ** a)30 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
31 {
32 PetscFunctionBegin;
33 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
34 v->offloadmask = PETSC_OFFLOAD_GPU;
35
36 PetscCall(PetscObjectStateIncrease((PetscObject)v));
37 PetscFunctionReturn(PETSC_SUCCESS);
38 }
39
VecViennaCLGetArrayRead(Vec v,const ViennaCLVector ** a)40 PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
41 {
42 PetscFunctionBegin;
43 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
44 *a = 0;
45 PetscCall(VecViennaCLCopyToGPU(v));
46 *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
47 ViennaCLWaitForGPU();
48 PetscFunctionReturn(PETSC_SUCCESS);
49 }
50
VecViennaCLRestoreArrayRead(Vec v,const ViennaCLVector ** a)51 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
52 {
53 PetscFunctionBegin;
54 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
55 PetscFunctionReturn(PETSC_SUCCESS);
56 }
57
VecViennaCLGetArrayWrite(Vec v,ViennaCLVector ** a)58 PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
59 {
60 PetscFunctionBegin;
61 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
62 *a = 0;
63 PetscCall(VecViennaCLAllocateCheck(v));
64 *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
65 ViennaCLWaitForGPU();
66 PetscFunctionReturn(PETSC_SUCCESS);
67 }
68
VecViennaCLRestoreArrayWrite(Vec v,ViennaCLVector ** a)69 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
70 {
71 PetscFunctionBegin;
72 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
73 v->offloadmask = PETSC_OFFLOAD_GPU;
74
75 PetscCall(PetscObjectStateIncrease((PetscObject)v));
76 PetscFunctionReturn(PETSC_SUCCESS);
77 }
78
PetscViennaCLInit()79 PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
80 {
81 char string[20];
82 PetscBool flg, flg_cuda, flg_opencl, flg_openmp;
83
84 PetscFunctionBegin;
85 /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
86 PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_backend", string, sizeof(string), &flg));
87 if (flg) {
88 try {
89 PetscCall(PetscStrcasecmp(string, "cuda", &flg_cuda));
90 PetscCall(PetscStrcasecmp(string, "opencl", &flg_opencl));
91 PetscCall(PetscStrcasecmp(string, "openmp", &flg_openmp));
92
93 /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
94 if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
95 #if defined(PETSC_HAVE_CUDA)
96 else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
97 #endif
98 #if defined(PETSC_HAVE_OPENCL)
99 else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
100 #endif
101 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.", string);
102 } catch (std::exception const &ex) {
103 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
104 }
105 }
106
107 #if defined(PETSC_HAVE_OPENCL)
108 /* ViennaCL OpenCL device type configuration */
109 PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_opencl_device_type", string, sizeof(string), &flg));
110 if (flg) {
111 try {
112 PetscCall(PetscStrcasecmp(string, "cpu", &flg));
113 if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);
114
115 PetscCall(PetscStrcasecmp(string, "gpu", &flg));
116 if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);
117
118 PetscCall(PetscStrcasecmp(string, "accelerator", &flg));
119 if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
120 } catch (std::exception const &ex) {
121 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
122 }
123 }
124 #endif
125
126 /* Print available backends */
127 PetscCall(PetscOptionsHasName(NULL, NULL, "-viennacl_view", &flg));
128 if (flg) {
129 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: "));
130 #if defined(PETSC_HAVE_CUDA)
131 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA, "));
132 #endif
133 #if defined(PETSC_HAVE_OPENCL)
134 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL, "));
135 #endif
136 #if defined(PETSC_HAVE_OPENMP)
137 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
138 #else
139 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) "));
140 #endif
141 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
142
143 /* Print selected backends */
144 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend selected: "));
145 #if defined(PETSC_HAVE_CUDA)
146 if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA "));
147 #endif
148 #if defined(PETSC_HAVE_OPENCL)
149 if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL "));
150 #endif
151 #if defined(PETSC_HAVE_OPENMP)
152 if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
153 #else
154 if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) "));
155 #endif
156 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
157 }
158 PetscFunctionReturn(PETSC_SUCCESS);
159 }
160
161 /*
162 Allocates space for the vector array on the Host if it does not exist.
163 Does NOT change the PetscViennaCLFlag for the vector
164 Does NOT zero the ViennaCL array
165 */
VecViennaCLAllocateCheckHost(Vec v)166 PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
167 {
168 PetscScalar *array;
169 Vec_Seq *s;
170 PetscInt n = v->map->n;
171
172 PetscFunctionBegin;
173 s = (Vec_Seq *)v->data;
174 PetscCall(VecViennaCLAllocateCheck(v));
175 if (s->array == 0) {
176 PetscCall(PetscMalloc1(n, &array));
177 s->array = array;
178 s->array_allocated = array;
179 }
180 PetscFunctionReturn(PETSC_SUCCESS);
181 }
182
183 /*
184 Allocates space for the vector array on the GPU if it does not exist.
185 Does NOT change the PetscViennaCLFlag for the vector
186 Does NOT zero the ViennaCL array
187
188 */
VecViennaCLAllocateCheck(Vec v)189 PetscErrorCode VecViennaCLAllocateCheck(Vec v)
190 {
191 PetscFunctionBegin;
192 if (!v->spptr) {
193 try {
194 v->spptr = new Vec_ViennaCL;
195 ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
196 ((Vec_ViennaCL *)v->spptr)->GPUarray = ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
197
198 } catch (std::exception const &ex) {
199 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
200 }
201 }
202 PetscFunctionReturn(PETSC_SUCCESS);
203 }
204
205 /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
VecViennaCLCopyToGPU(Vec v)206 PetscErrorCode VecViennaCLCopyToGPU(Vec v)
207 {
208 PetscFunctionBegin;
209 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
210 PetscCall(VecViennaCLAllocateCheck(v));
211 if (v->map->n > 0) {
212 if (v->offloadmask == PETSC_OFFLOAD_CPU) {
213 PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
214 try {
215 ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
216 viennacl::fast_copy(*(PetscScalar **)v->data, *(PetscScalar **)v->data + v->map->n, vec->begin());
217 ViennaCLWaitForGPU();
218 } catch (std::exception const &ex) {
219 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
220 }
221 PetscCall(PetscLogCpuToGpu(v->map->n * sizeof(PetscScalar)));
222 PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
223 v->offloadmask = PETSC_OFFLOAD_BOTH;
224 }
225 }
226 PetscFunctionReturn(PETSC_SUCCESS);
227 }
228
229 /*
230 VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
231 */
VecViennaCLCopyFromGPU(Vec v)232 PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
233 {
234 PetscFunctionBegin;
235 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
236 PetscCall(VecViennaCLAllocateCheckHost(v));
237 if (v->offloadmask == PETSC_OFFLOAD_GPU) {
238 PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
239 try {
240 ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
241 viennacl::fast_copy(vec->begin(), vec->end(), *(PetscScalar **)v->data);
242 ViennaCLWaitForGPU();
243 } catch (std::exception const &ex) {
244 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
245 }
246 PetscCall(PetscLogGpuToCpu(v->map->n * sizeof(PetscScalar)));
247 PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
248 v->offloadmask = PETSC_OFFLOAD_BOTH;
249 }
250 PetscFunctionReturn(PETSC_SUCCESS);
251 }
252
253 /* Copy on CPU */
VecCopy_SeqViennaCL_Private(Vec xin,Vec yin)254 static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin, Vec yin)
255 {
256 PetscScalar *ya;
257 const PetscScalar *xa;
258
259 PetscFunctionBegin;
260 PetscCall(VecViennaCLAllocateCheckHost(xin));
261 PetscCall(VecViennaCLAllocateCheckHost(yin));
262 if (xin != yin) {
263 PetscCall(VecGetArrayRead(xin, &xa));
264 PetscCall(VecGetArray(yin, &ya));
265 PetscCall(PetscArraycpy(ya, xa, xin->map->n));
266 PetscCall(VecRestoreArrayRead(xin, &xa));
267 PetscCall(VecRestoreArray(yin, &ya));
268 }
269 PetscFunctionReturn(PETSC_SUCCESS);
270 }
271
VecSetRandom_SeqViennaCL_Private(Vec xin,PetscRandom r)272 static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin, PetscRandom r)
273 {
274 PetscInt n = xin->map->n, i;
275 PetscScalar *xx;
276
277 PetscFunctionBegin;
278 PetscCall(VecGetArray(xin, &xx));
279 for (i = 0; i < n; i++) PetscCall(PetscRandomGetValue(r, &xx[i]));
280 PetscCall(VecRestoreArray(xin, &xx));
281 PetscFunctionReturn(PETSC_SUCCESS);
282 }
283
VecDestroy_SeqViennaCL_Private(Vec v)284 static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
285 {
286 Vec_Seq *vs = (Vec_Seq *)v->data;
287
288 PetscFunctionBegin;
289 PetscCall(PetscObjectSAWsViewOff(v));
290 PetscCall(PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->n));
291 if (vs->array_allocated) PetscCall(PetscFree(vs->array_allocated));
292 PetscCall(PetscFree(vs));
293 PetscFunctionReturn(PETSC_SUCCESS);
294 }
295
VecResetArray_SeqViennaCL_Private(Vec vin)296 static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
297 {
298 Vec_Seq *v = (Vec_Seq *)vin->data;
299
300 PetscFunctionBegin;
301 v->array = v->unplacedarray;
302 v->unplacedarray = 0;
303 PetscFunctionReturn(PETSC_SUCCESS);
304 }
305
306 /*MC
307 VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL
308
309 Options Database Keys:
310 . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()
311
312 Level: beginner
313
314 .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateSeqWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`, `VecCreateSeq()`
315 M*/
316
VecAYPX_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)317 PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
318 {
319 const ViennaCLVector *xgpu;
320 ViennaCLVector *ygpu;
321
322 PetscFunctionBegin;
323 PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
324 PetscCall(VecViennaCLGetArray(yin, &ygpu));
325 PetscCall(PetscLogGpuTimeBegin());
326 try {
327 if (alpha != 0.0 && xin->map->n > 0) {
328 *ygpu = *xgpu + alpha * *ygpu;
329 PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
330 } else {
331 *ygpu = *xgpu;
332 }
333 ViennaCLWaitForGPU();
334 } catch (std::exception const &ex) {
335 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
336 }
337 PetscCall(PetscLogGpuTimeEnd());
338 PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
339 PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
340 PetscFunctionReturn(PETSC_SUCCESS);
341 }
342
VecAXPY_SeqViennaCL(Vec yin,PetscScalar alpha,Vec xin)343 PetscErrorCode VecAXPY_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
344 {
345 const ViennaCLVector *xgpu;
346 ViennaCLVector *ygpu;
347
348 PetscFunctionBegin;
349 if (alpha != 0.0 && xin->map->n > 0) {
350 PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
351 PetscCall(VecViennaCLGetArray(yin, &ygpu));
352 PetscCall(PetscLogGpuTimeBegin());
353 try {
354 *ygpu += alpha * *xgpu;
355 ViennaCLWaitForGPU();
356 } catch (std::exception const &ex) {
357 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
358 }
359 PetscCall(PetscLogGpuTimeEnd());
360 PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
361 PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
362 PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
363 }
364 PetscFunctionReturn(PETSC_SUCCESS);
365 }
366
VecPointwiseDivide_SeqViennaCL(Vec win,Vec xin,Vec yin)367 PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
368 {
369 const ViennaCLVector *xgpu, *ygpu;
370 ViennaCLVector *wgpu;
371
372 PetscFunctionBegin;
373 if (xin->map->n > 0) {
374 PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
375 PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
376 PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
377 PetscCall(PetscLogGpuTimeBegin());
378 try {
379 *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
380 ViennaCLWaitForGPU();
381 } catch (std::exception const &ex) {
382 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
383 }
384 PetscCall(PetscLogGpuTimeEnd());
385 PetscCall(PetscLogGpuFlops(win->map->n));
386 PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
387 PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
388 PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
389 }
390 PetscFunctionReturn(PETSC_SUCCESS);
391 }
392
VecWAXPY_SeqViennaCL(Vec win,PetscScalar alpha,Vec xin,Vec yin)393 PetscErrorCode VecWAXPY_SeqViennaCL(Vec win, PetscScalar alpha, Vec xin, Vec yin)
394 {
395 const ViennaCLVector *xgpu, *ygpu;
396 ViennaCLVector *wgpu;
397
398 PetscFunctionBegin;
399 if (alpha == 0.0 && xin->map->n > 0) {
400 PetscCall(VecCopy_SeqViennaCL(yin, win));
401 } else {
402 PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
403 PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
404 PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
405 PetscCall(PetscLogGpuTimeBegin());
406 if (alpha == 1.0) {
407 try {
408 *wgpu = *ygpu + *xgpu;
409 } catch (std::exception const &ex) {
410 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
411 }
412 PetscCall(PetscLogGpuFlops(win->map->n));
413 } else if (alpha == -1.0) {
414 try {
415 *wgpu = *ygpu - *xgpu;
416 } catch (std::exception const &ex) {
417 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
418 }
419 PetscCall(PetscLogGpuFlops(win->map->n));
420 } else {
421 try {
422 *wgpu = *ygpu + alpha * *xgpu;
423 } catch (std::exception const &ex) {
424 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
425 }
426 PetscCall(PetscLogGpuFlops(2 * win->map->n));
427 }
428 ViennaCLWaitForGPU();
429 PetscCall(PetscLogGpuTimeEnd());
430 PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
431 PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
432 PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
433 }
434 PetscFunctionReturn(PETSC_SUCCESS);
435 }
436
437 /*
438 * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
439 *
440 * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
441 * hence there is an iterated application of these until the final result is obtained
442 */
VecMAXPY_SeqViennaCL(Vec xin,PetscInt nv,const PetscScalar * alpha,Vec * y)443 PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
444 {
445 PetscInt j;
446
447 PetscFunctionBegin;
448 for (j = 0; j < nv; ++j) {
449 if (j + 1 < nv) {
450 PetscCall(VecAXPBYPCZ_SeqViennaCL(xin, alpha[j], alpha[j + 1], 1.0, y[j], y[j + 1]));
451 ++j;
452 } else {
453 PetscCall(VecAXPY_SeqViennaCL(xin, alpha[j], y[j]));
454 }
455 }
456 ViennaCLWaitForGPU();
457 PetscFunctionReturn(PETSC_SUCCESS);
458 }
459
VecDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar * z)460 PetscErrorCode VecDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
461 {
462 const ViennaCLVector *xgpu, *ygpu;
463
464 PetscFunctionBegin;
465 if (xin->map->n > 0) {
466 PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
467 PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
468 PetscCall(PetscLogGpuTimeBegin());
469 try {
470 *z = viennacl::linalg::inner_prod(*xgpu, *ygpu);
471 } catch (std::exception const &ex) {
472 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
473 }
474 ViennaCLWaitForGPU();
475 PetscCall(PetscLogGpuTimeEnd());
476 if (xin->map->n > 0) PetscCall(PetscLogGpuFlops(2.0 * xin->map->n - 1));
477 PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
478 PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
479 } else *z = 0.0;
480 PetscFunctionReturn(PETSC_SUCCESS);
481 }
482
483 /*
484 * Operation z[j] = dot(x, y[j])
485 *
486 * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
487 */
VecMDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar * z)488 PetscErrorCode VecMDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
489 {
490 PetscInt n = xin->map->n, i;
491 const ViennaCLVector *xgpu, *ygpu;
492 Vec *yyin = (Vec *)yin;
493 std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);
494
495 PetscFunctionBegin;
496 if (xin->map->n > 0) {
497 PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
498 for (i = 0; i < nv; i++) {
499 PetscCall(VecViennaCLGetArrayRead(yyin[i], &ygpu));
500 ygpu_array[i] = ygpu;
501 }
502 PetscCall(PetscLogGpuTimeBegin());
503 viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
504 ViennaCLVector result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
505 viennacl::copy(result.begin(), result.end(), z);
506 for (i = 0; i < nv; i++) PetscCall(VecViennaCLRestoreArrayRead(yyin[i], &ygpu));
507 ViennaCLWaitForGPU();
508 PetscCall(PetscLogGpuTimeEnd());
509 PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
510 PetscCall(PetscLogGpuFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
511 } else {
512 for (i = 0; i < nv; i++) z[i] = 0.0;
513 }
514 PetscFunctionReturn(PETSC_SUCCESS);
515 }
516
VecMTDot_SeqViennaCL(Vec xin,PetscInt nv,const Vec yin[],PetscScalar * z)517 static PetscErrorCode VecMTDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
518 {
519 PetscFunctionBegin;
520 /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
521 PetscCall(VecMDot_SeqViennaCL(xin, nv, yin, z));
522 ViennaCLWaitForGPU();
523 PetscFunctionReturn(PETSC_SUCCESS);
524 }
525
VecSet_SeqViennaCL(Vec xin,PetscScalar alpha)526 PetscErrorCode VecSet_SeqViennaCL(Vec xin, PetscScalar alpha)
527 {
528 ViennaCLVector *xgpu;
529
530 PetscFunctionBegin;
531 if (xin->map->n > 0) {
532 PetscCall(VecViennaCLGetArrayWrite(xin, &xgpu));
533 PetscCall(PetscLogGpuTimeBegin());
534 try {
535 *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
536 ViennaCLWaitForGPU();
537 } catch (std::exception const &ex) {
538 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
539 }
540 PetscCall(PetscLogGpuTimeEnd());
541 PetscCall(VecViennaCLRestoreArrayWrite(xin, &xgpu));
542 }
543 PetscFunctionReturn(PETSC_SUCCESS);
544 }
545
VecScale_SeqViennaCL(Vec xin,PetscScalar alpha)546 PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
547 {
548 ViennaCLVector *xgpu;
549
550 PetscFunctionBegin;
551 if (alpha == 0.0 && xin->map->n > 0) {
552 PetscCall(VecSet_SeqViennaCL(xin, alpha));
553 PetscCall(PetscLogGpuFlops(xin->map->n));
554 } else if (alpha != 1.0 && xin->map->n > 0) {
555 PetscCall(VecViennaCLGetArray(xin, &xgpu));
556 PetscCall(PetscLogGpuTimeBegin());
557 try {
558 *xgpu *= alpha;
559 ViennaCLWaitForGPU();
560 } catch (std::exception const &ex) {
561 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
562 }
563 PetscCall(PetscLogGpuTimeEnd());
564 PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
565 PetscCall(PetscLogGpuFlops(xin->map->n));
566 }
567 PetscFunctionReturn(PETSC_SUCCESS);
568 }
569
VecTDot_SeqViennaCL(Vec xin,Vec yin,PetscScalar * z)570 PetscErrorCode VecTDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
571 {
572 PetscFunctionBegin;
573 /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
574 PetscCall(VecDot_SeqViennaCL(xin, yin, z));
575 ViennaCLWaitForGPU();
576 PetscFunctionReturn(PETSC_SUCCESS);
577 }
578
VecCopy_SeqViennaCL(Vec xin,Vec yin)579 PetscErrorCode VecCopy_SeqViennaCL(Vec xin, Vec yin)
580 {
581 const ViennaCLVector *xgpu;
582 ViennaCLVector *ygpu;
583
584 PetscFunctionBegin;
585 if (xin != yin && xin->map->n > 0) {
586 if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
587 PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
588 PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
589 PetscCall(PetscLogGpuTimeBegin());
590 try {
591 *ygpu = *xgpu;
592 ViennaCLWaitForGPU();
593 } catch (std::exception const &ex) {
594 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
595 }
596 PetscCall(PetscLogGpuTimeEnd());
597 PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
598 PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
599
600 } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
601 /* copy in CPU if we are on the CPU*/
602 PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
603 ViennaCLWaitForGPU();
604 } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
605 /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
606 if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
607 /* copy in CPU */
608 PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
609 ViennaCLWaitForGPU();
610 } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
611 /* copy in GPU */
612 PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
613 PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
614 PetscCall(PetscLogGpuTimeBegin());
615 try {
616 *ygpu = *xgpu;
617 ViennaCLWaitForGPU();
618 } catch (std::exception const &ex) {
619 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
620 }
621 PetscCall(PetscLogGpuTimeEnd());
622 PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
623 PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
624 } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
625 /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
626 default to copy in GPU (this is an arbitrary choice) */
627 PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
628 PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
629 PetscCall(PetscLogGpuTimeBegin());
630 try {
631 *ygpu = *xgpu;
632 ViennaCLWaitForGPU();
633 } catch (std::exception const &ex) {
634 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
635 }
636 PetscCall(PetscLogGpuTimeEnd());
637 PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
638 PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
639 } else {
640 PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
641 ViennaCLWaitForGPU();
642 }
643 }
644 }
645 PetscFunctionReturn(PETSC_SUCCESS);
646 }
647
VecSwap_SeqViennaCL(Vec xin,Vec yin)648 PetscErrorCode VecSwap_SeqViennaCL(Vec xin, Vec yin)
649 {
650 ViennaCLVector *xgpu, *ygpu;
651
652 PetscFunctionBegin;
653 if (xin != yin && xin->map->n > 0) {
654 PetscCall(VecViennaCLGetArray(xin, &xgpu));
655 PetscCall(VecViennaCLGetArray(yin, &ygpu));
656 PetscCall(PetscLogGpuTimeBegin());
657 try {
658 viennacl::swap(*xgpu, *ygpu);
659 ViennaCLWaitForGPU();
660 } catch (std::exception const &ex) {
661 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
662 }
663 PetscCall(PetscLogGpuTimeEnd());
664 PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
665 PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
666 }
667 PetscFunctionReturn(PETSC_SUCCESS);
668 }
669
670 // y = alpha * x + beta * y
VecAXPBY_SeqViennaCL(Vec yin,PetscScalar alpha,PetscScalar beta,Vec xin)671 PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin, PetscScalar alpha, PetscScalar beta, Vec xin)
672 {
673 PetscScalar a = alpha, b = beta;
674 const ViennaCLVector *xgpu;
675 ViennaCLVector *ygpu;
676
677 PetscFunctionBegin;
678 if (a == 0.0 && xin->map->n > 0) {
679 PetscCall(VecScale_SeqViennaCL(yin, beta));
680 } else if (b == 1.0 && xin->map->n > 0) {
681 PetscCall(VecAXPY_SeqViennaCL(yin, alpha, xin));
682 } else if (a == 1.0 && xin->map->n > 0) {
683 PetscCall(VecAYPX_SeqViennaCL(yin, beta, xin));
684 } else if (b == 0.0 && xin->map->n > 0) {
685 PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
686 PetscCall(VecViennaCLGetArray(yin, &ygpu));
687 PetscCall(PetscLogGpuTimeBegin());
688 try {
689 *ygpu = *xgpu * alpha;
690 ViennaCLWaitForGPU();
691 } catch (std::exception const &ex) {
692 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
693 }
694 PetscCall(PetscLogGpuTimeEnd());
695 PetscCall(PetscLogGpuFlops(xin->map->n));
696 PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
697 PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
698 } else if (xin->map->n > 0) {
699 PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
700 PetscCall(VecViennaCLGetArray(yin, &ygpu));
701 PetscCall(PetscLogGpuTimeBegin());
702 try {
703 *ygpu = *xgpu * alpha + *ygpu * beta;
704 ViennaCLWaitForGPU();
705 } catch (std::exception const &ex) {
706 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
707 }
708 PetscCall(PetscLogGpuTimeEnd());
709 PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
710 PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
711 PetscCall(PetscLogGpuFlops(3.0 * xin->map->n));
712 }
713 PetscFunctionReturn(PETSC_SUCCESS);
714 }
715
716 /* operation z = alpha * x + beta *y + gamma *z*/
VecAXPBYPCZ_SeqViennaCL(Vec zin,PetscScalar alpha,PetscScalar beta,PetscScalar gamma,Vec xin,Vec yin)717 PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
718 {
719 PetscInt n = zin->map->n;
720 const ViennaCLVector *xgpu, *ygpu;
721 ViennaCLVector *zgpu;
722
723 PetscFunctionBegin;
724 PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
725 PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
726 PetscCall(VecViennaCLGetArray(zin, &zgpu));
727 if (alpha == 0.0 && xin->map->n > 0) {
728 PetscCall(PetscLogGpuTimeBegin());
729 try {
730 if (beta == 0.0) {
731 *zgpu = gamma * *zgpu;
732 ViennaCLWaitForGPU();
733 PetscCall(PetscLogGpuFlops(1.0 * n));
734 } else if (gamma == 0.0) {
735 *zgpu = beta * *ygpu;
736 ViennaCLWaitForGPU();
737 PetscCall(PetscLogGpuFlops(1.0 * n));
738 } else {
739 *zgpu = beta * *ygpu + gamma * *zgpu;
740 ViennaCLWaitForGPU();
741 PetscCall(PetscLogGpuFlops(3.0 * n));
742 }
743 } catch (std::exception const &ex) {
744 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
745 }
746 PetscCall(PetscLogGpuTimeEnd());
747 PetscCall(PetscLogGpuFlops(3.0 * n));
748 } else if (beta == 0.0 && xin->map->n > 0) {
749 PetscCall(PetscLogGpuTimeBegin());
750 try {
751 if (gamma == 0.0) {
752 *zgpu = alpha * *xgpu;
753 ViennaCLWaitForGPU();
754 PetscCall(PetscLogGpuFlops(1.0 * n));
755 } else {
756 *zgpu = alpha * *xgpu + gamma * *zgpu;
757 ViennaCLWaitForGPU();
758 PetscCall(PetscLogGpuFlops(3.0 * n));
759 }
760 } catch (std::exception const &ex) {
761 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
762 }
763 PetscCall(PetscLogGpuTimeEnd());
764 } else if (gamma == 0.0 && xin->map->n > 0) {
765 PetscCall(PetscLogGpuTimeBegin());
766 try {
767 *zgpu = alpha * *xgpu + beta * *ygpu;
768 ViennaCLWaitForGPU();
769 } catch (std::exception const &ex) {
770 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
771 }
772 PetscCall(PetscLogGpuTimeEnd());
773 PetscCall(PetscLogGpuFlops(3.0 * n));
774 } else if (xin->map->n > 0) {
775 PetscCall(PetscLogGpuTimeBegin());
776 try {
777 /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
778 if (gamma != 1.0) *zgpu *= gamma;
779 *zgpu += alpha * *xgpu + beta * *ygpu;
780 ViennaCLWaitForGPU();
781 } catch (std::exception const &ex) {
782 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
783 }
784 PetscCall(PetscLogGpuTimeEnd());
785 PetscCall(VecViennaCLRestoreArray(zin, &zgpu));
786 PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
787 PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
788 PetscCall(PetscLogGpuFlops(5.0 * n));
789 }
790 PetscFunctionReturn(PETSC_SUCCESS);
791 }
792
VecPointwiseMult_SeqViennaCL(Vec win,Vec xin,Vec yin)793 PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win, Vec xin, Vec yin)
794 {
795 PetscInt n = win->map->n;
796 const ViennaCLVector *xgpu, *ygpu;
797 ViennaCLVector *wgpu;
798
799 PetscFunctionBegin;
800 if (xin->map->n > 0) {
801 PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
802 PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
803 PetscCall(VecViennaCLGetArray(win, &wgpu));
804 PetscCall(PetscLogGpuTimeBegin());
805 try {
806 *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
807 ViennaCLWaitForGPU();
808 } catch (std::exception const &ex) {
809 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
810 }
811 PetscCall(PetscLogGpuTimeEnd());
812 PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
813 PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
814 PetscCall(VecViennaCLRestoreArray(win, &wgpu));
815 PetscCall(PetscLogGpuFlops(n));
816 }
817 PetscFunctionReturn(PETSC_SUCCESS);
818 }
819
VecNorm_SeqViennaCL(Vec xin,NormType type,PetscReal * z)820 PetscErrorCode VecNorm_SeqViennaCL(Vec xin, NormType type, PetscReal *z)
821 {
822 PetscInt n = xin->map->n;
823 PetscBLASInt bn;
824 const ViennaCLVector *xgpu;
825
826 PetscFunctionBegin;
827 if (xin->map->n > 0) {
828 PetscCall(PetscBLASIntCast(n, &bn));
829 PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
830 if (type == NORM_2 || type == NORM_FROBENIUS) {
831 PetscCall(PetscLogGpuTimeBegin());
832 try {
833 *z = viennacl::linalg::norm_2(*xgpu);
834 ViennaCLWaitForGPU();
835 } catch (std::exception const &ex) {
836 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
837 }
838 PetscCall(PetscLogGpuTimeEnd());
839 PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
840 } else if (type == NORM_INFINITY) {
841 PetscCall(PetscLogGpuTimeBegin());
842 try {
843 *z = viennacl::linalg::norm_inf(*xgpu);
844 ViennaCLWaitForGPU();
845 } catch (std::exception const &ex) {
846 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
847 }
848 PetscCall(PetscLogGpuTimeEnd());
849 } else if (type == NORM_1) {
850 PetscCall(PetscLogGpuTimeBegin());
851 try {
852 *z = viennacl::linalg::norm_1(*xgpu);
853 ViennaCLWaitForGPU();
854 } catch (std::exception const &ex) {
855 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
856 }
857 PetscCall(PetscLogGpuTimeEnd());
858 PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
859 } else if (type == NORM_1_AND_2) {
860 PetscCall(PetscLogGpuTimeBegin());
861 try {
862 *z = viennacl::linalg::norm_1(*xgpu);
863 *(z + 1) = viennacl::linalg::norm_2(*xgpu);
864 ViennaCLWaitForGPU();
865 } catch (std::exception const &ex) {
866 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
867 }
868 PetscCall(PetscLogGpuTimeEnd());
869 PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
870 PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
871 }
872 PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
873 } else if (type == NORM_1_AND_2) {
874 *z = 0.0;
875 *(z + 1) = 0.0;
876 } else *z = 0.0;
877 PetscFunctionReturn(PETSC_SUCCESS);
878 }
879
VecSetRandom_SeqViennaCL(Vec xin,PetscRandom r)880 PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin, PetscRandom r)
881 {
882 PetscFunctionBegin;
883 PetscCall(VecSetRandom_SeqViennaCL_Private(xin, r));
884 xin->offloadmask = PETSC_OFFLOAD_CPU;
885 PetscFunctionReturn(PETSC_SUCCESS);
886 }
887
VecResetArray_SeqViennaCL(Vec vin)888 PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
889 {
890 PetscFunctionBegin;
891 PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
892 PetscCall(VecViennaCLCopyFromGPU(vin));
893 PetscCall(VecResetArray_SeqViennaCL_Private(vin));
894 vin->offloadmask = PETSC_OFFLOAD_CPU;
895 PetscFunctionReturn(PETSC_SUCCESS);
896 }
897
VecPlaceArray_SeqViennaCL(Vec vin,const PetscScalar * a)898 PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
899 {
900 PetscFunctionBegin;
901 PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
902 PetscCall(VecViennaCLCopyFromGPU(vin));
903 PetscCall(VecPlaceArray_Seq(vin, a));
904 vin->offloadmask = PETSC_OFFLOAD_CPU;
905 PetscFunctionReturn(PETSC_SUCCESS);
906 }
907
VecReplaceArray_SeqViennaCL(Vec vin,const PetscScalar * a)908 PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
909 {
910 PetscFunctionBegin;
911 PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
912 PetscCall(VecViennaCLCopyFromGPU(vin));
913 PetscCall(VecReplaceArray_Seq(vin, a));
914 vin->offloadmask = PETSC_OFFLOAD_CPU;
915 PetscFunctionReturn(PETSC_SUCCESS);
916 }
917
918 /*@C
919 VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.
920
921 Collective
922
923 Input Parameters:
924 + comm - the communicator, should be PETSC_COMM_SELF
925 - n - the vector length
926
927 Output Parameter:
928 . v - the vector
929
930 Notes:
931 Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
932 same type as an existing vector.
933
934 Level: intermediate
935
936 .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
937 @*/
VecCreateSeqViennaCL(MPI_Comm comm,PetscInt n,Vec * v)938 PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm, PetscInt n, Vec *v)
939 {
940 PetscFunctionBegin;
941 PetscCall(VecCreate(comm, v));
942 PetscCall(VecSetSizes(*v, n, n));
943 PetscCall(VecSetType(*v, VECSEQVIENNACL));
944 PetscFunctionReturn(PETSC_SUCCESS);
945 }
946
947 /*@C
948 VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
949 where the user provides the array space to store the vector values.
950
951 Collective
952
953 Input Parameters:
954 + comm - the communicator, should be PETSC_COMM_SELF
955 . bs - the block size
956 . n - the vector length
957 - array - viennacl array where the vector elements are to be stored.
958
959 Output Parameter:
960 . V - the vector
961
962 Notes:
963 Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
964 same type as an existing vector.
965
966 If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
967 at a later stage to SET the array for storing the vector values.
968
969 PETSc does NOT free the array when the vector is destroyed via VecDestroy().
970 The user should not free the array until the vector is destroyed.
971
972 Level: intermediate
973
974 .seealso: `VecCreateMPIViennaCLWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
975 `VecCreateGhost()`, `VecCreateSeq()`, `VecCUDAPlaceArray()`, `VecCreateSeqWithArray()`,
976 `VecCreateMPIWithArray()`
977 @*/
VecCreateSeqViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const ViennaCLVector * array,Vec * V)978 PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const ViennaCLVector *array, Vec *V) PeNS
979 {
980 PetscMPIInt size;
981
982 PetscFunctionBegin;
983 PetscCall(VecCreate(comm, V));
984 PetscCall(VecSetSizes(*V, n, n));
985 PetscCall(VecSetBlockSize(*V, bs));
986 PetscCallMPI(MPI_Comm_size(comm, &size));
987 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");
988 PetscCall(VecCreate_SeqViennaCL_Private(*V, array));
989 PetscFunctionReturn(PETSC_SUCCESS);
990 }
991
992 /*@C
993 VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
994 the user provides the array space to store the vector values.
995
996 Collective
997
998 Input Parameters:
999 + comm - the communicator, should be PETSC_COMM_SELF
1000 . bs - the block size
1001 . n - the vector length
1002 . cpuarray - CPU memory where the vector elements are to be stored.
1003 - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.
1004
1005 Output Parameter:
1006 . V - the vector
1007
1008 Notes:
1009 If both cpuarray and viennaclvec are provided, the caller must ensure that
1010 the provided arrays have identical values.
1011
1012 PETSc does NOT free the provided arrays when the vector is destroyed via
1013 VecDestroy(). The user should not free the array until the vector is
1014 destroyed.
1015
1016 Level: intermediate
1017
1018 .seealso: `VecCreateMPIViennaCLWithArrays()`, `VecCreate()`, `VecCreateSeqWithArray()`,
1019 `VecViennaCLPlaceArray()`, `VecPlaceArray()`, `VecCreateSeqCUDAWithArrays()`,
1020 `VecViennaCLAllocateCheckHost()`
1021 @*/
VecCreateSeqViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar cpuarray[],const ViennaCLVector * viennaclvec,Vec * V)1022 PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *V) PeNS
1023 {
1024 PetscMPIInt size;
1025
1026 PetscFunctionBegin;
1027 PetscCallMPI(MPI_Comm_size(comm, &size));
1028 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");
1029
1030 // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1031 PetscCall(VecCreateSeqViennaCLWithArray(comm, bs, n, viennaclvec, V));
1032
1033 if (cpuarray && viennaclvec) {
1034 Vec_Seq *s = (Vec_Seq *)((*V)->data);
1035 s->array = (PetscScalar *)cpuarray;
1036 (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1037 } else if (cpuarray) {
1038 Vec_Seq *s = (Vec_Seq *)((*V)->data);
1039 s->array = (PetscScalar *)cpuarray;
1040 (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1041 } else if (viennaclvec) {
1042 (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1043 } else {
1044 (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1045 }
1046 PetscFunctionReturn(PETSC_SUCCESS);
1047 }
1048
1049 /*@C
1050 VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1051 the one provided by the user. This is useful to avoid a copy.
1052
1053 Not Collective
1054
1055 Input Parameters:
1056 + vin - the vector
1057 - a - the ViennaCL vector
1058
1059 Notes:
1060 You can return to the original viennacl vector with a call to `VecViennaCLResetArray()`.
1061 It is not possible to use `VecViennaCLPlaceArray()` and `VecPlaceArray()` at the same time on
1062 the same vector.
1063
1064 Level: intermediate
1065
1066 .seealso: `VecPlaceArray()`, `VecSetValues()`, `VecViennaCLResetArray()`,
1067 `VecCUDAPlaceArray()`,
1068
1069 @*/
VecViennaCLPlaceArray(Vec vin,const ViennaCLVector * a)1070 PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin, const ViennaCLVector *a)
1071 {
1072 PetscFunctionBegin;
1073 PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1074 PetscCall(VecViennaCLCopyToGPU(vin));
1075 PetscCheck(!((Vec_Seq *)vin->data)->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1076 ((Vec_Seq *)vin->data)->unplacedarray = (PetscScalar *)((Vec_ViennaCL *)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1077 ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)a;
1078 vin->offloadmask = PETSC_OFFLOAD_GPU;
1079 PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1080 PetscFunctionReturn(PETSC_SUCCESS);
1081 }
1082
1083 /*@C
1084 VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1085 after the use of VecViennaCLPlaceArray().
1086
1087 Not Collective
1088
1089 Input Parameters:
1090 . vin - the vector
1091
1092 Level: developer
1093
1094 .seealso: `VecViennaCLPlaceArray()`, `VecResetArray()`, `VecCUDAResetArray()`, `VecPlaceArray()`
1095 @*/
VecViennaCLResetArray(Vec vin)1096 PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1097 {
1098 PetscFunctionBegin;
1099 PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1100 PetscCall(VecViennaCLCopyToGPU(vin));
1101 ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)((Vec_Seq *)vin->data)->unplacedarray;
1102 ((Vec_Seq *)vin->data)->unplacedarray = 0;
1103 vin->offloadmask = PETSC_OFFLOAD_GPU;
1104 PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1105 PetscFunctionReturn(PETSC_SUCCESS);
1106 }
1107
1108 /* VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1109 *
1110 * Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1111 */
VecDotNorm2_SeqViennaCL(Vec s,Vec t,PetscScalar * dp,PetscScalar * nm)1112 PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1113 {
1114 PetscFunctionBegin;
1115 PetscCall(VecDot_SeqViennaCL(s, t, dp));
1116 PetscCall(VecNorm_SeqViennaCL(t, NORM_2, nm));
1117 *nm *= *nm; //squared norm required
1118 PetscFunctionReturn(PETSC_SUCCESS);
1119 }
1120
VecDuplicate_SeqViennaCL(Vec win,Vec * V)1121 PetscErrorCode VecDuplicate_SeqViennaCL(Vec win, Vec *V)
1122 {
1123 PetscFunctionBegin;
1124 PetscCall(VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win), win->map->n, V));
1125 PetscCall(PetscLayoutReference(win->map, &(*V)->map));
1126 PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)*V)->olist));
1127 PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)*V)->qlist));
1128 (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1129 PetscFunctionReturn(PETSC_SUCCESS);
1130 }
1131
VecDestroy_SeqViennaCL(Vec v)1132 PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1133 {
1134 PetscFunctionBegin;
1135 try {
1136 if (v->spptr) {
1137 delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
1138 delete (Vec_ViennaCL *)v->spptr;
1139 }
1140 } catch (char *ex) {
1141 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
1142 }
1143 PetscCall(VecDestroy_SeqViennaCL_Private(v));
1144 PetscFunctionReturn(PETSC_SUCCESS);
1145 }
1146
VecGetArray_SeqViennaCL(Vec v,PetscScalar ** a)1147 PetscErrorCode VecGetArray_SeqViennaCL(Vec v, PetscScalar **a)
1148 {
1149 PetscFunctionBegin;
1150 if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1151 PetscCall(VecViennaCLCopyFromGPU(v));
1152 } else {
1153 PetscCall(VecViennaCLAllocateCheckHost(v));
1154 }
1155 *a = *((PetscScalar **)v->data);
1156 PetscFunctionReturn(PETSC_SUCCESS);
1157 }
1158
VecRestoreArray_SeqViennaCL(Vec v,PetscScalar ** a)1159 PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v, PetscScalar **a)
1160 {
1161 PetscFunctionBegin;
1162 v->offloadmask = PETSC_OFFLOAD_CPU;
1163 PetscFunctionReturn(PETSC_SUCCESS);
1164 }
1165
VecGetArrayWrite_SeqViennaCL(Vec v,PetscScalar ** a)1166 PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v, PetscScalar **a)
1167 {
1168 PetscFunctionBegin;
1169 PetscCall(VecViennaCLAllocateCheckHost(v));
1170 *a = *((PetscScalar **)v->data);
1171 PetscFunctionReturn(PETSC_SUCCESS);
1172 }
1173
VecBindToCPU_SeqAIJViennaCL(Vec V,PetscBool flg)1174 static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V, PetscBool flg)
1175 {
1176 PetscFunctionBegin;
1177 V->boundtocpu = flg;
1178 if (flg) {
1179 PetscCall(VecViennaCLCopyFromGPU(V));
1180 V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1181 V->ops->dot = VecDot_Seq;
1182 V->ops->norm = VecNorm_Seq;
1183 V->ops->tdot = VecTDot_Seq;
1184 V->ops->scale = VecScale_Seq;
1185 V->ops->copy = VecCopy_Seq;
1186 V->ops->set = VecSet_Seq;
1187 V->ops->swap = VecSwap_Seq;
1188 V->ops->axpy = VecAXPY_Seq;
1189 V->ops->axpby = VecAXPBY_Seq;
1190 V->ops->axpbypcz = VecAXPBYPCZ_Seq;
1191 V->ops->pointwisemult = VecPointwiseMult_Seq;
1192 V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1193 V->ops->setrandom = VecSetRandom_Seq;
1194 V->ops->dot_local = VecDot_Seq;
1195 V->ops->tdot_local = VecTDot_Seq;
1196 V->ops->norm_local = VecNorm_Seq;
1197 V->ops->mdot_local = VecMDot_Seq;
1198 V->ops->mtdot_local = VecMTDot_Seq;
1199 V->ops->maxpy = VecMAXPY_Seq;
1200 V->ops->mdot = VecMDot_Seq;
1201 V->ops->mtdot = VecMTDot_Seq;
1202 V->ops->aypx = VecAYPX_Seq;
1203 V->ops->waxpy = VecWAXPY_Seq;
1204 V->ops->dotnorm2 = NULL;
1205 V->ops->placearray = VecPlaceArray_Seq;
1206 V->ops->replacearray = VecReplaceArray_Seq;
1207 V->ops->resetarray = VecResetArray_Seq;
1208 V->ops->duplicate = VecDuplicate_Seq;
1209 } else {
1210 V->ops->dot = VecDot_SeqViennaCL;
1211 V->ops->norm = VecNorm_SeqViennaCL;
1212 V->ops->tdot = VecTDot_SeqViennaCL;
1213 V->ops->scale = VecScale_SeqViennaCL;
1214 V->ops->copy = VecCopy_SeqViennaCL;
1215 V->ops->set = VecSet_SeqViennaCL;
1216 V->ops->swap = VecSwap_SeqViennaCL;
1217 V->ops->axpy = VecAXPY_SeqViennaCL;
1218 V->ops->axpby = VecAXPBY_SeqViennaCL;
1219 V->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
1220 V->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
1221 V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1222 V->ops->setrandom = VecSetRandom_SeqViennaCL;
1223 V->ops->dot_local = VecDot_SeqViennaCL;
1224 V->ops->tdot_local = VecTDot_SeqViennaCL;
1225 V->ops->norm_local = VecNorm_SeqViennaCL;
1226 V->ops->mdot_local = VecMDot_SeqViennaCL;
1227 V->ops->mtdot_local = VecMTDot_SeqViennaCL;
1228 V->ops->maxpy = VecMAXPY_SeqViennaCL;
1229 V->ops->mdot = VecMDot_SeqViennaCL;
1230 V->ops->mtdot = VecMTDot_SeqViennaCL;
1231 V->ops->aypx = VecAYPX_SeqViennaCL;
1232 V->ops->waxpy = VecWAXPY_SeqViennaCL;
1233 V->ops->dotnorm2 = VecDotNorm2_SeqViennaCL;
1234 V->ops->placearray = VecPlaceArray_SeqViennaCL;
1235 V->ops->replacearray = VecReplaceArray_SeqViennaCL;
1236 V->ops->resetarray = VecResetArray_SeqViennaCL;
1237 V->ops->destroy = VecDestroy_SeqViennaCL;
1238 V->ops->duplicate = VecDuplicate_SeqViennaCL;
1239 V->ops->getarraywrite = VecGetArrayWrite_SeqViennaCL;
1240 V->ops->getarray = VecGetArray_SeqViennaCL;
1241 V->ops->restorearray = VecRestoreArray_SeqViennaCL;
1242 }
1243 V->ops->duplicatevecs = VecDuplicateVecs_Default;
1244 PetscFunctionReturn(PETSC_SUCCESS);
1245 }
1246
VecCreate_SeqViennaCL(Vec V)1247 PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1248 {
1249 PetscMPIInt size;
1250
1251 PetscFunctionBegin;
1252 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1253 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1254 PetscCall(VecCreate_Seq_Private(V, 0));
1255 PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));
1256
1257 PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1258 V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1259
1260 PetscCall(VecViennaCLAllocateCheck(V));
1261 PetscCall(VecSet_SeqViennaCL(V, 0.0));
1262 PetscFunctionReturn(PETSC_SUCCESS);
1263 }
1264
1265 /*@C
1266 VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.
1267
1268 Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1269 invoking clReleaseContext().
1270
1271 Input Parameter:
1272 . v - the vector
1273
1274 Output Parameter:
1275 . ctx - pointer to the underlying CL context
1276
1277 Level: intermediate
1278
1279 .seealso: `VecViennaCLGetCLQueue()`, `VecViennaCLGetCLMemRead()`
1280 @*/
VecViennaCLGetCLContext(Vec v,PETSC_UINTPTR_T * ctx)1281 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T *ctx)
1282 {
1283 #if !defined(PETSC_HAVE_OPENCL)
1284 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_context.");
1285 #else
1286
1287 PetscFunctionBegin;
1288 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1289 const ViennaCLVector *v_vcl;
1290 PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1291 try {
1292 viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1293 const cl_context ocl_ctx = vcl_ctx.handle().get();
1294 clRetainContext(ocl_ctx);
1295 *ctx = (PETSC_UINTPTR_T)ocl_ctx;
1296 } catch (std::exception const &ex) {
1297 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1298 }
1299 PetscFunctionReturn(PETSC_SUCCESS);
1300 #endif
1301 }
1302
1303 /*@C
1304 VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1305 operations of the Vec are enqueued.
1306
1307 Caller should cast (*queue) to (const cl_command_queue). Caller is
1308 responsible for invoking clReleaseCommandQueue().
1309
1310 Input Parameter:
1311 . v - the vector
1312
1313 Output Parameter:
1314 . queue - pointer to the CL command queue
1315
1316 Level: intermediate
1317
1318 .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemRead()`
1319 @*/
VecViennaCLGetCLQueue(Vec v,PETSC_UINTPTR_T * queue)1320 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T *queue)
1321 {
1322 #if !defined(PETSC_HAVE_OPENCL)
1323 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1324 #else
1325 PetscFunctionBegin;
1326 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1327 const ViennaCLVector *v_vcl;
1328 PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1329 try {
1330 viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1331 const viennacl::ocl::command_queue &vcl_queue = vcl_ctx.current_queue();
1332 const cl_command_queue ocl_queue = vcl_queue.handle().get();
1333 clRetainCommandQueue(ocl_queue);
1334 *queue = (PETSC_UINTPTR_T)ocl_queue;
1335 } catch (std::exception const &ex) {
1336 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1337 }
1338 PetscFunctionReturn(PETSC_SUCCESS);
1339 #endif
1340 }
1341
1342 /*@C
1343 VecViennaCLGetCLMemRead - Provides access to the CL buffer inside a Vec.
1344
1345 Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1346 invoking clReleaseMemObject().
1347
1348 Input Parameter:
1349 . v - the vector
1350
1351 Output Parameter:
1352 . mem - pointer to the device buffer
1353
1354 Level: intermediate
1355
1356 .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1357 @*/
VecViennaCLGetCLMemRead(Vec v,PETSC_UINTPTR_T * mem)1358 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T *mem)
1359 {
1360 #if !defined(PETSC_HAVE_OPENCL)
1361 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1362 #else
1363 PetscFunctionBegin;
1364 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1365 const ViennaCLVector *v_vcl;
1366 PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1367 try {
1368 const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1369 clRetainMemObject(ocl_mem);
1370 *mem = (PETSC_UINTPTR_T)ocl_mem;
1371 } catch (std::exception const &ex) {
1372 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1373 }
1374 PetscFunctionReturn(PETSC_SUCCESS);
1375 #endif
1376 }
1377
1378 /*@C
1379 VecViennaCLGetCLMemWrite - Provides access to the CL buffer inside a Vec.
1380
1381 Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1382 invoking clReleaseMemObject().
1383
1384 The device pointer has to be released by calling
1385 VecViennaCLRestoreCLMemWrite(). Upon restoring the vector data the data on
1386 the host will be marked as out of date. A subsequent access of the host data
1387 will thus incur a data transfer from the device to the host.
1388
1389 Input Parameter:
1390 . v - the vector
1391
1392 Output Parameter:
1393 . mem - pointer to the device buffer
1394
1395 Level: intermediate
1396
1397 .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMemWrite()`
1398 @*/
VecViennaCLGetCLMemWrite(Vec v,PETSC_UINTPTR_T * mem)1399 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T *mem)
1400 {
1401 #if !defined(PETSC_HAVE_OPENCL)
1402 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1403 #else
1404 PetscFunctionBegin;
1405 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1406 ViennaCLVector *v_vcl;
1407 PetscCall(VecViennaCLGetArrayWrite(v, &v_vcl));
1408 try {
1409 const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1410 clRetainMemObject(ocl_mem);
1411 *mem = (PETSC_UINTPTR_T)ocl_mem;
1412 } catch (std::exception const &ex) {
1413 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1414 }
1415 PetscFunctionReturn(PETSC_SUCCESS);
1416 #endif
1417 }
1418
1419 /*@C
1420 VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1421 acquired with VecViennaCLGetCLMemWrite().
1422
1423 This marks the host data as out of date. Subsequent access to the
1424 vector data on the host side with for instance VecGetArray() incurs a
1425 data transfer.
1426
1427 Input Parameter:
1428 . v - the vector
1429
1430 Level: intermediate
1431
1432 .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1433 @*/
VecViennaCLRestoreCLMemWrite(Vec v)1434 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1435 {
1436 #if !defined(PETSC_HAVE_OPENCL)
1437 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1438 #else
1439 PetscFunctionBegin;
1440 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1441 PetscCall(VecViennaCLRestoreArrayWrite(v, NULL));
1442 PetscFunctionReturn(PETSC_SUCCESS);
1443 #endif
1444 }
1445
1446 /*@C
1447 VecViennaCLGetCLMem - Provides access to the CL buffer inside a Vec.
1448
1449 Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1450 invoking clReleaseMemObject().
1451
1452 The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1453 Upon restoring the vector data the data on the host will be marked as out of
1454 date. A subsequent access of the host data will thus incur a data transfer
1455 from the device to the host.
1456
1457 Input Parameter:
1458 . v - the vector
1459
1460 Output Parameter:
1461 . mem - pointer to the device buffer
1462
1463 Level: intermediate
1464
1465 .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMem()`
1466 @*/
VecViennaCLGetCLMem(Vec v,PETSC_UINTPTR_T * mem)1467 PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T *mem)
1468 {
1469 #if !defined(PETSC_HAVE_OPENCL)
1470 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1471 #else
1472 PetscFunctionBegin;
1473 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1474 ViennaCLVector *v_vcl;
1475 PetscCall(VecViennaCLGetArray(v, &v_vcl));
1476 try {
1477 const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1478 clRetainMemObject(ocl_mem);
1479 *mem = (PETSC_UINTPTR_T)ocl_mem;
1480 } catch (std::exception const &ex) {
1481 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1482 }
1483 PetscFunctionReturn(PETSC_SUCCESS);
1484 #endif
1485 }
1486
1487 /*@C
1488 VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1489 acquired with VecViennaCLGetCLMem().
1490
1491 This marks the host data as out of date. Subsequent access to the vector
1492 data on the host side with for instance VecGetArray() incurs a data
1493 transfer.
1494
1495 Input Parameter:
1496 . v - the vector
1497
1498 Level: intermediate
1499
1500 .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMem()`
1501 @*/
VecViennaCLRestoreCLMem(Vec v)1502 PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1503 {
1504 #if !defined(PETSC_HAVE_OPENCL)
1505 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1506 #else
1507 PetscFunctionBegin;
1508 PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1509 PetscCall(VecViennaCLRestoreArray(v, NULL));
1510 PetscFunctionReturn(PETSC_SUCCESS);
1511 #endif
1512 }
1513
VecCreate_SeqViennaCL_Private(Vec V,const ViennaCLVector * array)1514 PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V, const ViennaCLVector *array)
1515 {
1516 Vec_ViennaCL *vecviennacl;
1517 PetscMPIInt size;
1518
1519 PetscFunctionBegin;
1520 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1521 PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1522 PetscCall(VecCreate_Seq_Private(V, 0));
1523 PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));
1524 PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1525 V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;
1526
1527 if (array) {
1528 if (!V->spptr) V->spptr = new Vec_ViennaCL;
1529 vecviennacl = (Vec_ViennaCL *)V->spptr;
1530 vecviennacl->GPUarray_allocated = 0;
1531 vecviennacl->GPUarray = (ViennaCLVector *)array;
1532 V->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1533 }
1534 PetscFunctionReturn(PETSC_SUCCESS);
1535 }
1536