1 /*
2 This file contains routines for Parallel vector operations.
3 */
4 #include <petscconf.h>
5 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/
6 #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>
7
8 /*MC
9 VECVIENNACL - VECVIENNACL = "viennacl" - A VECSEQVIENNACL on a single-process communicator, and VECMPIVIENNACL otherwise.
10
11 Options Database Keys:
12 . -vec_type viennacl - sets the vector type to VECVIENNACL during a call to VecSetFromOptions()
13
14 Level: beginner
15
.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECSEQVIENNACL`, `VECMPIVIENNACL`, `VECSTANDARD`, `VecType`, `VecCreateMPI()`
17 M*/
18
VecDestroy_MPIViennaCL(Vec v)19 static PetscErrorCode VecDestroy_MPIViennaCL(Vec v)
20 {
21 PetscFunctionBegin;
22 try {
23 if (v->spptr) {
24 delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
25 delete (Vec_ViennaCL *)v->spptr;
26 }
27 } catch (std::exception const &ex) {
28 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
29 }
30 PetscCall(VecDestroy_MPI(v));
31 PetscFunctionReturn(PETSC_SUCCESS);
32 }
33
VecNorm_MPIViennaCL(Vec xin,NormType type,PetscReal * z)34 static PetscErrorCode VecNorm_MPIViennaCL(Vec xin, NormType type, PetscReal *z)
35 {
36 PetscReal sum, work = 0.0;
37
38 PetscFunctionBegin;
39 if (type == NORM_2 || type == NORM_FROBENIUS) {
40 PetscCall(VecNorm_SeqViennaCL(xin, NORM_2, &work));
41 work *= work;
42 PetscCallMPI(MPIU_Allreduce(&work, &sum, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
43 *z = PetscSqrtReal(sum);
44 } else if (type == NORM_1) {
45 /* Find the local part */
46 PetscCall(VecNorm_SeqViennaCL(xin, NORM_1, &work));
47 /* Find the global max */
48 PetscCallMPI(MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
49 } else if (type == NORM_INFINITY) {
50 /* Find the local max */
51 PetscCall(VecNorm_SeqViennaCL(xin, NORM_INFINITY, &work));
52 /* Find the global max */
53 PetscCallMPI(MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)xin)));
54 } else if (type == NORM_1_AND_2) {
55 PetscReal temp[2];
56 PetscCall(VecNorm_SeqViennaCL(xin, NORM_1, temp));
57 PetscCall(VecNorm_SeqViennaCL(xin, NORM_2, temp + 1));
58 temp[1] = temp[1] * temp[1];
59 PetscCallMPI(MPIU_Allreduce(temp, z, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
60 z[1] = PetscSqrtReal(z[1]);
61 }
62 PetscFunctionReturn(PETSC_SUCCESS);
63 }
64
VecDot_MPIViennaCL(Vec xin,Vec yin,PetscScalar * z)65 static PetscErrorCode VecDot_MPIViennaCL(Vec xin, Vec yin, PetscScalar *z)
66 {
67 PetscScalar sum, work;
68
69 PetscFunctionBegin;
70 PetscCall(VecDot_SeqViennaCL(xin, yin, &work));
71 PetscCallMPI(MPIU_Allreduce(&work, &sum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
72 *z = sum;
73 PetscFunctionReturn(PETSC_SUCCESS);
74 }
75
VecTDot_MPIViennaCL(Vec xin,Vec yin,PetscScalar * z)76 static PetscErrorCode VecTDot_MPIViennaCL(Vec xin, Vec yin, PetscScalar *z)
77 {
78 PetscScalar sum, work;
79
80 PetscFunctionBegin;
81 PetscCall(VecTDot_SeqViennaCL(xin, yin, &work));
82 PetscCallMPI(MPIU_Allreduce(&work, &sum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
83 *z = sum;
84 PetscFunctionReturn(PETSC_SUCCESS);
85 }
86
VecMDot_MPIViennaCL(Vec xin,PetscInt nv,const Vec y[],PetscScalar * z)87 static PetscErrorCode VecMDot_MPIViennaCL(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
88 {
89 PetscScalar awork[128], *work = awork;
90
91 PetscFunctionBegin;
92 if (nv > 128) PetscCall(PetscMalloc1(nv, &work));
93 PetscCall(VecMDot_SeqViennaCL(xin, nv, y, work));
94 PetscCallMPI(MPIU_Allreduce(work, z, nv, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
95 if (nv > 128) PetscCall(PetscFree(work));
96 PetscFunctionReturn(PETSC_SUCCESS);
97 }
98
99 /*MC
100 VECMPIVIENNACL - VECMPIVIENNACL = "mpiviennacl" - The basic parallel vector, modified to use ViennaCL
101
102 Options Database Keys:
103 . -vec_type mpiviennacl - sets the vector type to VECMPIVIENNACL during a call to VecSetFromOptions()
104
105 Level: beginner
106
.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
108 M*/
109
VecDuplicate_MPIViennaCL(Vec win,Vec * v)110 static PetscErrorCode VecDuplicate_MPIViennaCL(Vec win, Vec *v)
111 {
112 Vec_MPI *vw, *w = (Vec_MPI *)win->data;
113 PetscScalar *array;
114
115 PetscFunctionBegin;
116 PetscCall(VecCreate(PetscObjectComm((PetscObject)win), v));
117 PetscCall(PetscLayoutReference(win->map, &(*v)->map));
118
119 PetscCall(VecCreate_MPI_Private(*v, PETSC_FALSE, w->nghost, 0));
120 vw = (Vec_MPI *)(*v)->data;
121 (*v)->ops[0] = win->ops[0];
122
123 /* save local representation of the parallel vector (and scatter) if it exists */
124 if (w->localrep) {
125 PetscCall(VecGetArray(*v, &array));
126 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, win->map->n + w->nghost, array, &vw->localrep));
127 vw->localrep->ops[0] = w->localrep->ops[0];
128 PetscCall(VecRestoreArray(*v, &array));
129 vw->localupdate = w->localupdate;
130 if (vw->localupdate) PetscCall(PetscObjectReference((PetscObject)vw->localupdate));
131 }
132
133 /* New vector should inherit stashing property of parent */
134 (*v)->stash.donotstash = win->stash.donotstash;
135 (*v)->stash.ignorenegidx = win->stash.ignorenegidx;
136
137 /* change type_name appropriately */
138 PetscCall(PetscObjectChangeTypeName((PetscObject)*v, VECMPIVIENNACL));
139
140 PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)*v)->olist));
141 PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)*v)->qlist));
142 (*v)->map->bs = win->map->bs;
143 (*v)->bstash.bs = win->bstash.bs;
144 PetscFunctionReturn(PETSC_SUCCESS);
145 }
146
VecDotNorm2_MPIViennaCL(Vec s,Vec t,PetscScalar * dp,PetscScalar * nm)147 static PetscErrorCode VecDotNorm2_MPIViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
148 {
149 PetscScalar work[2], sum[2];
150
151 PetscFunctionBegin;
152 PetscCall(VecDotNorm2_SeqViennaCL(s, t, work, work + 1));
153 PetscCallMPI(MPIU_Allreduce((void *)&work, (void *)&sum, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)s)));
154 *dp = sum[0];
155 *nm = sum[1];
156 PetscFunctionReturn(PETSC_SUCCESS);
157 }
158
VecBindToCPU_MPIViennaCL(Vec vv,PetscBool bind)159 static PetscErrorCode VecBindToCPU_MPIViennaCL(Vec vv, PetscBool bind)
160 {
161 PetscFunctionBegin;
162 vv->boundtocpu = bind;
163
164 if (bind) {
165 PetscCall(VecViennaCLCopyFromGPU(vv));
166 vv->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
167 vv->ops->dotnorm2 = NULL;
168 vv->ops->waxpy = VecWAXPY_Seq;
169 vv->ops->dot = VecDot_MPI;
170 vv->ops->mdot = VecMDot_MPI;
171 vv->ops->tdot = VecTDot_MPI;
172 vv->ops->norm = VecNorm_MPI;
173 vv->ops->scale = VecScale_Seq;
174 vv->ops->copy = VecCopy_Seq;
175 vv->ops->set = VecSet_Seq;
176 vv->ops->swap = VecSwap_Seq;
177 vv->ops->axpy = VecAXPY_Seq;
178 vv->ops->axpby = VecAXPBY_Seq;
179 vv->ops->maxpy = VecMAXPY_Seq;
180 vv->ops->aypx = VecAYPX_Seq;
181 vv->ops->axpbypcz = VecAXPBYPCZ_Seq;
182 vv->ops->pointwisemult = VecPointwiseMult_Seq;
183 vv->ops->setrandom = VecSetRandom_Seq;
184 vv->ops->placearray = VecPlaceArray_Seq;
185 vv->ops->replacearray = VecReplaceArray_Seq;
186 vv->ops->resetarray = VecResetArray_Seq;
187 vv->ops->dot_local = VecDot_Seq;
188 vv->ops->tdot_local = VecTDot_Seq;
189 vv->ops->norm_local = VecNorm_Seq;
190 vv->ops->mdot_local = VecMDot_Seq;
191 vv->ops->pointwisedivide = VecPointwiseDivide_Seq;
192 vv->ops->getlocalvector = NULL;
193 vv->ops->restorelocalvector = NULL;
194 vv->ops->getlocalvectorread = NULL;
195 vv->ops->restorelocalvectorread = NULL;
196 vv->ops->getarraywrite = NULL;
197 } else {
198 vv->ops->dotnorm2 = VecDotNorm2_MPIViennaCL;
199 vv->ops->waxpy = VecWAXPY_SeqViennaCL;
200 vv->ops->duplicate = VecDuplicate_MPIViennaCL;
201 vv->ops->dot = VecDot_MPIViennaCL;
202 vv->ops->mdot = VecMDot_MPIViennaCL;
203 vv->ops->tdot = VecTDot_MPIViennaCL;
204 vv->ops->norm = VecNorm_MPIViennaCL;
205 vv->ops->scale = VecScale_SeqViennaCL;
206 vv->ops->copy = VecCopy_SeqViennaCL;
207 vv->ops->set = VecSet_SeqViennaCL;
208 vv->ops->swap = VecSwap_SeqViennaCL;
209 vv->ops->axpy = VecAXPY_SeqViennaCL;
210 vv->ops->axpby = VecAXPBY_SeqViennaCL;
211 vv->ops->maxpy = VecMAXPY_SeqViennaCL;
212 vv->ops->aypx = VecAYPX_SeqViennaCL;
213 vv->ops->axpbypcz = VecAXPBYPCZ_SeqViennaCL;
214 vv->ops->pointwisemult = VecPointwiseMult_SeqViennaCL;
215 vv->ops->setrandom = VecSetRandom_SeqViennaCL;
216 vv->ops->dot_local = VecDot_SeqViennaCL;
217 vv->ops->tdot_local = VecTDot_SeqViennaCL;
218 vv->ops->norm_local = VecNorm_SeqViennaCL;
219 vv->ops->mdot_local = VecMDot_SeqViennaCL;
220 vv->ops->destroy = VecDestroy_MPIViennaCL;
221 vv->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
222 vv->ops->placearray = VecPlaceArray_SeqViennaCL;
223 vv->ops->replacearray = VecReplaceArray_SeqViennaCL;
224 vv->ops->resetarray = VecResetArray_SeqViennaCL;
225 vv->ops->getarraywrite = VecGetArrayWrite_SeqViennaCL;
226 vv->ops->getarray = VecGetArray_SeqViennaCL;
227 vv->ops->restorearray = VecRestoreArray_SeqViennaCL;
228 }
229 vv->ops->duplicatevecs = VecDuplicateVecs_Default;
230 PetscFunctionReturn(PETSC_SUCCESS);
231 }
232
VecCreate_MPIViennaCL(Vec vv)233 PETSC_EXTERN PetscErrorCode VecCreate_MPIViennaCL(Vec vv)
234 {
235 PetscFunctionBegin;
236 PetscCall(PetscLayoutSetUp(vv->map));
237 PetscCall(VecViennaCLAllocateCheck(vv));
238 PetscCall(VecCreate_MPIViennaCL_Private(vv, PETSC_FALSE, 0, ((Vec_ViennaCL *)vv->spptr)->GPUarray));
239 PetscCall(VecViennaCLAllocateCheckHost(vv));
240 PetscCall(VecSet(vv, 0.0));
241 PetscCall(VecSet_Seq(vv, 0.0));
242 vv->offloadmask = PETSC_OFFLOAD_BOTH;
243 PetscFunctionReturn(PETSC_SUCCESS);
244 }
245
VecCreate_ViennaCL(Vec v)246 PETSC_EXTERN PetscErrorCode VecCreate_ViennaCL(Vec v)
247 {
248 PetscMPIInt size;
249
250 PetscFunctionBegin;
251 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
252 if (size == 1) {
253 PetscCall(VecSetType(v, VECSEQVIENNACL));
254 } else {
255 PetscCall(VecSetType(v, VECMPIVIENNACL));
256 }
257 PetscFunctionReturn(PETSC_SUCCESS);
258 }
259
260 /*@C
261 VecCreateMPIViennaCLWithArray - Creates a parallel, array-style vector,
262 where the user provides the viennacl vector to store the vector values.
263
264 Collective
265
266 Input Parameters:
267 + comm - the MPI communicator to use
268 . bs - block size, same meaning as in `VecSetBlockSize()`
269 . n - local vector length, cannot be `PETSC_DECIDE`
270 . N - global vector length (or `PETSC_DECIDE` to have calculated)
271 - array - the user provided GPU array to store the vector values
272
273 Output Parameter:
274 . vv - the vector
275
276 Level: intermediate
277
278 Notes:
  Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the
280 same type as an existing vector.
281
282 If the user-provided array is `NULL`, then `VecViennaCLPlaceArray()` can be used
283 at a later stage to SET the array for storing the vector values.
284
285 PETSc does NOT free the array when the vector is destroyed via `VecDestroy()`.
286 The user should not free the array until the vector is destroyed.
287
288 .seealso: `VecCreateSeqViennaCLWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
289 `VecCreate()`, `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecViennaCLPlaceArray()`
290
291 @*/
VecCreateMPIViennaCLWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const ViennaCLVector * array,Vec * vv)292 PetscErrorCode VecCreateMPIViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const ViennaCLVector *array, Vec *vv) PeNS
293 {
294 PetscFunctionBegin;
295 PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
296 PetscCall(PetscSplitOwnership(comm, &n, &N));
297 PetscCall(VecCreate(comm, vv));
298 PetscCall(VecSetSizes(*vv, n, N));
299 PetscCall(VecSetBlockSize(*vv, bs));
300 PetscCall(VecCreate_MPIViennaCL_Private(*vv, PETSC_FALSE, 0, array));
301 PetscFunctionReturn(PETSC_SUCCESS);
302 }
303
304 /*@C
305 VecCreateMPIViennaCLWithArrays - Creates a parallel, array-style vector,
306 where the user provides the ViennaCL vector to store the vector values.
307
308 Collective
309
310 Input Parameters:
311 + comm - the MPI communicator to use
312 . bs - block size, same meaning as VecSetBlockSize()
313 . n - local vector length, cannot be PETSC_DECIDE
314 . N - global vector length (or PETSC_DECIDE to have calculated)
315 . cpuarray - the user provided CPU array to store the vector values
316 - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.
317
318 Output Parameter:
319 . vv - the vector
320
321 Notes:
322 If both cpuarray and viennaclvec are provided, the caller must ensure that
323 the provided arrays have identical values.
324
325 Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
326 same type as an existing vector.
327
328 PETSc does NOT free the provided arrays when the vector is destroyed via
329 VecDestroy(). The user should not free the array until the vector is
330 destroyed.
331
332 Level: intermediate
333
334 .seealso: `VecCreateSeqViennaCLWithArrays()`, `VecCreateMPIWithArray()`
335 `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
336 `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecViennaCLPlaceArray()`,
337 `VecPlaceArray()`, `VecCreateMPICUDAWithArrays()`,
338 `VecViennaCLAllocateCheckHost()`
339 @*/
VecCreateMPIViennaCLWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar cpuarray[],const ViennaCLVector * viennaclvec,Vec * vv)340 PetscErrorCode VecCreateMPIViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *vv) PeNS
341 {
342 PetscFunctionBegin;
343 PetscCall(VecCreateMPIViennaCLWithArray(comm, bs, n, N, viennaclvec, vv));
344 if (cpuarray && viennaclvec) {
345 Vec_MPI *s = (Vec_MPI *)((*vv)->data);
346 s->array = (PetscScalar *)cpuarray;
347 (*vv)->offloadmask = PETSC_OFFLOAD_BOTH;
348 } else if (cpuarray) {
349 Vec_MPI *s = (Vec_MPI *)((*vv)->data);
350 s->array = (PetscScalar *)cpuarray;
351 (*vv)->offloadmask = PETSC_OFFLOAD_CPU;
352 } else if (viennaclvec) {
353 (*vv)->offloadmask = PETSC_OFFLOAD_GPU;
354 } else {
355 (*vv)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
356 }
357 PetscFunctionReturn(PETSC_SUCCESS);
358 }
359
VecCreate_MPIViennaCL_Private(Vec vv,PetscBool alloc,PetscInt nghost,const ViennaCLVector * array)360 PetscErrorCode VecCreate_MPIViennaCL_Private(Vec vv, PetscBool alloc, PetscInt nghost, const ViennaCLVector *array)
361 {
362 Vec_ViennaCL *vecviennacl;
363
364 PetscFunctionBegin;
365 PetscCall(VecCreate_MPI_Private(vv, PETSC_FALSE, 0, 0));
366 PetscCall(PetscObjectChangeTypeName((PetscObject)vv, VECMPIVIENNACL));
367
368 PetscCall(VecBindToCPU_MPIViennaCL(vv, PETSC_FALSE));
369 vv->ops->bindtocpu = VecBindToCPU_MPIViennaCL;
370
371 if (alloc && !array) {
372 PetscCall(VecViennaCLAllocateCheck(vv));
373 PetscCall(VecViennaCLAllocateCheckHost(vv));
374 PetscCall(VecSet(vv, 0.0));
375 PetscCall(VecSet_Seq(vv, 0.0));
376 vv->offloadmask = PETSC_OFFLOAD_BOTH;
377 }
378 if (array) {
379 if (!vv->spptr) vv->spptr = new Vec_ViennaCL;
380 vecviennacl = (Vec_ViennaCL *)vv->spptr;
381 vecviennacl->GPUarray_allocated = 0;
382 vecviennacl->GPUarray = (ViennaCLVector *)array;
383 vv->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
384 }
385 PetscFunctionReturn(PETSC_SUCCESS);
386 }
387