/*
   This file contains routines for Parallel vector operations.
 */
#include <petscconf.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/
#include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

/*MC
   VECVIENNACL - VECVIENNACL = "viennacl" - A VECSEQVIENNACL on a single-process communicator, and VECMPIVIENNACL otherwise.

   Options Database Keys:
. -vec_type viennacl - sets the vector type to VECVIENNACL during a call to VecSetFromOptions()

  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECSEQVIENNACL`, `VECMPIVIENNACL`, `VECSTANDARD`, `VecType`, `VecCreateMPI()`
M*/

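/*
   Minimal usage sketch (illustrative only, not part of this file's API; assumes PETSc was
   configured with ViennaCL support): selecting the type from the options database.

     Vec x;
     PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
     PetscCall(VecSetSizes(x, PETSC_DECIDE, 100));
     PetscCall(VecSetFromOptions(x)); // run with -vec_type viennacl to get VECSEQVIENNACL or VECMPIVIENNACL
     PetscCall(VecSet(x, 1.0));
     PetscCall(VecDestroy(&x));
*/
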
static PetscErrorCode VecDestroy_MPIViennaCL(Vec v)
{
  PetscFunctionBegin;
  try {
    if (v->spptr) {
      delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
      delete (Vec_ViennaCL *)v->spptr;
    }
  } catch (std::exception const &ex) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
  }
  PetscCall(VecDestroy_MPI(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecNorm_MPIViennaCL(Vec xin, NormType type, PetscReal *z)
{
  PetscReal sum, work = 0.0;

  PetscFunctionBegin;
  if (type == NORM_2 || type == NORM_FROBENIUS) {
    /* Square the local 2-norm, sum the squares across all ranks, then take the square root */
    PetscCall(VecNorm_SeqViennaCL(xin, NORM_2, &work));
    work *= work;
    PetscCallMPI(MPIU_Allreduce(&work, &sum, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
    *z = PetscSqrtReal(sum);
  } else if (type == NORM_1) {
    /* Find the local part */
    PetscCall(VecNorm_SeqViennaCL(xin, NORM_1, &work));
    /* Find the global sum */
    PetscCallMPI(MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    PetscCall(VecNorm_SeqViennaCL(xin, NORM_INFINITY, &work));
    /* Find the global max */
    PetscCallMPI(MPIU_Allreduce(&work, z, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)xin)));
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    PetscCall(VecNorm_SeqViennaCL(xin, NORM_1, temp));
    PetscCall(VecNorm_SeqViennaCL(xin, NORM_2, temp + 1));
    temp[1] = temp[1] * temp[1];
    PetscCallMPI(MPIU_Allreduce(temp, z, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
    z[1] = PetscSqrtReal(z[1]);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecDot_MPIViennaCL(Vec xin, Vec yin, PetscScalar *z)
{
  PetscScalar sum, work;

  PetscFunctionBegin;
  PetscCall(VecDot_SeqViennaCL(xin, yin, &work));
  PetscCallMPI(MPIU_Allreduce(&work, &sum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
  *z = sum;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecTDot_MPIViennaCL(Vec xin, Vec yin, PetscScalar *z)
{
  PetscScalar sum, work;

  PetscFunctionBegin;
  PetscCall(VecTDot_SeqViennaCL(xin, yin, &work));
  PetscCallMPI(MPIU_Allreduce(&work, &sum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
  *z = sum;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecMDot_MPIViennaCL(Vec xin, PetscInt nv, const Vec y[], PetscScalar *z)
{
  PetscScalar awork[128], *work = awork;

  PetscFunctionBegin;
  if (nv > 128) PetscCall(PetscMalloc1(nv, &work));
  PetscCall(VecMDot_SeqViennaCL(xin, nv, y, work));
  PetscCallMPI(MPIU_Allreduce(work, z, nv, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)xin)));
  if (nv > 128) PetscCall(PetscFree(work));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   VECMPIVIENNACL - VECMPIVIENNACL = "mpiviennacl" - The basic parallel vector, modified to use ViennaCL

   Options Database Keys:
. -vec_type mpiviennacl - sets the vector type to VECMPIVIENNACL during a call to VecSetFromOptions()

  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`
M*/

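/*
   Illustrative sketch (not part of this file): selecting the ViennaCL type programmatically.
   Passing VECVIENNACL to VecSetType() resolves to VECSEQVIENNACL or VECMPIVIENNACL depending on
   the size of the communicator, mirroring VecCreate_ViennaCL() further below.

     Vec y;
     PetscCall(VecCreate(PETSC_COMM_WORLD, &y));
     PetscCall(VecSetSizes(y, PETSC_DECIDE, 100));
     PetscCall(VecSetType(y, VECVIENNACL)); // on more than one MPI rank this yields a VECMPIVIENNACL
     PetscCall(VecDestroy(&y));
*/
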
static PetscErrorCode VecDuplicate_MPIViennaCL(Vec win, Vec *v)
{
  Vec_MPI     *vw, *w = (Vec_MPI *)win->data;
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCall(VecCreate(PetscObjectComm((PetscObject)win), v));
  PetscCall(PetscLayoutReference(win->map, &(*v)->map));

  PetscCall(VecCreate_MPI_Private(*v, PETSC_FALSE, w->nghost, 0));
  vw           = (Vec_MPI *)(*v)->data;
  (*v)->ops[0] = win->ops[0];

  /* save local representation of the parallel vector (and scatter) if it exists */
  if (w->localrep) {
    PetscCall(VecGetArray(*v, &array));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, win->map->n + w->nghost, array, &vw->localrep));
    vw->localrep->ops[0] = w->localrep->ops[0];
    PetscCall(VecRestoreArray(*v, &array));
    vw->localupdate = w->localupdate;
    if (vw->localupdate) PetscCall(PetscObjectReference((PetscObject)vw->localupdate));
  }

  /* New vector should inherit stashing property of parent */
  (*v)->stash.donotstash   = win->stash.donotstash;
  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

  /* change type_name appropriately */
  PetscCall(PetscObjectChangeTypeName((PetscObject)*v, VECMPIVIENNACL));

  PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)*v)->olist));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)*v)->qlist));
  (*v)->map->bs   = win->map->bs;
  (*v)->bstash.bs = win->bstash.bs;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecDotNorm2_MPIViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
{
  PetscScalar work[2], sum[2];

  PetscFunctionBegin;
  PetscCall(VecDotNorm2_SeqViennaCL(s, t, work, work + 1));
  PetscCallMPI(MPIU_Allreduce((void *)&work, (void *)&sum, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)s)));
  *dp = sum[0];
  *nm = sum[1];
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecBindToCPU_MPIViennaCL(Vec vv, PetscBool bind)
{
  PetscFunctionBegin;
  vv->boundtocpu = bind;

  if (bind) {
    PetscCall(VecViennaCLCopyFromGPU(vv));
    vv->offloadmask                 = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
    vv->ops->dotnorm2               = NULL;
    vv->ops->waxpy                  = VecWAXPY_Seq;
    vv->ops->dot                    = VecDot_MPI;
    vv->ops->mdot                   = VecMDot_MPI;
    vv->ops->tdot                   = VecTDot_MPI;
    vv->ops->norm                   = VecNorm_MPI;
    vv->ops->scale                  = VecScale_Seq;
    vv->ops->copy                   = VecCopy_Seq;
    vv->ops->set                    = VecSet_Seq;
    vv->ops->swap                   = VecSwap_Seq;
    vv->ops->axpy                   = VecAXPY_Seq;
    vv->ops->axpby                  = VecAXPBY_Seq;
    vv->ops->maxpy                  = VecMAXPY_Seq;
    vv->ops->aypx                   = VecAYPX_Seq;
    vv->ops->axpbypcz               = VecAXPBYPCZ_Seq;
    vv->ops->pointwisemult          = VecPointwiseMult_Seq;
    vv->ops->setrandom              = VecSetRandom_Seq;
    vv->ops->placearray             = VecPlaceArray_Seq;
    vv->ops->replacearray           = VecReplaceArray_Seq;
    vv->ops->resetarray             = VecResetArray_Seq;
    vv->ops->dot_local              = VecDot_Seq;
    vv->ops->tdot_local             = VecTDot_Seq;
    vv->ops->norm_local             = VecNorm_Seq;
    vv->ops->mdot_local             = VecMDot_Seq;
    vv->ops->pointwisedivide        = VecPointwiseDivide_Seq;
    vv->ops->getlocalvector         = NULL;
    vv->ops->restorelocalvector     = NULL;
    vv->ops->getlocalvectorread     = NULL;
    vv->ops->restorelocalvectorread = NULL;
    vv->ops->getarraywrite          = NULL;
  } else {
    vv->ops->dotnorm2        = VecDotNorm2_MPIViennaCL;
    vv->ops->waxpy           = VecWAXPY_SeqViennaCL;
    vv->ops->duplicate       = VecDuplicate_MPIViennaCL;
    vv->ops->dot             = VecDot_MPIViennaCL;
    vv->ops->mdot            = VecMDot_MPIViennaCL;
    vv->ops->tdot            = VecTDot_MPIViennaCL;
    vv->ops->norm            = VecNorm_MPIViennaCL;
    vv->ops->scale           = VecScale_SeqViennaCL;
    vv->ops->copy            = VecCopy_SeqViennaCL;
    vv->ops->set             = VecSet_SeqViennaCL;
    vv->ops->swap            = VecSwap_SeqViennaCL;
    vv->ops->axpy            = VecAXPY_SeqViennaCL;
    vv->ops->axpby           = VecAXPBY_SeqViennaCL;
    vv->ops->maxpy           = VecMAXPY_SeqViennaCL;
    vv->ops->aypx            = VecAYPX_SeqViennaCL;
    vv->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
    vv->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
    vv->ops->setrandom       = VecSetRandom_SeqViennaCL;
    vv->ops->dot_local       = VecDot_SeqViennaCL;
    vv->ops->tdot_local      = VecTDot_SeqViennaCL;
    vv->ops->norm_local      = VecNorm_SeqViennaCL;
    vv->ops->mdot_local      = VecMDot_SeqViennaCL;
    vv->ops->destroy         = VecDestroy_MPIViennaCL;
    vv->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
    vv->ops->placearray      = VecPlaceArray_SeqViennaCL;
    vv->ops->replacearray    = VecReplaceArray_SeqViennaCL;
    vv->ops->resetarray      = VecResetArray_SeqViennaCL;
    vv->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
    vv->ops->getarray        = VecGetArray_SeqViennaCL;
    vv->ops->restorearray    = VecRestoreArray_SeqViennaCL;
  }
  vv->ops->duplicatevecs = VecDuplicateVecs_Default;
  PetscFunctionReturn(PETSC_SUCCESS);
}

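/*
   Illustrative sketch (user-level API, not defined in this file): VecBindToCPU() triggers the
   table swap above, so subsequent operations on the vector run on the host until it is unbound.

     Vec z;                                   // assume z is a VECMPIVIENNACL created elsewhere
     PetscCall(VecBindToCPU(z, PETSC_TRUE));  // copy data to the host and use the *_Seq/*_MPI kernels
     PetscCall(VecScale(z, 2.0));             // executes on the CPU
     PetscCall(VecBindToCPU(z, PETSC_FALSE)); // restore the ViennaCL kernels
*/
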
PETSC_EXTERN PetscErrorCode VecCreate_MPIViennaCL(Vec vv)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(vv->map));
  PetscCall(VecViennaCLAllocateCheck(vv));
  PetscCall(VecCreate_MPIViennaCL_Private(vv, PETSC_FALSE, 0, ((Vec_ViennaCL *)vv->spptr)->GPUarray));
  PetscCall(VecViennaCLAllocateCheckHost(vv));
  PetscCall(VecSet(vv, 0.0));
  PetscCall(VecSet_Seq(vv, 0.0));
  vv->offloadmask = PETSC_OFFLOAD_BOTH;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode VecCreate_ViennaCL(Vec v)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v), &size));
  if (size == 1) {
    PetscCall(VecSetType(v, VECSEQVIENNACL));
  } else {
    PetscCall(VecSetType(v, VECMPIVIENNACL));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  VecCreateMPIViennaCLWithArray - Creates a parallel, array-style vector,
  where the user provides the ViennaCL vector to store the vector values.

  Collective

  Input Parameters:
+ comm  - the MPI communicator to use
. bs    - block size, same meaning as in `VecSetBlockSize()`
. n     - local vector length, cannot be `PETSC_DECIDE`
. N     - global vector length (or `PETSC_DECIDE` to have it calculated)
- array - the user-provided GPU array to store the vector values

  Output Parameter:
. vv - the vector

  Level: intermediate

  Notes:
  Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the
  same type as an existing vector.

  If the user-provided array is `NULL`, then `VecViennaCLPlaceArray()` can be used
  at a later stage to SET the array for storing the vector values.

  PETSc does NOT free the array when the vector is destroyed via `VecDestroy()`.
  The user should not free the array until the vector is destroyed.

.seealso: `VecCreateSeqViennaCLWithArray()`, `VecCreateMPIWithArray()`, `VecCreateSeqWithArray()`,
          `VecCreate()`, `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecViennaCLPlaceArray()`

@*/
PetscErrorCode VecCreateMPIViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const ViennaCLVector *array, Vec *vv) PeNS
{
  PetscFunctionBegin;
  PetscCheck(n != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must set local size of vector");
  PetscCall(PetscSplitOwnership(comm, &n, &N));
  PetscCall(VecCreate(comm, vv));
  PetscCall(VecSetSizes(*vv, n, N));
  PetscCall(VecSetBlockSize(*vv, bs));
  PetscCall(VecCreate_MPIViennaCL_Private(*vv, PETSC_FALSE, 0, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

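/*
   Illustrative sketch (assumes ViennaCL headers are available and the local size matches the
   length of the device vector): wrapping an existing viennacl::vector<PetscScalar> as the
   device storage of a parallel PETSc vector.

     PetscInt        n      = 100;                   // local length on this rank
     ViennaCLVector *gpuvec = new ViennaCLVector(n); // viennacl::vector<PetscScalar> of length n
     Vec             x;
     PetscCall(VecCreateMPIViennaCLWithArray(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, gpuvec, &x));
     // ... use x ...
     PetscCall(VecDestroy(&x)); // does not free gpuvec; the caller remains responsible for it
     delete gpuvec;
*/
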
/*@C
  VecCreateMPIViennaCLWithArrays - Creates a parallel, array-style vector,
  where the user provides the ViennaCL vector to store the vector values.

  Collective

  Input Parameters:
+ comm        - the MPI communicator to use
. bs          - block size, same meaning as in `VecSetBlockSize()`
. n           - local vector length, cannot be `PETSC_DECIDE`
. N           - global vector length (or `PETSC_DECIDE` to have it calculated)
. cpuarray    - the user-provided CPU array to store the vector values
- viennaclvec - ViennaCL vector where the `Vec` entries are to be stored on the device

  Output Parameter:
. vv - the vector

  Level: intermediate

  Notes:
  If both `cpuarray` and `viennaclvec` are provided, the caller must ensure that
  the provided arrays have identical values.

  Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the
  same type as an existing vector.

  PETSc does NOT free the provided arrays when the vector is destroyed via
  `VecDestroy()`. The user should not free the arrays until the vector is
  destroyed.

.seealso: `VecCreateSeqViennaCLWithArrays()`, `VecCreateMPIWithArray()`,
          `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
          `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecViennaCLPlaceArray()`,
          `VecPlaceArray()`, `VecCreateMPICUDAWithArrays()`,
          `VecViennaCLAllocateCheckHost()`
@*/
PetscErrorCode VecCreateMPIViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *vv) PeNS
{
  PetscFunctionBegin;
  PetscCall(VecCreateMPIViennaCLWithArray(comm, bs, n, N, viennaclvec, vv));
  if (cpuarray && viennaclvec) {
    Vec_MPI *s         = (Vec_MPI *)((*vv)->data);
    s->array           = (PetscScalar *)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_BOTH;
  } else if (cpuarray) {
    Vec_MPI *s         = (Vec_MPI *)((*vv)->data);
    s->array           = (PetscScalar *)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_CPU;
  } else if (viennaclvec) {
    (*vv)->offloadmask = PETSC_OFFLOAD_GPU;
  } else {
    (*vv)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

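/*
   Illustrative sketch (hypothetical buffers; assumes the host and device copies hold identical
   values before the vector is created): providing both a CPU array and a ViennaCL vector marks
   the Vec as valid on both sides, so no transfer is triggered on first access from either side.

     PetscInt        n = 100;
     PetscScalar    *host;
     ViennaCLVector *device = new ViennaCLVector(n);
     Vec             x;
     PetscCall(PetscMalloc1(n, &host));
     // ... fill host and device with identical values ...
     PetscCall(VecCreateMPIViennaCLWithArrays(PETSC_COMM_WORLD, 1, n, PETSC_DECIDE, host, device, &x));
     // ... use x ...
     PetscCall(VecDestroy(&x)); // neither host nor device is freed here
     PetscCall(PetscFree(host));
     delete device;
*/
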
PetscErrorCode VecCreate_MPIViennaCL_Private(Vec vv, PetscBool alloc, PetscInt nghost, const ViennaCLVector *array)
{
  Vec_ViennaCL *vecviennacl;

  PetscFunctionBegin;
  PetscCall(VecCreate_MPI_Private(vv, PETSC_FALSE, 0, 0));
  PetscCall(PetscObjectChangeTypeName((PetscObject)vv, VECMPIVIENNACL));

  PetscCall(VecBindToCPU_MPIViennaCL(vv, PETSC_FALSE));
  vv->ops->bindtocpu = VecBindToCPU_MPIViennaCL;

  if (alloc && !array) {
    PetscCall(VecViennaCLAllocateCheck(vv));
    PetscCall(VecViennaCLAllocateCheckHost(vv));
    PetscCall(VecSet(vv, 0.0));
    PetscCall(VecSet_Seq(vv, 0.0));
    vv->offloadmask = PETSC_OFFLOAD_BOTH;
  }
  if (array) {
    if (!vv->spptr) vv->spptr = new Vec_ViennaCL;
    vecviennacl                     = (Vec_ViennaCL *)vv->spptr;
    vecviennacl->GPUarray_allocated = 0;
    vecviennacl->GPUarray           = (ViennaCLVector *)array;
    vv->offloadmask                 = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
387