xref: /petsc/src/vec/vec/impls/mpi/cupm/cuda/vecmpicupm.cu (revision 6d8694c4fbab79f9439f1ad13c0386ba7ee1ca4b) !
1 #include "../vecmpicupm.hpp" /*I <petscvec.h> I*/
2 #include "../vecmpicupm_impl.hpp"
3 
// Bring the CUPM (unified CUDA/HIP) vector implementation machinery into scope.
using namespace Petsc::vec::cupm;
using Petsc::device::cupm::DeviceType;

// Explicitly instantiate the MPI CUPM vector implementation for CUDA so its
// definitions are emitted in this translation unit.
template class impl::VecMPI_CUPM<DeviceType::CUDA>;

// Stateless dispatcher object used by the C-linkage wrapper functions below.
static constexpr auto VecMPI_CUDA = impl::VecMPI_CUPM<DeviceType::CUDA>{};
10 
/*MC
  VECCUDA - VECCUDA = "cuda" - A `VECSEQCUDA` on a single-process MPI communicator, and `VECMPICUDA`
  otherwise.

  Options Database Keys:
. -vec_type cuda - sets the vector type to `VECCUDA` during a call to `VecSetFromOptions()`

  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECSEQCUDA`,
`VECMPICUDA`, `VECSTANDARD`, `VecType`, `VecCreateMPI()`, `VecSetPinnedMemoryMin()`, `VECHIP`
M*/
23 
// Constructor for the VECCUDA type: per the VECCUDA manual page above, the
// result behaves as VECSEQCUDA on a single-process communicator and VECMPICUDA
// otherwise. All work is delegated to the templated CUPM implementation.
PetscErrorCode VecCreate_CUDA(Vec v)
{
  PetscFunctionBegin;
  PetscCall(VecMPI_CUDA.Create_CUPM(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
30 
/*MC
  VECMPICUDA - VECMPICUDA = "mpicuda" - The basic parallel vector, modified to use CUDA

  Options Database Keys:
. -vec_type mpicuda - sets the vector type to `VECMPICUDA` during a call to `VecSetFromOptions()`

  Level: beginner

.seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECMPI`,
`VecType`, `VecCreateMPI()`, `VecSetPinnedMemoryMin()`, `VECSEQHIP`, `VECMPIHIP`
M*/
42 
// Constructor for the VECMPICUDA type; delegates directly to the templated
// CUPM implementation's Create().
PetscErrorCode VecCreate_MPICUDA(Vec v)
{
  PetscFunctionBegin;
  PetscCall(VecMPI_CUDA.Create(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
49 
// Converts a VECMPI vector to VECMPICUDA in place (as the name indicates; the
// actual conversion strategy lives in Convert_IMPL_IMPLCUPM()).
PetscErrorCode VecConvert_MPI_MPICUDA_inplace(Vec v)
{
  PetscFunctionBegin;
  PetscCall(VecMPI_CUDA.Convert_IMPL_IMPLCUPM(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
56 
// Retrieves the raw host and device arrays of v together with the current
// offload mask, via the CUPM base implementation. The current device context
// is asserted to be of CUDA type before the arrays are fetched.
PetscErrorCode VecCUDAGetArrays_Private(Vec v, const PetscScalar **host_array, const PetscScalar **device_array, PetscOffloadMask *mask)
{
  PetscDeviceContext dctx;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(v, VEC_CLASSID, 1); // v must be a valid Vec header
  // fetch the current device context, erroring if it is not a CUDA context
  PetscCall(PetscDeviceContextGetCurrentContextAssertType_Internal(&dctx, PETSC_DEVICE_CUDA));
  PetscCall(VecMPI_CUDA.GetArrays_CUPMBase(v, host_array, device_array, mask, dctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
67 
// PetscClangLinter pragma disable: -fdoc-internal-linkage
/*@
  VecCreateMPICUDA - Creates a standard, parallel, array-style vector for CUDA devices.

  Collective, Possibly Synchronous

  Input Parameters:
+ comm - the MPI communicator to use
. n    - local vector length (or `PETSC_DECIDE` to have calculated if `N` is given)
- N    - global vector length (or `PETSC_DETERMINE` to have calculated if `n` is given)

  Output Parameter:
. v - the vector

  Notes:
  Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the same type as an
  existing vector.

  This function may initialize `PetscDevice`, which may incur a device synchronization.

  Level: intermediate

.seealso: `VecCreateMPICUDAWithArray()`, `VecCreateMPICUDAWithArrays()`, `VecCreateSeqCUDA()`,
`VecCreateSeq()`, `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
`VecCreateGhost()`, `VecCreateMPIWithArray()`, `VecCreateGhostWithArray()`, `VecMPISetGhost()`
@*/
PetscErrorCode VecCreateMPICUDA(MPI_Comm comm, PetscInt n, PetscInt N, Vec *v)
{
  PetscFunctionBegin;
  PetscAssertPointer(v, 4); // output pointer must be non-NULL (argument 4)
  // delegate to the templated CUPM creation routine, instantiated for CUDA
  PetscCall(VecCreateMPICUPMAsync<DeviceType::CUDA>(comm, n, N, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
101 
// PetscClangLinter pragma disable: -fdoc-internal-linkage
/*@C
  VecCreateMPICUDAWithArrays - Creates a parallel, array-style vector using CUDA, where the
  user provides the complete array space to store the vector values.

  Collective, Possibly Synchronous

  Input Parameters:
+ comm     - the MPI communicator to use
. bs       - block size, same meaning as `VecSetBlockSize()`
. n        - local vector length, cannot be `PETSC_DECIDE`
. N        - global vector length (or `PETSC_DECIDE` to have calculated)
. cpuarray - CPU memory where the vector elements are to be stored (or `NULL`)
- gpuarray - GPU memory where the vector elements are to be stored (or `NULL`)

  Output Parameter:
. v - the vector

  Notes:
  See `VecCreateSeqCUDAWithArrays()` for further discussion, this routine shares identical
  semantics.

  Level: intermediate

.seealso: `VecCreateMPICUDA()`, `VecCreateSeqCUDAWithArrays()`, `VecCreateMPIWithArray()`,
`VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
`VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
@*/
VecCreateMPICUDAWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar cpuarray[],const PetscScalar gpuarray[],Vec * v)130 PetscErrorCode VecCreateMPICUDAWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar cpuarray[], const PetscScalar gpuarray[], Vec *v)
131 {
132   PetscFunctionBegin;
133   PetscCall(VecCreateMPICUPMWithArrays<DeviceType::CUDA>(comm, bs, n, N, cpuarray, gpuarray, v));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
// PetscClangLinter pragma disable: -fdoc-internal-linkage
/*@C
  VecCreateMPICUDAWithArray - Creates a parallel, array-style vector using CUDA, where the
  user provides the device array space to store the vector values.

  Collective

  Input Parameters:
+ comm     - the MPI communicator to use
. bs       - block size, same meaning as `VecSetBlockSize()`
. n        - local vector length, cannot be `PETSC_DECIDE`
. N        - global vector length (or `PETSC_DECIDE` to have calculated)
- gpuarray - the user provided GPU array to store the vector values

  Output Parameter:
. v - the vector

  Notes:
  See `VecCreateSeqCUDAWithArray()` for further discussion, this routine shares identical
  semantics.

  Level: intermediate

.seealso: `VecCreateMPICUDA()`, `VecCreateSeqCUDAWithArray()`, `VecCreateMPIWithArray()`,
`VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
`VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
@*/
VecCreateMPICUDAWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar gpuarray[],Vec * v)164 PetscErrorCode VecCreateMPICUDAWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar gpuarray[], Vec *v)
165 {
166   PetscFunctionBegin;
167   PetscCall(VecCreateMPICUPMWithArray<DeviceType::CUDA>(comm, bs, n, N, gpuarray, v));
168   PetscFunctionReturn(PETSC_SUCCESS);
169 }
170