1 #include "../vecmpicupm.hpp" /*I <petscvec.h> I*/
2 #include "../vecmpicupm_impl.hpp"
3
4 using namespace Petsc::vec::cupm;
5 using Petsc::device::cupm::DeviceType;
6
7 template class impl::VecMPI_CUPM<DeviceType::CUDA>;
8
9 static constexpr auto VecMPI_CUDA = impl::VecMPI_CUPM<DeviceType::CUDA>{};
10
11 /*MC
12 VECCUDA - VECCUDA = "cuda" - A `VECSEQCUDA` on a single-process MPI communicator, and `VECMPICUDA`
13 otherwise.
14
15 Options Database Key:
16 . -vec_type cuda - sets the vector type to `VECCUDA` during a call to `VecSetFromOptions()`
17
18 Level: beginner
19
20 .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECSEQCUDA`,
21 `VECMPICUDA`, `VECSTANDARD`, `VecType`, `VecCreateMPI()`, `VecSetPinnedMemoryMin()`, `VECHIP`
22 M*/
23
VecCreate_CUDA(Vec v)24 PetscErrorCode VecCreate_CUDA(Vec v)
25 {
26 PetscFunctionBegin;
27 PetscCall(VecMPI_CUDA.Create_CUPM(v));
28 PetscFunctionReturn(PETSC_SUCCESS);
29 }
30
31 /*MC
32 VECMPICUDA - VECMPICUDA = "mpicuda" - The basic parallel vector, modified to use CUDA
33
34 Options Database Key:
35 . -vec_type mpicuda - sets the vector type to `VECMPICUDA` during a call to `VecSetFromOptions()`
36
37 Level: beginner
38
39 .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateMPIWithArray()`, `VECMPI`,
40 `VecType`, `VecCreateMPI()`, `VecSetPinnedMemoryMin()`, `VECSEQHIP`, `VECMPIHIP`
41 M*/
42
VecCreate_MPICUDA(Vec v)43 PetscErrorCode VecCreate_MPICUDA(Vec v)
44 {
45 PetscFunctionBegin;
46 PetscCall(VecMPI_CUDA.Create(v));
47 PetscFunctionReturn(PETSC_SUCCESS);
48 }
49
VecConvert_MPI_MPICUDA_inplace(Vec v)50 PetscErrorCode VecConvert_MPI_MPICUDA_inplace(Vec v)
51 {
52 PetscFunctionBegin;
53 PetscCall(VecMPI_CUDA.Convert_IMPL_IMPLCUPM(v));
54 PetscFunctionReturn(PETSC_SUCCESS);
55 }
56
VecCUDAGetArrays_Private(Vec v,const PetscScalar ** host_array,const PetscScalar ** device_array,PetscOffloadMask * mask)57 PetscErrorCode VecCUDAGetArrays_Private(Vec v, const PetscScalar **host_array, const PetscScalar **device_array, PetscOffloadMask *mask)
58 {
59 PetscDeviceContext dctx;
60
61 PetscFunctionBegin;
62 PetscValidHeaderSpecific(v, VEC_CLASSID, 1);
63 PetscCall(PetscDeviceContextGetCurrentContextAssertType_Internal(&dctx, PETSC_DEVICE_CUDA));
64 PetscCall(VecMPI_CUDA.GetArrays_CUPMBase(v, host_array, device_array, mask, dctx));
65 PetscFunctionReturn(PETSC_SUCCESS);
66 }
67
68 // PetscClangLinter pragma disable: -fdoc-internal-linkage
69 /*@
70 VecCreateMPICUDA - Creates a standard, parallel, array-style vector for CUDA devices.
71
72 Collective, Possibly Synchronous
73
74 Input Parameters:
75 + comm - the MPI communicator to use
76 . n - local vector length (or `PETSC_DECIDE` to have calculated if `N` is given)
77 - N - global vector length (or `PETSC_DETERMINE` to have calculated if `n` is given)
78
79 Output Parameter:
80 . v - the vector
81
82 Notes:
83 Use `VecDuplicate()` or `VecDuplicateVecs()` to form additional vectors of the same type as an
84 existing vector.
85
86 This function may initialize `PetscDevice`, which may incur a device synchronization.
87
88 Level: intermediate
89
90 .seealso: `VecCreateMPICUDAWithArray()`, `VecCreateMPICUDAWithArrays()`, `VecCreateSeqCUDA()`,
91 `VecCreateSeq()`, `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
92 `VecCreateGhost()`, `VecCreateMPIWithArray()`, `VecCreateGhostWithArray()`, `VecMPISetGhost()`
93 @*/
VecCreateMPICUDA(MPI_Comm comm,PetscInt n,PetscInt N,Vec * v)94 PetscErrorCode VecCreateMPICUDA(MPI_Comm comm, PetscInt n, PetscInt N, Vec *v)
95 {
96 PetscFunctionBegin;
97 PetscAssertPointer(v, 4);
98 PetscCall(VecCreateMPICUPMAsync<DeviceType::CUDA>(comm, n, N, v));
99 PetscFunctionReturn(PETSC_SUCCESS);
100 }
101
102 // PetscClangLinter pragma disable: -fdoc-internal-linkage
103 /*@C
104 VecCreateMPICUDAWithArrays - Creates a parallel, array-style vector using CUDA, where the
105 user provides the complete array space to store the vector values.
106
107 Collective, Possibly Synchronous
108
109 Input Parameters:
110 + comm - the MPI communicator to use
111 . bs - block size, same meaning as `VecSetBlockSize()`
112 . n - local vector length, cannot be `PETSC_DECIDE`
113 . N - global vector length (or `PETSC_DETERMINE` to have calculated)
114 . cpuarray - CPU memory where the vector elements are to be stored (or `NULL`)
115 - gpuarray - GPU memory where the vector elements are to be stored (or `NULL`)
116
117 Output Parameter:
118 . v - the vector
119
120 Notes:
121 See `VecCreateSeqCUDAWithArrays()` for further discussion, this routine shares identical
122 semantics.
123
124 Level: intermediate
125
126 .seealso: `VecCreateMPICUDA()`, `VecCreateSeqCUDAWithArrays()`, `VecCreateMPIWithArray()`,
127 `VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
128 `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
129 @*/
VecCreateMPICUDAWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar cpuarray[],const PetscScalar gpuarray[],Vec * v)130 PetscErrorCode VecCreateMPICUDAWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar cpuarray[], const PetscScalar gpuarray[], Vec *v)
131 {
132 PetscFunctionBegin;
133 PetscCall(VecCreateMPICUPMWithArrays<DeviceType::CUDA>(comm, bs, n, N, cpuarray, gpuarray, v));
134 PetscFunctionReturn(PETSC_SUCCESS);
135 }
136
137 // PetscClangLinter pragma disable: -fdoc-internal-linkage
138 /*@C
139 VecCreateMPICUDAWithArray - Creates a parallel, array-style vector using CUDA, where the
140 user provides the device array space to store the vector values.
141
142 Collective
143
144 Input Parameters:
145 + comm - the MPI communicator to use
146 . bs - block size, same meaning as `VecSetBlockSize()`
147 . n - local vector length, cannot be `PETSC_DECIDE`
148 . N - global vector length (or `PETSC_DETERMINE` to have calculated)
149 - gpuarray - the user provided GPU array to store the vector values
150
151 Output Parameter:
152 . v - the vector
153
154 Notes:
155 See `VecCreateSeqCUDAWithArray()` for further discussion, this routine shares identical
156 semantics.
157
158 Level: intermediate
159
160 .seealso: `VecCreateMPICUDA()`, `VecCreateSeqCUDAWithArray()`, `VecCreateMPIWithArray()`,
161 `VecCreateSeqWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`,
162 `VecCreateMPI()`, `VecCreateGhostWithArray()`, `VecPlaceArray()`
163 @*/
VecCreateMPICUDAWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar gpuarray[],Vec * v)164 PetscErrorCode VecCreateMPICUDAWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, PetscInt N, const PetscScalar gpuarray[], Vec *v)
165 {
166 PetscFunctionBegin;
167 PetscCall(VecCreateMPICUPMWithArray<DeviceType::CUDA>(comm, bs, n, N, gpuarray, v));
168 PetscFunctionReturn(PETSC_SUCCESS);
169 }
170