xref: /petsc/src/mat/impls/dense/mpi/cupm/cuda/matmpidensecuda.cu (revision d0b3cce3968ed23450689940f4aaa7d58fe0cffb)
1 #include "../matmpidensecupm.hpp"
2 
3 using namespace Petsc::mat::cupm;
4 using Petsc::device::cupm::DeviceType;
5 
6 static constexpr impl::MatDense_MPI_CUPM<DeviceType::CUDA> mat_cupm{};
7 
/*MC
  MATDENSECUDA - "densecuda" - A matrix type to be used for dense matrices on GPUs.

  This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process
  communicator, and `MATMPIDENSECUDA` otherwise.

  Options Database Key:
. -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to
                        `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`,
`MATMPIDENSEHIP`, `MATDENSE`
M*/
/*MC
  MATMPIDENSECUDA - "mpidensecuda" - A matrix type to be used for distributed dense matrices on
  GPUs.

  Options Database Key:
. -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to
                           `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MATMPIDENSE`, `MATSEQDENSE`,
`MATSEQDENSECUDA`, `MATSEQDENSEHIP`
M*/
MatCreate_MPIDenseCUDA(Mat A)37 PETSC_INTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat A)
38 {
39   PetscFunctionBegin;
40   PetscCall(mat_cupm.Create(A));
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
MatConvert_MPIDense_MPIDenseCUDA(Mat A,MatType type,MatReuse reuse,Mat * ret)44 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat A, MatType type, MatReuse reuse, Mat *ret)
45 {
46   PetscFunctionBegin;
47   PetscCall(mat_cupm.Convert_MPIDense_MPIDenseCUPM(A, type, reuse, ret));
48   PetscFunctionReturn(PETSC_SUCCESS);
49 }
50 
51 /*@C
52   MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.
53 
54   Collective
55 
56   Input Parameters:
57 + comm - MPI communicator
58 . m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
59 . n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
60 . M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
61 . N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
62 - data - optional location of GPU matrix data. Pass `NULL` to have PETSc to control matrix memory allocation.
63 
64   Output Parameter:
65 . A - the matrix
66 
67   Level: intermediate
68 
69 .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
70 @*/
MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar * data,Mat * A)71 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
72 {
73   PetscFunctionBegin;
74   PetscCall(MatCreateDenseCUPM<DeviceType::CUDA>(comm, m, n, M, N, data, A));
75   PetscFunctionReturn(PETSC_SUCCESS);
76 }
77 
78 /*@C
79   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
80   array provided by the user. This is useful to avoid copying an array into a matrix.
81 
82   Not Collective
83 
84   Input Parameters:
85 + mat   - the matrix
86 - array - the array in column major order
87 
88   Level: developer
89 
90   Note:
91   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
92 
93   You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is
94   responsible for freeing this array; it will not be freed when the matrix is destroyed. The
95   array must have been allocated with `cudaMalloc()`.
96 
97 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`,
98           `MatDenseCUDAReplaceArray()`
99 @*/
MatDenseCUDAPlaceArray(Mat mat,const PetscScalar * array)100 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
101 {
102   PetscFunctionBegin;
103   PetscCall(MatDenseCUPMPlaceArray<DeviceType::CUDA>(mat, array));
104   PetscFunctionReturn(PETSC_SUCCESS);
105 }
106 
107 /*@C
108   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to
109   `MatDenseCUDAPlaceArray()`
110 
111   Not Collective
112 
113   Input Parameter:
114 . mat - the matrix
115 
116   Level: developer
117 
118   Note:
119   You can only call this after a call to `MatDenseCUDAPlaceArray()`
120 
121 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
122 @*/
MatDenseCUDAResetArray(Mat mat)123 PetscErrorCode MatDenseCUDAResetArray(Mat mat)
124 {
125   PetscFunctionBegin;
126   PetscCall(MatDenseCUPMResetArray<DeviceType::CUDA>(mat));
127   PetscFunctionReturn(PETSC_SUCCESS);
128 }
129 
130 /*@C
131   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix
132   with an array provided by the user. This is useful to avoid copying an array into a matrix.
133 
134   Not Collective
135 
136   Input Parameters:
137 + mat   - the matrix
138 - array - the array in column major order
139 
140   Level: developer
141 
142   Note:
143   Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.
144 
145   This permanently replaces the GPU array and frees the memory associated with the old GPU
146   array. The memory passed in CANNOT be freed by the user. It will be freed when the matrix is
147   destroyed. The array should respect the matrix leading dimension.
148 
149 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
150 @*/
MatDenseCUDAReplaceArray(Mat mat,const PetscScalar * array)151 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
152 {
153   PetscFunctionBegin;
154   PetscCall(MatDenseCUPMReplaceArray<DeviceType::CUDA>(mat, array));
155   PetscFunctionReturn(PETSC_SUCCESS);
156 }
157 
158 /*@C
159   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA`
160   matrix.
161 
162   Not Collective
163 
164   Input Parameter:
165 . A - the matrix
166 
167   Output Parameter:
168 . a - the GPU array in column major order
169 
170   Level: developer
171 
172   Notes:
173   The data on the GPU may not be updated due to operations done on the CPU. If you need updated
174   data, use `MatDenseCUDAGetArray()`.
175 
176   The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.
177 
178 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
179           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`,
180           `MatDenseCUDARestoreArrayRead()`
181 @*/
MatDenseCUDAGetArrayWrite(Mat A,PetscScalar ** a)182 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
183 {
184   PetscFunctionBegin;
185   PetscCall(MatDenseCUPMGetArrayWrite<DeviceType::CUDA>(A, a));
186   PetscFunctionReturn(PETSC_SUCCESS);
187 }
188 
189 /*@C
190   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a
191   `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.
192 
193   Not Collective
194 
195   Input Parameters:
196 + A - the matrix
197 - a - the GPU array in column major order
198 
199   Level: developer
200 
201 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
202 `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
203 @*/
MatDenseCUDARestoreArrayWrite(Mat A,PetscScalar ** a)204 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
205 {
206   PetscFunctionBegin;
207   PetscCall(MatDenseCUPMRestoreArrayWrite<DeviceType::CUDA>(A, a));
208   PetscFunctionReturn(PETSC_SUCCESS);
209 }
210 
211 /*@C
212   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a
213   `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when
214   no longer needed.
215 
216   Not Collective
217 
218   Input Parameter:
219 . A - the matrix
220 
221   Output Parameter:
222 . a - the GPU array in column major order
223 
224   Level: developer
225 
226   Note:
227   Data may be copied to the GPU due to operations done on the CPU. If you need write only
228   access, use `MatDenseCUDAGetArrayWrite()`.
229 
230 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
231           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
232           `MatDenseCUDARestoreArrayRead()`
233 @*/
MatDenseCUDAGetArrayRead(Mat A,const PetscScalar ** a)234 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
235 {
236   PetscFunctionBegin;
237   PetscCall(MatDenseCUPMGetArrayRead<DeviceType::CUDA>(A, a));
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 /*@C
242   MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a
243   `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.
244 
245   Not Collective
246 
247   Input Parameters:
248 + A - the matrix
249 - a - the GPU array in column major order
250 
251   Level: developer
252 
253   Note:
254   Data can be copied to the GPU due to operations done on the CPU. If you need write only
255   access, use `MatDenseCUDAGetArrayWrite()`.
256 
257 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
258           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
259 @*/
MatDenseCUDARestoreArrayRead(Mat A,const PetscScalar ** a)260 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
261 {
262   PetscFunctionBegin;
263   PetscCall(MatDenseCUPMRestoreArrayRead<DeviceType::CUDA>(A, a));
264   PetscFunctionReturn(PETSC_SUCCESS);
265 }
266 
267 /*@C
268   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The
269   array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.
270 
271   Not Collective
272 
273   Input Parameter:
274 . A - the matrix
275 
276   Output Parameter:
277 . a - the GPU array in column major order
278 
279   Level: developer
280 
281   Note:
282   Data can be copied to the GPU due to operations done on the CPU. If you need write only
283   access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use
284   `MatDenseCUDAGetArrayRead()`.
285 
286 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`,
287           `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
288           `MatDenseCUDARestoreArrayRead()`
289 @*/
MatDenseCUDAGetArray(Mat A,PetscScalar ** a)290 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
291 {
292   PetscFunctionBegin;
293   PetscCall(MatDenseCUPMGetArray<DeviceType::CUDA>(A, a));
294   PetscFunctionReturn(PETSC_SUCCESS);
295 }
296 
297 /*@C
298   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix
299   previously obtained with `MatDenseCUDAGetArray()`.
300 
301   Not Collective
302 
303   Level: developer
304 
305   Input Parameters:
306 + A - the matrix
307 - a - the GPU array in column major order
308 
309 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`,
310           `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
311 @*/
MatDenseCUDARestoreArray(Mat A,PetscScalar ** a)312 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
313 {
314   PetscFunctionBegin;
315   PetscCall(MatDenseCUPMRestoreArray<DeviceType::CUDA>(A, a));
316   PetscFunctionReturn(PETSC_SUCCESS);
317 }
318 
319 /*@C
320   MatDenseCUDASetPreallocation - Set the device array used for storing the matrix elements of a
321   `MATDENSECUDA` matrix
322 
323   Collective
324 
325   Input Parameters:
326 + A            - the matrix
327 - device_array - the array (or `NULL`)
328 
329   Level: intermediate
330 
331 .seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MatCreate()`, `MatCreateDenseCUDA()`,
332 `MatSetValues()`, `MatDenseSetLDA()`
333 @*/
MatDenseCUDASetPreallocation(Mat A,PetscScalar * device_array)334 PetscErrorCode MatDenseCUDASetPreallocation(Mat A, PetscScalar *device_array)
335 {
336   PetscFunctionBegin;
337   PetscCall(MatDenseCUPMSetPreallocation<DeviceType::CUDA>(A, device_array));
338   PetscFunctionReturn(PETSC_SUCCESS);
339 }
340