xref: /petsc/src/mat/impls/dense/mpi/cupm/cuda/matmpidensecuda.cu (revision bcee047adeeb73090d7e36cc71e39fc287cdbb97)
1 #include "../matmpidensecupm.hpp"
2 
/* Bring the templated CUPM (unified CUDA/HIP) dense-matrix implementation into scope */
using namespace Petsc::mat::cupm;
using Petsc::device::cupm::DeviceType;

/* Stateless dispatcher: its member functions forward to the CUDA instantiation of the
   MPI dense CUPM implementation. constexpr, so it occupies no runtime state. */
static constexpr impl::MatDense_MPI_CUPM<DeviceType::CUDA> mat_cupm{};
7 
8 /*MC
9   MATDENSECUDA - "densecuda" - A matrix type to be used for dense matrices on GPUs.
10 
11   This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process
12   communicator, and `MATMPIDENSECUDA` otherwise.
13 
14   Options Database Key:
15 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to
16                         `MatSetFromOptions()`
17 
18   Level: beginner
19 
20 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`,
21 `MATMPIDENSEHIP`, `MATDENSE`
22 M*/
23 
24 /*MC
25   MATMPIDENSECUDA - "mpidensecuda" - A matrix type to be used for distributed dense matrices on
26   GPUs.
27 
28   Options Database Key:
29 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to
30                            `MatSetFromOptions()`
31 
32   Level: beginner
33 
34 .seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MATMPIDENSE`, `MATSEQDENSE`,
35 `MATSEQDENSECUDA`, `MATSEQDENSEHIP`
36 M*/
/* Registered constructor for MATMPIDENSECUDA; invoked by MatSetType()/MatCreate().
   Forwards to the shared CUPM implementation instantiated for CUDA. */
PETSC_INTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat A)
{
  PetscFunctionBegin;
  PetscCall(mat_cupm.Create(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
43 
/* MatConvert() implementation for MATMPIDENSE -> MATMPIDENSECUDA.
   Forwards to the shared CUPM conversion routine instantiated for CUDA. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat A, MatType type, MatReuse reuse, Mat *ret)
{
  PetscFunctionBegin;
  PetscCall(mat_cupm.Convert_MPIDense_MPIDenseCUPM(A, type, reuse, ret));
  PetscFunctionReturn(PETSC_SUCCESS);
}
50 
/*@C
  MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of GPU matrix data. Pass `NULL` to have PETSc to control matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

.seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  /* Delegates to the device-generic CUPM creation routine instantiated for CUDA */
  PetscCall(MatCreateDenseCUPM<DeviceType::CUDA>(comm, m, n, M, N, data, A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
77 
/*@C
  MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
  array provided by the user. This is useful to avoid copying an array into a matrix.

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is
  responsible for freeing this array; it will not be freed when the matrix is destroyed. The
  array must have been allocated with `cudaMalloc()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`,
          `MatDenseCUDAReplaceArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMPlaceArray<DeviceType::CUDA>(mat, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
104 
/*@C
  MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to
  `MatDenseCUDAPlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDenseCUDAPlaceArray()`

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMResetArray<DeviceType::CUDA>(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
127 
/*@C
  MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix
  with an array provided by the user. This is useful to avoid copying an array into a matrix.

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Note:
  This permanently replaces the GPU array and frees the memory associated with the old GPU
  array. The memory passed in CANNOT be freed by the user. It will be freed when the matrix is
  destroyed. The array should respect the matrix leading dimension.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMReplaceArray<DeviceType::CUDA>(mat, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
153 
/*@C
  MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA`
  matrix.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. a - the GPU array in column major order

  Level: developer

  Notes:
  The data on the GPU may not be updated due to operations done on the CPU. If you need updated
  data, use `MatDenseCUDAGetArray()`.

  The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
          `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`,
          `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArrayWrite<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
184 
/*@C
  MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a
  `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

  Not Collective

  Input Parameters:
+ A     - the matrix
- a     - the GPU array in column major order

  Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
`MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArrayWrite<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
206 
/*@C
  MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a
  `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when
  no longer needed.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. a - the GPU array in column major order

  Level: developer

  Note:
  Data may be copied to the GPU due to operations done on the CPU. If you need write only
  access, use `MatDenseCUDAGetArrayWrite()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
          `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
          `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArrayRead<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
236 
/*@C
  MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a
  `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.

  Not Collective

  Input Parameters:
+ A     - the matrix
- a     - the GPU array in column major order

  Level: developer

  Note:
  Data can be copied to the GPU due to operations done on the CPU. If you need write only
  access, use `MatDenseCUDAGetArrayWrite()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
          `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArrayRead<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
262 
/*@C
  MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The
  array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. a - the GPU array in column major order

  Level: developer

  Note:
  Data can be copied to the GPU due to operations done on the CPU. If you need write only
  access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use
  `MatDenseCUDAGetArrayRead()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`,
          `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
          `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArray<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
292 
/*@C
  MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix
  previously obtained with `MatDenseCUDAGetArray()`.

  Not Collective

  Input Parameters:
+ A - the matrix
- a - the GPU array in column major order

  Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`,
          `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArray<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
314 
/*@C
  MatDenseCUDASetPreallocation - Set the device array used for storing the matrix elements of a
  `MATDENSECUDA` matrix

  Collective

  Input Parameters:
+ A            - the matrix
- device_array - the array (or `NULL`)

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MatCreate()`, `MatCreateDenseCUDA()`,
`MatSetValues()`, `MatDenseSetLDA()`
@*/
PetscErrorCode MatDenseCUDASetPreallocation(Mat A, PetscScalar *device_array)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMSetPreallocation<DeviceType::CUDA>(A, device_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
336