#include "../matmpidensecupm.hpp"

using namespace Petsc::mat::cupm;
using Petsc::device::cupm::DeviceType;

static constexpr impl::MatDense_MPI_CUPM<DeviceType::CUDA> mat_cupm{};

/*MC
  MATDENSECUDA - "densecuda" - A matrix type to be used for dense matrices on GPUs.

  This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process
  communicator, and `MATMPIDENSECUDA` otherwise.

  Options Database Key:
. -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to
  `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`,
`MATMPIDENSEHIP`, `MATDENSE`
M*/

/*MC
  MATMPIDENSECUDA - "mpidensecuda" - A matrix type to be used for distributed dense matrices on
  GPUs.

  Options Database Key:
. -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to
  `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MATMPIDENSE`, `MATSEQDENSE`,
`MATSEQDENSECUDA`, `MATSEQDENSEHIP`
M*/
PETSC_INTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat A)
{
  PetscFunctionBegin;
  PetscCall(mat_cupm.Create(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat A, MatType type, MatReuse reuse, Mat *ret)
{
  PetscFunctionBegin;
  PetscCall(mat_cupm.Convert_MPIDense_MPIDenseCUPM(A, type, reuse, ret));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have it calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have it calculated if `n` is given)
- data - optional location of GPU matrix data. Pass `NULL` to have PETSc control matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

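  Example Usage:
  A minimal sketch that lets PETSc manage the device storage (the sizes are illustrative only):
.vb
  Mat A;

  PetscCall(MatCreateDenseCUDA(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, 100, 100, NULL, &A));
  // set entries with MatSetValues() (or a device-side routine), then assemble
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDestroy(&A));
.ve
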
.seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreateDenseCUPM<DeviceType::CUDA>(comm, m, n, M, N, data, A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
  array provided by the user. This is useful to avoid copying an array into a matrix.

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Notes:
  Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

  You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is
  responsible for freeing this array; it will not be freed when the matrix is destroyed. The
  array must have been allocated with `cudaMalloc()`.

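  Example Usage:
  A minimal sketch of temporarily using a user-allocated device buffer (the buffer is sized
  lda*N here for illustration; error checking of the CUDA calls is omitted):
.vb
  PetscScalar *devarray;
  PetscInt     lda, N;

  PetscCall(MatDenseGetLDA(mat, &lda));
  PetscCall(MatGetSize(mat, NULL, &N));
  cudaMalloc((void **)&devarray, lda * N * sizeof(PetscScalar));
  // ... fill devarray on the device ...
  PetscCall(MatDenseCUDAPlaceArray(mat, devarray));
  // ... use mat ...
  PetscCall(MatDenseCUDAResetArray(mat));
  cudaFree(devarray);
.ve
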
.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`,
`MatDenseCUDAReplaceArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMPlaceArray<DeviceType::CUDA>(mat, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call
  to `MatDenseCUDAPlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDenseCUDAPlaceArray()`

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMResetArray<DeviceType::CUDA>(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix
  with an array provided by the user. This is useful to avoid copying an array into a matrix.

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Notes:
  Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

  This permanently replaces the GPU array and frees the memory associated with the old GPU
  array. The memory passed in CANNOT be freed by the user. It will be freed when the matrix is
  destroyed. The array should respect the matrix leading dimension.

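  Example Usage:
  A minimal sketch that hands ownership of a user-allocated device buffer to the matrix (the
  buffer is sized lda*N here for illustration; error checking of the CUDA call is omitted):
.vb
  PetscScalar *devarray;
  PetscInt     lda, N;

  PetscCall(MatDenseGetLDA(mat, &lda));
  PetscCall(MatGetSize(mat, NULL, &N));
  cudaMalloc((void **)&devarray, lda * N * sizeof(PetscScalar));
  // ... fill devarray on the device ...
  PetscCall(MatDenseCUDAReplaceArray(mat, devarray));
  // do NOT cudaFree(devarray); the matrix now owns the buffer
.ve
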
.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMReplaceArray<DeviceType::CUDA>(mat, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA`
  matrix.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. a - the GPU array in column major order

  Level: developer

  Notes:
  The data on the GPU may not be updated due to operations done on the CPU. If you need updated
  data, use `MatDenseCUDAGetArray()`.

  The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.

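  Example Usage:
  A minimal sketch that overwrites all local entries directly on the GPU (error checking of the
  CUDA call is omitted):
.vb
  PetscScalar *a;
  PetscInt     lda, N;

  PetscCall(MatDenseGetLDA(A, &lda));
  PetscCall(MatGetSize(A, NULL, &N));
  PetscCall(MatDenseCUDAGetArrayWrite(A, &a));
  cudaMemset(a, 0, lda * N * sizeof(PetscScalar)); // zero every local entry on the device
  PetscCall(MatDenseCUDARestoreArrayWrite(A, &a));
.ve
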
.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
`MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`,
`MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArrayWrite<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a
  `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

  Not Collective

  Input Parameters:
+ A - the matrix
- a - the GPU array in column major order

  Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
`MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArrayWrite<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a
  `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when
  no longer needed.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. a - the GPU array in column major order

  Level: developer

  Note:
  Data may be copied to the GPU due to operations done on the CPU. If you need write-only
  access, use `MatDenseCUDAGetArrayWrite()`.

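  Example Usage:
  A minimal sketch of read-only access to the device data:
.vb
  const PetscScalar *a;

  PetscCall(MatDenseCUDAGetArrayRead(A, &a));
  // pass "a" to a device kernel or library routine that only reads it
  PetscCall(MatDenseCUDARestoreArrayRead(A, &a));
.ve
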
.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
`MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
`MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArrayRead<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a
  `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.

  Not Collective

  Input Parameters:
+ A - the matrix
- a - the GPU array in column major order

  Level: developer

  Note:
  Data can be copied to the GPU due to operations done on the CPU. If you need write-only
  access, use `MatDenseCUDAGetArrayWrite()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
`MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArrayRead<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The
  array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. a - the GPU array in column major order

  Level: developer

  Note:
  Data can be copied to the GPU due to operations done on the CPU. If you need write-only
  access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use
  `MatDenseCUDAGetArrayRead()`.

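  Example Usage:
  A minimal sketch of read-write access; the kernel launch in the comment is hypothetical and
  stands in for any device code that both reads and updates the entries:
.vb
  PetscScalar *a;

  PetscCall(MatDenseCUDAGetArray(A, &a));
  // e.g. scale_kernel<<<grid, block>>>(a, alpha); // update the entries in place on the GPU
  PetscCall(MatDenseCUDARestoreArray(A, &a));
.ve
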
.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`,
`MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
`MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArray<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix
  previously obtained with `MatDenseCUDAGetArray()`.

  Not Collective

  Input Parameters:
+ A - the matrix
- a - the GPU array in column major order

  Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`,
`MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArray<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDASetPreallocation - Set the device array used for storing the matrix elements of a
  `MATDENSECUDA` matrix

  Collective

  Input Parameters:
+ A            - the matrix
- device_array - the array (or `NULL`)

  Level: intermediate

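  Example Usage:
  A minimal sketch that lets PETSc allocate the device storage; pass a `cudaMalloc()`'d buffer
  instead of `NULL` to provide your own (the sizes are illustrative only):
.vb
  Mat A;

  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
  PetscCall(MatSetType(A, MATDENSECUDA));
  PetscCall(MatDenseCUDASetPreallocation(A, NULL));
.ve
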
.seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MatCreate()`, `MatCreateDenseCUDA()`,
`MatSetValues()`, `MatDenseSetLDA()`
@*/
PetscErrorCode MatDenseCUDASetPreallocation(Mat A, PetscScalar *device_array)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMSetPreallocation<DeviceType::CUDA>(A, device_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}