#include "../matmpidensecupm.hpp"

using namespace Petsc::mat::cupm;
using Petsc::device::cupm::DeviceType;

// File-scope instance of the CUPM (CUDA/HIP-generic) MPI dense-matrix implementation,
// instantiated for CUDA. The thin C wrappers below forward either to this object or to
// the corresponding MatDenseCUPM* free templates instantiated with DeviceType::CUDA.
static constexpr impl::MatDense_MPI_CUPM<DeviceType::CUDA> mat_cupm{};

/*MC
  MATDENSECUDA - "densecuda" - A matrix type to be used for dense matrices on GPUs.

  This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process
  communicator, and `MATMPIDENSECUDA` otherwise.

  Options Database Key:
. -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to
  `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`,
`MATMPIDENSEHIP`, `MATDENSE`
M*/

/*MC
  MATMPIDENSECUDA - "mpidensecuda" - A matrix type to be used for distributed dense matrices on
  GPUs.

  Options Database Key:
. -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to
  `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MATMPIDENSE`, `MATSEQDENSE`,
`MATSEQDENSECUDA`, `MATSEQDENSEHIP`
M*/
// Type constructor for MATMPIDENSECUDA; by PETSc MatCreate_<Type> convention this is the
// routine dispatched by MatSetType() (registration happens elsewhere — not visible here).
PETSC_INTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat A)
{
  PetscFunctionBegin;
  PetscCall(mat_cupm.Create(A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Conversion routine MATMPIDENSE -> MATMPIDENSECUDA; forwards to the device-generic CUPM
// converter (dispatched by MatConvert() per the MatConvert_<From>_<To> naming convention).
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat A, MatType type, MatReuse reuse, Mat *ret)
{
  PetscFunctionBegin;
  PetscCall(mat_cupm.Convert_MPIDense_MPIDenseCUPM(A, type, reuse, ret));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
. M    - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given)
- data - optional location of GPU matrix data. Pass `NULL` to have PETSc to control matrix memory allocation.

  Output Parameter:
. A - the matrix

  Level: intermediate

.seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreateDenseCUPM<DeviceType::CUDA>(comm, m, n, M, N, data, A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an
  array provided by the user. This is useful to avoid copying an array into a matrix.

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Notes:
  Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

  You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is
  responsible for freeing this array; it will not be freed when the matrix is destroyed. The
  array must have been allocated with `cudaMalloc()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`,
`MatDenseCUDAReplaceArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMPlaceArray<DeviceType::CUDA>(mat, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to
  `MatDenseCUDAPlaceArray()`

  Not Collective

  Input Parameter:
. mat - the matrix

  Level: developer

  Note:
  You can only call this after a call to `MatDenseCUDAPlaceArray()`

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMResetArray<DeviceType::CUDA>(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix
  with an array provided by the user. This is useful to avoid copying an array into a matrix.

  Not Collective

  Input Parameters:
+ mat   - the matrix
- array - the array in column major order

  Level: developer

  Notes:
  Adding `const` to `array` was an oversight, see notes in `VecPlaceArray()`.

  This permanently replaces the GPU array and frees the memory associated with the old GPU
  array. The memory passed in CANNOT be freed by the user. It will be freed when the matrix is
  destroyed. The array should respect the matrix leading dimension.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMReplaceArray<DeviceType::CUDA>(mat, array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA`
  matrix.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. a - the GPU array in column major order

  Level: developer

  Notes:
  The data on the GPU may not be updated due to operations done on the CPU. If you need updated
  data, use `MatDenseCUDAGetArray()`.

  The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
`MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`,
`MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArrayWrite<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a
  `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`.

  Not Collective

  Input Parameters:
+ A - the matrix
- a - the GPU array in column major order

  Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
`MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArrayWrite<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a
  `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when
  no longer needed.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. a - the GPU array in column major order

  Level: developer

  Note:
  Data may be copied to the GPU due to operations done on the CPU. If you need write only
  access, use `MatDenseCUDAGetArrayWrite()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
`MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
`MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArrayRead<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a
  `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`.

  Not Collective

  Input Parameters:
+ A - the matrix
- a - the GPU array in column major order

  Level: developer

  Note:
  Data can be copied to the GPU due to operations done on the CPU. If you need write only
  access, use `MatDenseCUDAGetArrayWrite()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`,
`MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArrayRead<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The
  array must be restored with `MatDenseCUDARestoreArray()` when no longer needed.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. a - the GPU array in column major order

  Level: developer

  Note:
  Data can be copied to the GPU due to operations done on the CPU. If you need write only
  access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use
  `MatDenseCUDAGetArrayRead()`.

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`,
`MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`,
`MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMGetArray<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix
  previously obtained with `MatDenseCUDAGetArray()`.

  Not Collective

  Input Parameters:
+ A - the matrix
- a - the GPU array in column major order

  Level: developer

.seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`,
`MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMRestoreArray<DeviceType::CUDA>(A, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatDenseCUDASetPreallocation - Set the device array used for storing the matrix elements of a
  `MATDENSECUDA` matrix

  Collective

  Input Parameters:
+ A            - the matrix
- device_array - the array (or `NULL`)

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MatCreate()`, `MatCreateDenseCUDA()`,
`MatSetValues()`, `MatDenseSetLDA()`
@*/
PetscErrorCode MatDenseCUDASetPreallocation(Mat A, PetscScalar *device_array)
{
  PetscFunctionBegin;
  PetscCall(MatDenseCUPMSetPreallocation<DeviceType::CUDA>(A, device_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}