1 #include "../matmpidensecupm.hpp" 2 3 using namespace Petsc::mat::cupm; 4 using Petsc::device::cupm::DeviceType; 5 6 static constexpr impl::MatDense_MPI_CUPM<DeviceType::CUDA> mat_cupm{}; 7 8 /*MC 9 MATDENSECUDA - "densecuda" - A matrix type to be used for dense matrices on GPUs. 10 11 This matrix type is identical to `MATSEQDENSECUDA` when constructed with a single process 12 communicator, and `MATMPIDENSECUDA` otherwise. 13 14 Options Database Key: 15 . -mat_type densecuda - sets the matrix type to `MATDENSECUDA` during a call to 16 `MatSetFromOptions()` 17 18 Level: beginner 19 20 .seealso: [](ch_matrices), `Mat`, `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATSEQDENSEHIP`, 21 `MATMPIDENSEHIP`, `MATDENSE` 22 M*/ 23 24 /*MC 25 MATMPIDENSECUDA - "mpidensecuda" - A matrix type to be used for distributed dense matrices on 26 GPUs. 27 28 Options Database Key: 29 . -mat_type mpidensecuda - sets the matrix type to `MATMPIDENSECUDA` during a call to 30 `MatSetFromOptions()` 31 32 Level: beginner 33 34 .seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MATMPIDENSE`, `MATSEQDENSE`, 35 `MATSEQDENSECUDA`, `MATSEQDENSEHIP` 36 M*/ 37 PETSC_INTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat A) 38 { 39 PetscFunctionBegin; 40 PetscCall(mat_cupm.Create(A)); 41 PetscFunctionReturn(PETSC_SUCCESS); 42 } 43 44 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat A, MatType type, MatReuse reuse, Mat *ret) 45 { 46 PetscFunctionBegin; 47 PetscCall(mat_cupm.Convert_MPIDense_MPIDenseCUPM(A, type, reuse, ret)); 48 PetscFunctionReturn(PETSC_SUCCESS); 49 } 50 51 /*@C 52 MatCreateDenseCUDA - Creates a matrix in `MATDENSECUDA` format using CUDA. 53 54 Collective 55 56 Input Parameters: 57 + comm - MPI communicator 58 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 59 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 60 . M - number of global rows (or `PETSC_DECIDE` to have calculated if `m` is given) 61 . N - number of global columns (or `PETSC_DECIDE` to have calculated if `n` is given) 62 - data - optional location of GPU matrix data. Pass `NULL` to have PETSc to control matrix memory allocation. 63 64 Output Parameter: 65 . A - the matrix 66 67 Level: intermediate 68 69 .seealso: `MATDENSECUDA`, `MatCreate()`, `MatCreateDense()` 70 @*/ 71 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscScalar *data, Mat *A) 72 { 73 PetscFunctionBegin; 74 PetscCall(MatCreateDenseCUPM<DeviceType::CUDA>(comm, m, n, M, N, data, A)); 75 PetscFunctionReturn(PETSC_SUCCESS); 76 } 77 78 /*@C 79 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix with an 80 array provided by the user. This is useful to avoid copying an array into a matrix. 81 82 Not Collective 83 84 Input Parameters: 85 + mat - the matrix 86 - array - the array in column major order 87 88 Level: developer 89 90 Note: 91 You can return to the original array with a call to `MatDenseCUDAResetArray()`. The user is 92 responsible for freeing this array; it will not be freed when the matrix is destroyed. The 93 array must have been allocated with `cudaMalloc()`. 94 95 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`, 96 `MatDenseCUDAReplaceArray()` 97 @*/ 98 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat, const PetscScalar *array) 99 { 100 PetscFunctionBegin; 101 PetscCall(MatDenseCUPMPlaceArray<DeviceType::CUDA>(mat, array)); 102 PetscFunctionReturn(PETSC_SUCCESS); 103 } 104 105 /*@C 106 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to 107 `MatDenseCUDAPlaceArray()` 108 109 Not Collective 110 111 Input Parameter: 112 . mat - the matrix 113 114 Level: developer 115 116 Note: 117 You can only call this after a call to `MatDenseCUDAPlaceArray()` 118 119 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()` 120 @*/ 121 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 122 { 123 PetscFunctionBegin; 124 PetscCall(MatDenseCUPMResetArray<DeviceType::CUDA>(mat)); 125 PetscFunctionReturn(PETSC_SUCCESS); 126 } 127 128 /*@C 129 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a `MATDENSECUDA` matrix 130 with an array provided by the user. This is useful to avoid copying an array into a matrix. 131 132 Not Collective 133 134 Input Parameters: 135 + mat - the matrix 136 - array - the array in column major order 137 138 Level: developer 139 140 Note: 141 This permanently replaces the GPU array and frees the memory associated with the old GPU 142 array. The memory passed in CANNOT be freed by the user. It will be freed when the matrix is 143 destroyed. The array should respect the matrix leading dimension. 144 145 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()` 146 @*/ 147 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat, const PetscScalar *array) 148 { 149 PetscFunctionBegin; 150 PetscCall(MatDenseCUPMReplaceArray<DeviceType::CUDA>(mat, array)); 151 PetscFunctionReturn(PETSC_SUCCESS); 152 } 153 154 /*@C 155 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a `MATDENSECUDA` 156 matrix. 157 158 Not Collective 159 160 Input Parameter: 161 . A - the matrix 162 163 Output Parameter: 164 . a - the GPU array in column major order 165 166 Level: developer 167 168 Notes: 169 The data on the GPU may not be updated due to operations done on the CPU. If you need updated 170 data, use `MatDenseCUDAGetArray()`. 171 172 The array must be restored with `MatDenseCUDARestoreArrayWrite()` when no longer needed. 173 174 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, 175 `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, 176 `MatDenseCUDARestoreArrayRead()` 177 @*/ 178 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 179 { 180 PetscFunctionBegin; 181 PetscCall(MatDenseCUPMGetArrayWrite<DeviceType::CUDA>(A, a)); 182 PetscFunctionReturn(PETSC_SUCCESS); 183 } 184 185 /*@C 186 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a 187 `MATDENSECUDA` matrix previously obtained with `MatDenseCUDAGetArrayWrite()`. 188 189 Not Collective 190 191 Input Parameters: 192 + A - the matrix 193 - a - the GPU array in column major order 194 195 Level: developer 196 197 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, 198 `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 199 @*/ 200 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 201 { 202 PetscFunctionBegin; 203 PetscCall(MatDenseCUPMRestoreArrayWrite<DeviceType::CUDA>(A, a)); 204 PetscFunctionReturn(PETSC_SUCCESS); 205 } 206 207 /*@C 208 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a 209 `MATDENSECUDA` matrix. The array must be restored with `MatDenseCUDARestoreArrayRead()` when 210 no longer needed. 211 212 Not Collective 213 214 Input Parameter: 215 . A - the matrix 216 217 Output Parameter: 218 . a - the GPU array in column major order 219 220 Level: developer 221 222 Note: 223 Data may be copied to the GPU due to operations done on the CPU. If you need write only 224 access, use `MatDenseCUDAGetArrayWrite()`. 225 226 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, 227 `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, 228 `MatDenseCUDARestoreArrayRead()` 229 @*/ 230 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 231 { 232 PetscFunctionBegin; 233 PetscCall(MatDenseCUPMGetArrayRead<DeviceType::CUDA>(A, a)); 234 PetscFunctionReturn(PETSC_SUCCESS); 235 } 236 237 /*@C 238 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a 239 `MATDENSECUDA` matrix previously obtained with a call to `MatDenseCUDAGetArrayRead()`. 240 241 Not Collective 242 243 Input Parameters: 244 + A - the matrix 245 - a - the GPU array in column major order 246 247 Level: developer 248 249 Note: 250 Data can be copied to the GPU due to operations done on the CPU. If you need write only 251 access, use `MatDenseCUDAGetArrayWrite()`. 252 253 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, 254 `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 255 @*/ 256 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 257 { 258 PetscFunctionBegin; 259 PetscCall(MatDenseCUPMRestoreArrayRead<DeviceType::CUDA>(A, a)); 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 /*@C 264 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a `MATDENSECUDA` matrix. The 265 array must be restored with `MatDenseCUDARestoreArray()` when no longer needed. 266 267 Not Collective 268 269 Input Parameter: 270 . A - the matrix 271 272 Output Parameter: 273 . a - the GPU array in column major order 274 275 Level: developer 276 277 Note: 278 Data can be copied to the GPU due to operations done on the CPU. If you need write only 279 access, use `MatDenseCUDAGetArrayWrite()`. For read-only access, use 280 `MatDenseCUDAGetArrayRead()`. 281 282 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, 283 `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, 284 `MatDenseCUDARestoreArrayRead()` 285 @*/ 286 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 287 { 288 PetscFunctionBegin; 289 PetscCall(MatDenseCUPMGetArray<DeviceType::CUDA>(A, a)); 290 PetscFunctionReturn(PETSC_SUCCESS); 291 } 292 293 /*@C 294 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a `MATDENSECUDA` matrix 295 previously obtained with `MatDenseCUDAGetArray()`. 296 297 Not Collective 298 299 Level: developer 300 301 Input Parameters: 302 + A - the matrix 303 - a - the GPU array in column major order 304 305 .seealso: `MATDENSECUDA`, `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, 306 `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 307 @*/ 308 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 309 { 310 PetscFunctionBegin; 311 PetscCall(MatDenseCUPMRestoreArray<DeviceType::CUDA>(A, a)); 312 PetscFunctionReturn(PETSC_SUCCESS); 313 } 314 315 /*@C 316 MatDenseCUDASetPreallocation - Set the device array used for storing the matrix elements of a 317 `MATDENSECUDA` matrix 318 319 Collective 320 321 Input Parameters: 322 + A - the matrix 323 - device_array - the array (or `NULL`) 324 325 Level: intermediate 326 327 .seealso: [](ch_matrices), `Mat`, `MATDENSECUDA`, `MatCreate()`, `MatCreateDenseCUDA()`, 328 `MatSetValues()`, `MatDenseSetLDA()` 329 @*/ 330 PetscErrorCode MatDenseCUDASetPreallocation(Mat A, PetscScalar *device_array) 331 { 332 PetscFunctionBegin; 333 PetscCall(MatDenseCUPMSetPreallocation<DeviceType::CUDA>(A, device_array)); 334 PetscFunctionReturn(PETSC_SUCCESS); 335 } 336