1 #pragma once 2 3 #include <petscdevicetypes.h> 4 #include <petscviewertypes.h> 5 6 #if PETSC_CPP_VERSION >= 11 // C++11 7 #define PETSC_DEVICE_ALIGNOF(...) alignof(decltype(__VA_ARGS__)) 8 #elif PETSC_C_VERSION >= 11 // C11 9 #ifdef __GNUC__ 10 #define PETSC_DEVICE_ALIGNOF(...) _Alignof(__typeof__(__VA_ARGS__)) 11 #else 12 #include <stddef.h> // max_align_t 13 // Note we cannot just do _Alignof(expression) since clang warns that "'_Alignof' applied to an 14 // expression is a GNU extension", so we just default to max_align_t which is ultra safe 15 #define PETSC_DEVICE_ALIGNOF(...) _Alignof(max_align_t) 16 #endif // __GNUC__ 17 #else 18 #define PETSC_DEVICE_ALIGNOF(...) PETSC_MEMALIGN 19 #endif 20 21 /* SUBMANSEC = Sys */ 22 23 // REVIEW ME: this should probably go somewhere better, configure-time? 24 #define PETSC_HAVE_HOST 1 25 26 /* logging support */ 27 PETSC_EXTERN PetscClassId PETSC_DEVICE_CLASSID; 28 PETSC_EXTERN PetscClassId PETSC_DEVICE_CONTEXT_CLASSID; 29 30 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void); 31 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void); 32 PETSC_EXTERN PetscErrorCode PetscGetMemType(const void *, PetscMemType *); 33 34 /* PetscDevice */ 35 #if PetscDefined(HAVE_CXX) 36 PETSC_EXTERN PetscErrorCode PetscDeviceCreate(PetscDeviceType, PetscInt, PetscDevice *); 37 PETSC_EXTERN PetscErrorCode PetscDeviceDestroy(PetscDevice *); 38 PETSC_EXTERN PetscErrorCode PetscDeviceConfigure(PetscDevice); 39 PETSC_EXTERN PetscErrorCode PetscDeviceView(PetscDevice, PetscViewer); 40 PETSC_EXTERN PetscErrorCode PetscDeviceGetType(PetscDevice, PetscDeviceType *); 41 PETSC_EXTERN PetscErrorCode PetscDeviceGetDeviceId(PetscDevice, PetscInt *); 42 PETSC_EXTERN PetscDeviceType PETSC_DEVICE_DEFAULT(void); 43 PETSC_EXTERN PetscErrorCode PetscDeviceSetDefaultDeviceType(PetscDeviceType); 44 PETSC_EXTERN PetscErrorCode PetscDeviceInitialize(PetscDeviceType); 45 PETSC_EXTERN PetscBool PetscDeviceInitialized(PetscDeviceType); 46 #else 47 #define PetscDeviceCreate(PetscDeviceType, PetscInt, dev) (*(dev) = PETSC_NULLPTR, PETSC_SUCCESS) 48 #define PetscDeviceDestroy(dev) (*(dev) = PETSC_NULLPTR, PETSC_SUCCESS) 49 #define PetscDeviceConfigure(PetscDevice) PETSC_SUCCESS 50 #define PetscDeviceView(PetscDevice, PetscViewer) PETSC_SUCCESS 51 #define PetscDeviceGetType(PetscDevice, type) (*(type) = PETSC_DEVICE_DEFAULT(), PETSC_SUCCESS) 52 #define PetscDeviceGetDeviceId(PetscDevice, id) (*(id) = 0, PETSC_SUCCESS) 53 #define PETSC_DEVICE_DEFAULT() PETSC_DEVICE_HOST 54 #define PetscDeviceSetDefaultDeviceType(PetscDeviceType) PETSC_SUCCESS 55 #define PetscDeviceInitialize(PetscDeviceType) PETSC_SUCCESS 56 #define PetscDeviceInitialized(dtype) ((dtype) == PETSC_DEVICE_HOST) 57 #endif /* PetscDefined(HAVE_CXX) */ 58 59 /* PetscDeviceContext */ 60 #if PetscDefined(HAVE_CXX) 61 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *); 62 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *); 63 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext, PetscStreamType); 64 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext, PetscStreamType *); 65 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext, PetscDevice); 66 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext, PetscDevice *); 67 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext, PetscDeviceType *); 68 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext); 69 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext, PetscDeviceContext *); 70 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext, PetscBool *); 71 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext, PetscDeviceContext); 72 PETSC_EXTERN PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext, PetscStreamType, PetscInt, PetscDeviceContext **); 73 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext, PetscInt, PetscDeviceContext **); 74 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext, PetscInt, PetscDeviceContextJoinMode, PetscDeviceContext **); 75 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext); 76 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm, PetscDeviceContext); 77 PETSC_EXTERN PetscErrorCode PetscDeviceContextView(PetscDeviceContext, PetscViewer); 78 PETSC_EXTERN PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext, PetscObject, const char name[]); 79 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *); 80 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext); 81 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext, void **); 82 #else 83 #define PetscDeviceContextCreate(dctx) (*(dctx) = PETSC_NULLPTR, PETSC_SUCCESS) 84 #define PetscDeviceContextDestroy(dctx) (*(dctx) = PETSC_NULLPTR, PETSC_SUCCESS) 85 #define PetscDeviceContextSetStreamType(PetscDeviceContext, PetscStreamType) PETSC_SUCCESS 86 #define PetscDeviceContextGetStreamType(PetscDeviceContext, type) (*(type) = PETSC_STREAM_DEFAULT, PETSC_SUCCESS) 87 #define PetscDeviceContextSetDevice(PetscDeviceContext, PetscDevice) PETSC_SUCCESS 88 #define PetscDeviceContextGetDevice(PetscDeviceContext, device) (*(device) = PETSC_NULLPTR, PETSC_SUCCESS) 89 #define PetscDeviceContextGetDeviceType(PetscDeviceContext, type) (*(type) = PETSC_DEVICE_DEFAULT(), PETSC_SUCCESS) 90 #define PetscDeviceContextSetUp(PetscDeviceContext) PETSC_SUCCESS 91 #define PetscDeviceContextDuplicate(PetscDeviceContextl, PetscDeviceContextr) (*(PetscDeviceContextr) = PETSC_NULLPTR, PETSC_SUCCESS) 92 #define PetscDeviceContextQueryIdle(PetscDeviceContext, idle) (*(idle) = PETSC_TRUE, PETSC_SUCCESS) 93 #define PetscDeviceContextWaitForContext(PetscDeviceContextl, PetscDeviceContextr) PETSC_SUCCESS 94 #define PetscDeviceContextForkWithStreamType(PetscDeviceContextp, PetscStreamType, PetscInt, PetscDeviceContextc) (*(PetscDeviceContextc) = PETSC_NULLPTR, PETSC_SUCCESS) 95 #define PetscDeviceContextFork(PetscDeviceContextp, PetscInt, PetscDeviceContextc) (*(PetscDeviceContextc) = PETSC_NULLPTR, PETSC_SUCCESS) 96 #define PetscDeviceContextJoin(PetscDeviceContextp, PetscInt, PetscDeviceContextJoinMode, PetscDeviceContextc) (*(PetscDeviceContextc) = PETSC_NULLPTR, PETSC_SUCCESS) 97 #define PetscDeviceContextSynchronize(PetscDeviceContext) PETSC_SUCCESS 98 #define PetscDeviceContextSetFromOptions(MPI_Comm, PetscDeviceContext) PETSC_SUCCESS 99 #define PetscDeviceContextView(PetscDeviceContext, PetscViewer) PETSC_SUCCESS 100 #define PetscDeviceContextViewFromOptions(PetscDeviceContext, PetscObject, PetscViewer) PETSC_SUCCESS 101 #define PetscDeviceContextGetCurrentContext(dctx) (*(dctx) = PETSC_NULLPTR, PETSC_SUCCESS) 102 #define PetscDeviceContextSetCurrentContext(PetscDeviceContext) PETSC_SUCCESS 103 #define PetscDeviceContextGetStreamHandle(PetscDeviceContext, handle) (*(handle) = PETSC_NULLPTR, PETSC_SUCCESS) 104 #endif /* PetscDefined(HAVE_CXX) */ 105 106 /* memory */ 107 #if PetscDefined(HAVE_CXX) 108 PETSC_EXTERN PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext, PetscBool, PetscMemType, size_t, size_t, void **PETSC_RESTRICT); 109 PETSC_EXTERN PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext, void *PETSC_RESTRICT); 110 PETSC_EXTERN PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext, void *PETSC_RESTRICT, const void *PETSC_RESTRICT, size_t); 111 PETSC_EXTERN PetscErrorCode PetscDeviceMemset(PetscDeviceContext, void *PETSC_RESTRICT, PetscInt, size_t); 112 #else 113 #include <string.h> // memset() 114 #define PetscDeviceAllocate_Private(PetscDeviceContext, clear, PetscMemType, size, alignment, ptr) PetscMallocA(1, (clear), __LINE__, PETSC_FUNCTION_NAME, __FILE__, (size), (ptr)) 115 #define PetscDeviceDeallocate_Private(PetscDeviceContext, ptr) PetscFree((ptr)) 116 #define PetscDeviceMemcpy(PetscDeviceContext, dest, src, size) PetscMemcpy((dest), (src), (size)) 117 #define PetscDeviceMemset(PetscDeviceContext, ptr, v, size) ((void)memset((ptr), (unsigned char)(v), (size)), PETSC_SUCCESS) 118 #endif /* PetscDefined(HAVE_CXX) */ 119 120 /*MC 121 PetscDeviceMalloc - Allocate device-aware memory 122 123 Synopsis: 124 #include <petscdevice.h> 125 PetscErrorCode PetscDeviceMalloc(PetscDeviceContext dctx, PetscMemType mtype, size_t n, Type **ptr) 126 127 Not Collective, Asynchronous, Auto-dependency aware 128 129 Input Parameters: 130 + dctx - The `PetscDeviceContext` used to allocate the memory 131 . mtype - The type of memory to allocate 132 - n - The amount (in elements) to allocate 133 134 Output Parameter: 135 . ptr - The pointer to store the result in 136 137 Level: beginner 138 139 Notes: 140 Memory allocated with this function must be freed with `PetscDeviceFree()`. 141 142 If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`. 143 144 This routine falls back to using `PetscMalloc1()` if PETSc was not configured with device 145 support. The user should note that `mtype` is ignored in this case, as `PetscMalloc1()` 146 allocates only host memory. 147 148 This routine uses the `sizeof()` of the memory type requested to determine the total memory 149 to be allocated, therefore you should not multiply the number of elements requested by the 150 `sizeof()` the type\: 151 152 .vb 153 PetscInt *arr; 154 155 // correct 156 PetscDeviceMalloc(dctx,PETSC_MEMTYPE_DEVICE,n,&arr); 157 158 // incorrect 159 PetscDeviceMalloc(dctx,PETSC_MEMTYPE_DEVICE,n*sizeof(*arr),&arr); 160 .ve 161 162 Note result stored `ptr` is immediately valid and the user may freely inspect or manipulate 163 its value on function return, i.e.\: 164 165 .vb 166 PetscInt *ptr; 167 168 PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, 20, &ptr); 169 170 PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize 171 172 ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization 173 .ve 174 175 DAG representation: 176 .vb 177 time -> 178 179 -> dctx - |= CALL =| -\- dctx --> 180 \- ptr -> 181 .ve 182 183 .N ASYNC_API 184 185 .seealso: `PetscDeviceFree()`, `PetscDeviceCalloc()`, `PetscDeviceArrayCopy()`, 186 `PetscDeviceArrayZero()` 187 M*/ 188 #define PetscDeviceMalloc(dctx, mtype, n, ptr) PetscDeviceAllocate_Private((dctx), PETSC_FALSE, (mtype), (size_t)(n) * sizeof(**(ptr)), PETSC_DEVICE_ALIGNOF(**(ptr)), (void **)(ptr)) 189 190 /*MC 191 PetscDeviceCalloc - Allocate zeroed device-aware memory 192 193 Synopsis: 194 #include <petscdevice.h> 195 PetscErrorCode PetscDeviceCalloc(PetscDeviceContext dctx, PetscMemType mtype, size_t n, Type **ptr) 196 197 Not Collective, Asynchronous, Auto-dependency aware 198 199 Input Parameters: 200 + dctx - The `PetscDeviceContext` used to allocate the memory 201 . mtype - The type of memory to allocate 202 - n - The amount (in elements) to allocate 203 204 Output Parameter: 205 . ptr - The pointer to store the result in 206 207 Level: beginner 208 209 Notes: 210 Has identical usage to `PetscDeviceMalloc()` except that the memory is zeroed before it is 211 returned. See `PetscDeviceMalloc()` for further discussion. 212 213 This routine falls back to using `PetscCalloc1()` if PETSc was not configured with device 214 support. The user should note that `mtype` is ignored in this case, as `PetscCalloc1()` 215 allocates only host memory. 216 217 .N ASYNC_API 218 219 .seealso: `PetscDeviceFree()`, `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`, 220 `PetscDeviceArrayZero()` 221 M*/ 222 #define PetscDeviceCalloc(dctx, mtype, n, ptr) PetscDeviceAllocate_Private((dctx), PETSC_TRUE, (mtype), (size_t)(n) * sizeof(**(ptr)), PETSC_DEVICE_ALIGNOF(**(ptr)), (void **)(ptr)) 223 224 /*MC 225 PetscDeviceFree - Free device-aware memory obtained with `PetscDeviceMalloc()` or `PetscDeviceCalloc()` 226 227 Synopsis: 228 #include <petscdevice.h> 229 PetscErrorCode PetscDeviceFree(PetscDeviceContext dctx, void *ptr) 230 231 Not Collective, Asynchronous, Auto-dependency aware 232 233 Input Parameters: 234 + dctx - The `PetscDeviceContext` used to free the memory 235 - ptr - The pointer to free, may be `NULL` 236 237 Level: beginner 238 239 Notes: 240 `ptr` is set to `PETSC_NULLPTR` on successful deallocation. 241 242 `ptr` must have been allocated using `PetscDeviceMalloc()`, `PetscDeviceCalloc()` not `PetscMalloc()` or related routines 243 244 This routine falls back to using `PetscFree()` if PETSc was not configured with device 245 support. The user should note that `PetscFree()` frees only host memory. 246 247 DAG representation: 248 .vb 249 time -> 250 251 -> dctx -/- |= CALL =| - dctx -> 252 -> ptr -/ 253 .ve 254 255 .N ASYNC_API 256 257 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()` 258 M*/ 259 #define PetscDeviceFree(dctx, ptr) ((PetscErrorCode)(PetscDeviceDeallocate_Private((dctx), (ptr)) || ((ptr) = PETSC_NULLPTR, PETSC_SUCCESS))) 260 261 /*MC 262 PetscDeviceArrayCopy - Copy memory in a device-aware manner 263 264 Synopsis: 265 #include <petscdevice.h> 266 PetscErrorCode PetscDeviceArrayCopy(PetscDeviceContext dctx, void *dest, const void *src, size_t n) 267 268 Not Collective, Asynchronous, Auto-dependency aware 269 270 Input Parameters: 271 + dctx - The `PetscDeviceContext` used to copy the memory 272 . dest - The pointer to copy to 273 . src - The pointer to copy from 274 - n - The amount (in elements) to copy 275 276 Notes: 277 Both `dest` and `src` must have been allocated using `PetscDeviceMalloc()` or 278 `PetscDeviceCalloc()`. 279 280 This uses the `sizeof()` of the `src` memory type requested to determine the total memory to 281 be copied, therefore you should not multiply the number of elements by the `sizeof()` the 282 type\: 283 284 .vb 285 PetscInt *to,*from; 286 287 // correct 288 PetscDeviceArrayCopy(dctx,to,from,n); 289 290 // incorrect 291 PetscDeviceArrayCopy(dctx,to,from,n*sizeof(*from)); 292 .ve 293 294 See `PetscDeviceMemcpy()` for further discussion. 295 296 Level: beginner 297 298 .N ASYNC_API 299 300 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, `PetscDeviceFree()`, 301 `PetscDeviceArrayZero()`, `PetscDeviceMemcpy()` 302 M*/ 303 #define PetscDeviceArrayCopy(dctx, dest, src, n) PetscDeviceMemcpy((dctx), (dest), (src), (size_t)(n) * sizeof(*(src))) 304 305 /*MC 306 PetscDeviceArrayZero - Zero memory in a device-aware manner 307 308 Synopsis: 309 #include <petscdevice.h> 310 PetscErrorCode PetscDeviceArrayZero(PetscDeviceContext dctx, void *ptr, size_t n) 311 312 Not Collective, Asynchronous, Auto-dependency aware 313 314 Input Parameters: 315 + dctx - The `PetscDeviceContext` used to zero the memory 316 . ptr - The pointer to the memory 317 - n - The amount (in elements) to zero 318 319 Level: beginner 320 321 Notes: 322 `ptr` must have been allocated using `PetscDeviceMalloc()` or `PetscDeviceCalloc()`. 323 324 This uses the `sizeof()` of the memory type requested to determine the total memory to be 325 zeroed, therefore you should not multiply the number of elements by the `sizeof()` the type\: 326 327 .vb 328 PetscInt *ptr; 329 330 // correct 331 PetscDeviceArrayZero(dctx,ptr,n); 332 333 // incorrect 334 PetscDeviceArrayZero(dctx,ptr,n*sizeof(*ptr)); 335 .ve 336 337 See `PetscDeviceMemset()` for further discussion. 338 339 .N ASYNC_API 340 341 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, `PetscDeviceFree()`, 342 `PetscDeviceArrayCopy()`, `PetscDeviceMemset()` 343 M*/ 344 #define PetscDeviceArrayZero(dctx, ptr, n) PetscDeviceMemset((dctx), (ptr), 0, (size_t)(n) * sizeof(*(ptr))) 345