1 #pragma once 2 3 #include <petscdevicetypes.h> 4 #include <petscviewertypes.h> 5 6 #if PETSC_CPP_VERSION >= 11 // C++11 7 #define PETSC_DEVICE_ALIGNOF(...) alignof(decltype(__VA_ARGS__)) 8 #elif PETSC_C_VERSION >= 11 // C11 9 #ifdef __GNUC__ 10 #define PETSC_DEVICE_ALIGNOF(...) _Alignof(__typeof__(__VA_ARGS__)) 11 #else 12 #include <stddef.h> // max_align_t 13 // Note we cannot just do _Alignof(expression) since clang warns that "'_Alignof' applied to an 14 // expression is a GNU extension", so we just default to max_align_t which is ultra safe 15 #define PETSC_DEVICE_ALIGNOF(...) _Alignof(max_align_t) 16 #endif // __GNUC__ 17 #else 18 #define PETSC_DEVICE_ALIGNOF(...) PETSC_MEMALIGN 19 #endif 20 21 /* MANSEC = Sys */ 22 /* SUBMANSEC = Device */ 23 24 // REVIEW ME: this should probably go somewhere better, configure-time? 25 #define PETSC_HAVE_HOST 1 26 27 /* logging support */ 28 PETSC_EXTERN PetscClassId PETSC_DEVICE_CLASSID; 29 PETSC_EXTERN PetscClassId PETSC_DEVICE_CONTEXT_CLASSID; 30 31 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void); 32 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void); 33 PETSC_EXTERN PetscErrorCode PetscGetMemType(const void *, PetscMemType *); 34 35 /* PetscDevice */ 36 #if PetscDefined(HAVE_CXX) 37 PETSC_EXTERN PetscErrorCode PetscDeviceCreate(PetscDeviceType, PetscInt, PetscDevice *); 38 PETSC_EXTERN PetscErrorCode PetscDeviceDestroy(PetscDevice *); 39 PETSC_EXTERN PetscErrorCode PetscDeviceConfigure(PetscDevice); 40 PETSC_EXTERN PetscErrorCode PetscDeviceView(PetscDevice, PetscViewer); 41 PETSC_EXTERN PetscErrorCode PetscDeviceGetType(PetscDevice, PetscDeviceType *); 42 PETSC_EXTERN PetscErrorCode PetscDeviceGetDeviceId(PetscDevice, PetscInt *); 43 PETSC_EXTERN PetscDeviceType PETSC_DEVICE_DEFAULT(void); 44 PETSC_EXTERN PetscErrorCode PetscDeviceSetDefaultDeviceType(PetscDeviceType); 45 PETSC_EXTERN PetscErrorCode PetscDeviceInitialize(PetscDeviceType); 46 PETSC_EXTERN PetscBool PetscDeviceInitialized(PetscDeviceType); 47 #else 48 #define PetscDeviceCreate(PetscDeviceType, PetscInt, dev) (*(dev) = PETSC_NULLPTR, PETSC_SUCCESS) 49 #define PetscDeviceDestroy(dev) (*(dev) = PETSC_NULLPTR, PETSC_SUCCESS) 50 #define PetscDeviceConfigure(PetscDevice) PETSC_SUCCESS 51 #define PetscDeviceView(PetscDevice, PetscViewer) PETSC_SUCCESS 52 #define PetscDeviceGetType(PetscDevice, type) (*(type) = PETSC_DEVICE_DEFAULT(), PETSC_SUCCESS) 53 #define PetscDeviceGetDeviceId(PetscDevice, id) (*(id) = 0, PETSC_SUCCESS) 54 #define PETSC_DEVICE_DEFAULT() PETSC_DEVICE_HOST 55 #define PetscDeviceSetDefaultDeviceType(PetscDeviceType) PETSC_SUCCESS 56 #define PetscDeviceInitialize(PetscDeviceType) PETSC_SUCCESS 57 #define PetscDeviceInitialized(dtype) ((dtype) == PETSC_DEVICE_HOST) 58 #endif /* PetscDefined(HAVE_CXX) */ 59 60 /* PetscDeviceContext */ 61 #if PetscDefined(HAVE_CXX) 62 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *); 63 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *); 64 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext, PetscStreamType); 65 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext, PetscStreamType *); 66 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext, PetscDevice); 67 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext, PetscDevice *); 68 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext, PetscDeviceType *); 69 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext); 70 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext, PetscDeviceContext *); 71 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext, PetscBool *); 72 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext, PetscDeviceContext); 73 PETSC_EXTERN PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext, PetscStreamType, PetscInt, PetscDeviceContext **); 74 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext, PetscInt, PetscDeviceContext **); 75 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext, PetscInt, PetscDeviceContextJoinMode, PetscDeviceContext **); 76 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext); 77 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm, PetscDeviceContext); 78 PETSC_EXTERN PetscErrorCode PetscDeviceContextView(PetscDeviceContext, PetscViewer); 79 PETSC_EXTERN PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext, PetscObject, const char[]); 80 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *); 81 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext); 82 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext, void **); 83 #else 84 #define PetscDeviceContextCreate(dctx) (*(dctx) = PETSC_NULLPTR, PETSC_SUCCESS) 85 #define PetscDeviceContextDestroy(dctx) (*(dctx) = PETSC_NULLPTR, PETSC_SUCCESS) 86 #define PetscDeviceContextSetStreamType(PetscDeviceContext, PetscStreamType) PETSC_SUCCESS 87 #define PetscDeviceContextGetStreamType(PetscDeviceContext, type) (*(type) = PETSC_STREAM_DEFAULT, PETSC_SUCCESS) 88 #define PetscDeviceContextSetDevice(PetscDeviceContext, PetscDevice) PETSC_SUCCESS 89 #define PetscDeviceContextGetDevice(PetscDeviceContext, device) (*(device) = PETSC_NULLPTR, PETSC_SUCCESS) 90 #define PetscDeviceContextGetDeviceType(PetscDeviceContext, type) (*(type) = PETSC_DEVICE_DEFAULT(), PETSC_SUCCESS) 91 #define PetscDeviceContextSetUp(PetscDeviceContext) PETSC_SUCCESS 92 #define PetscDeviceContextDuplicate(PetscDeviceContextl, PetscDeviceContextr) (*(PetscDeviceContextr) = PETSC_NULLPTR, PETSC_SUCCESS) 93 #define PetscDeviceContextQueryIdle(PetscDeviceContext, idle) (*(idle) = PETSC_TRUE, PETSC_SUCCESS) 94 #define PetscDeviceContextWaitForContext(PetscDeviceContextl, PetscDeviceContextr) PETSC_SUCCESS 95 #define PetscDeviceContextForkWithStreamType(PetscDeviceContextp, PetscStreamType, PetscInt, PetscDeviceContextc) (*(PetscDeviceContextc) = PETSC_NULLPTR, PETSC_SUCCESS) 96 #define PetscDeviceContextFork(PetscDeviceContextp, PetscInt, PetscDeviceContextc) (*(PetscDeviceContextc) = PETSC_NULLPTR, PETSC_SUCCESS) 97 #define PetscDeviceContextJoin(PetscDeviceContextp, PetscInt, PetscDeviceContextJoinMode, PetscDeviceContextc) (*(PetscDeviceContextc) = PETSC_NULLPTR, PETSC_SUCCESS) 98 #define PetscDeviceContextSynchronize(PetscDeviceContext) PETSC_SUCCESS 99 #define PetscDeviceContextSetFromOptions(MPI_Comm, PetscDeviceContext) PETSC_SUCCESS 100 #define PetscDeviceContextView(PetscDeviceContext, PetscViewer) PETSC_SUCCESS 101 #define PetscDeviceContextViewFromOptions(PetscDeviceContext, PetscObject, PetscViewer) PETSC_SUCCESS 102 #define PetscDeviceContextGetCurrentContext(dctx) (*(dctx) = PETSC_NULLPTR, PETSC_SUCCESS) 103 #define PetscDeviceContextSetCurrentContext(PetscDeviceContext) PETSC_SUCCESS 104 #define PetscDeviceContextGetStreamHandle(PetscDeviceContext, handle) (*(handle) = PETSC_NULLPTR, PETSC_SUCCESS) 105 #endif /* PetscDefined(HAVE_CXX) */ 106 107 /* memory */ 108 #if PetscDefined(HAVE_CXX) 109 PETSC_EXTERN PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext, PetscBool, PetscMemType, size_t, size_t, void **PETSC_RESTRICT); 110 PETSC_EXTERN PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext, void *PETSC_RESTRICT); 111 PETSC_EXTERN PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext, void *PETSC_RESTRICT, const void *PETSC_RESTRICT, size_t); 112 PETSC_EXTERN PetscErrorCode PetscDeviceMemset(PetscDeviceContext, void *PETSC_RESTRICT, PetscInt, size_t); 113 #else 114 #include <string.h> // memset() 115 #define PetscDeviceAllocate_Private(PetscDeviceContext, clear, PetscMemType, size, alignment, ptr) PetscMallocA(1, (clear), __LINE__, PETSC_FUNCTION_NAME, __FILE__, (size), (ptr)) 116 #define PetscDeviceDeallocate_Private(PetscDeviceContext, ptr) PetscFree(ptr) 117 #define PetscDeviceMemcpy(PetscDeviceContext, dest, src, size) PetscMemcpy((dest), (src), (size)) 118 #define PetscDeviceMemset(PetscDeviceContext, ptr, v, size) ((void)memset((ptr), (unsigned char)(v), (size)), PETSC_SUCCESS) 119 #endif /* PetscDefined(HAVE_CXX) */ 120 121 /*MC 122 PetscDeviceMalloc - Allocate device-aware memory 123 124 Synopsis: 125 #include <petscdevice.h> 126 PetscErrorCode PetscDeviceMalloc(PetscDeviceContext dctx, PetscMemType mtype, size_t n, Type **ptr) 127 128 Not Collective, Asynchronous, Auto-dependency aware 129 130 Input Parameters: 131 + dctx - The `PetscDeviceContext` used to allocate the memory 132 . mtype - The type of memory to allocate 133 - n - The amount (in elements) to allocate 134 135 Output Parameter: 136 . ptr - The pointer to store the result in 137 138 Level: beginner 139 140 Notes: 141 Memory allocated with this function must be freed with `PetscDeviceFree()`. 142 143 If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`. 144 145 This routine falls back to using `PetscMalloc1()` if PETSc was not configured with device 146 support. The user should note that `mtype` is ignored in this case, as `PetscMalloc1()` 147 allocates only host memory. 148 149 This routine uses the `sizeof()` of the memory type requested to determine the total memory 150 to be allocated, therefore you should not multiply the number of elements requested by the 151 `sizeof()` the type\: 152 153 .vb 154 PetscInt *arr; 155 156 // correct 157 PetscDeviceMalloc(dctx,PETSC_MEMTYPE_DEVICE,n,&arr); 158 159 // incorrect 160 PetscDeviceMalloc(dctx,PETSC_MEMTYPE_DEVICE,n*sizeof(*arr),&arr); 161 .ve 162 163 Note result stored `ptr` is immediately valid and the user may freely inspect or manipulate 164 its value on function return, i.e.\: 165 166 .vb 167 PetscInt *ptr; 168 169 PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, 20, &ptr); 170 171 PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize 172 173 ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization 174 .ve 175 176 DAG representation: 177 .vb 178 time -> 179 180 -> dctx - |= CALL =| -\- dctx --> 181 \- ptr -> 182 .ve 183 184 .N ASYNC_API 185 186 .seealso: `PetscDeviceFree()`, `PetscDeviceCalloc()`, `PetscDeviceArrayCopy()`, 187 `PetscDeviceArrayZero()` 188 M*/ 189 #define PetscDeviceMalloc(dctx, mtype, n, ptr) PetscDeviceAllocate_Private((dctx), PETSC_FALSE, (mtype), (size_t)(n) * sizeof(**(ptr)), PETSC_DEVICE_ALIGNOF(**(ptr)), (void **)(ptr)) 190 191 /*MC 192 PetscDeviceCalloc - Allocate zeroed device-aware memory 193 194 Synopsis: 195 #include <petscdevice.h> 196 PetscErrorCode PetscDeviceCalloc(PetscDeviceContext dctx, PetscMemType mtype, size_t n, Type **ptr) 197 198 Not Collective, Asynchronous, Auto-dependency aware 199 200 Input Parameters: 201 + dctx - The `PetscDeviceContext` used to allocate the memory 202 . mtype - The type of memory to allocate 203 - n - The amount (in elements) to allocate 204 205 Output Parameter: 206 . ptr - The pointer to store the result in 207 208 Level: beginner 209 210 Notes: 211 Has identical usage to `PetscDeviceMalloc()` except that the memory is zeroed before it is 212 returned. See `PetscDeviceMalloc()` for further discussion. 213 214 This routine falls back to using `PetscCalloc1()` if PETSc was not configured with device 215 support. The user should note that `mtype` is ignored in this case, as `PetscCalloc1()` 216 allocates only host memory. 217 218 .N ASYNC_API 219 220 .seealso: `PetscDeviceFree()`, `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`, 221 `PetscDeviceArrayZero()` 222 M*/ 223 #define PetscDeviceCalloc(dctx, mtype, n, ptr) PetscDeviceAllocate_Private((dctx), PETSC_TRUE, (mtype), (size_t)(n) * sizeof(**(ptr)), PETSC_DEVICE_ALIGNOF(**(ptr)), (void **)(ptr)) 224 225 /*MC 226 PetscDeviceFree - Free device-aware memory obtained with `PetscDeviceMalloc()` or `PetscDeviceCalloc()` 227 228 Synopsis: 229 #include <petscdevice.h> 230 PetscErrorCode PetscDeviceFree(PetscDeviceContext dctx, void *ptr) 231 232 Not Collective, Asynchronous, Auto-dependency aware 233 234 Input Parameters: 235 + dctx - The `PetscDeviceContext` used to free the memory 236 - ptr - The pointer to free, may be `NULL` 237 238 Level: beginner 239 240 Notes: 241 `ptr` is set to `PETSC_NULLPTR` on successful deallocation. 242 243 `ptr` must have been allocated using `PetscDeviceMalloc()`, `PetscDeviceCalloc()` not `PetscMalloc()` or related routines 244 245 This routine falls back to using `PetscFree()` if PETSc was not configured with device 246 support. The user should note that `PetscFree()` frees only host memory. 247 248 DAG representation: 249 .vb 250 time -> 251 252 -> dctx -/- |= CALL =| - dctx -> 253 -> ptr -/ 254 .ve 255 256 .N ASYNC_API 257 258 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()` 259 M*/ 260 #define PetscDeviceFree(dctx, ptr) ((PetscErrorCode)(PetscDeviceDeallocate_Private((dctx), (ptr)) || ((ptr) = PETSC_NULLPTR, PETSC_SUCCESS))) 261 262 /*MC 263 PetscDeviceArrayCopy - Copy memory in a device-aware manner 264 265 Synopsis: 266 #include <petscdevice.h> 267 PetscErrorCode PetscDeviceArrayCopy(PetscDeviceContext dctx, void *dest, const void *src, size_t n) 268 269 Not Collective, Asynchronous, Auto-dependency aware 270 271 Input Parameters: 272 + dctx - The `PetscDeviceContext` used to copy the memory 273 . dest - The pointer to copy to 274 . src - The pointer to copy from 275 - n - The amount (in elements) to copy 276 277 Notes: 278 Both `dest` and `src` must have been allocated using `PetscDeviceMalloc()` or 279 `PetscDeviceCalloc()`. 280 281 This uses the `sizeof()` of the `src` memory type requested to determine the total memory to 282 be copied, therefore you should not multiply the number of elements by the `sizeof()` the 283 type\: 284 285 .vb 286 PetscInt *to,*from; 287 288 // correct 289 PetscDeviceArrayCopy(dctx,to,from,n); 290 291 // incorrect 292 PetscDeviceArrayCopy(dctx,to,from,n*sizeof(*from)); 293 .ve 294 295 See `PetscDeviceMemcpy()` for further discussion. 296 297 Level: beginner 298 299 .N ASYNC_API 300 301 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, `PetscDeviceFree()`, 302 `PetscDeviceArrayZero()`, `PetscDeviceMemcpy()` 303 M*/ 304 #define PetscDeviceArrayCopy(dctx, dest, src, n) PetscDeviceMemcpy((dctx), (dest), (src), (size_t)(n) * sizeof(*(src))) 305 306 /*MC 307 PetscDeviceArrayZero - Zero memory in a device-aware manner 308 309 Synopsis: 310 #include <petscdevice.h> 311 PetscErrorCode PetscDeviceArrayZero(PetscDeviceContext dctx, void *ptr, size_t n) 312 313 Not Collective, Asynchronous, Auto-dependency aware 314 315 Input Parameters: 316 + dctx - The `PetscDeviceContext` used to zero the memory 317 . ptr - The pointer to the memory 318 - n - The amount (in elements) to zero 319 320 Level: beginner 321 322 Notes: 323 `ptr` must have been allocated using `PetscDeviceMalloc()` or `PetscDeviceCalloc()`. 324 325 This uses the `sizeof()` of the memory type requested to determine the total memory to be 326 zeroed, therefore you should not multiply the number of elements by the `sizeof()` the type\: 327 328 .vb 329 PetscInt *ptr; 330 331 // correct 332 PetscDeviceArrayZero(dctx,ptr,n); 333 334 // incorrect 335 PetscDeviceArrayZero(dctx,ptr,n*sizeof(*ptr)); 336 .ve 337 338 See `PetscDeviceMemset()` for further discussion. 339 340 .N ASYNC_API 341 342 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, `PetscDeviceFree()`, 343 `PetscDeviceArrayCopy()`, `PetscDeviceMemset()` 344 M*/ 345 #define PetscDeviceArrayZero(dctx, ptr, n) PetscDeviceMemset((dctx), (ptr), 0, (size_t)(n) * sizeof(*(ptr))) 346