1 #ifndef PETSCDEVICE_H 2 #define PETSCDEVICE_H 3 4 #include <petscdevicetypes.h> 5 #include <petscviewertypes.h> 6 7 #if PETSC_CPP_VERSION >= 11 // C++11 8 #define PETSC_DEVICE_ALIGNOF(...) alignof(decltype(__VA_ARGS__)) 9 #elif PETSC_C_VERSION >= 11 // C11 10 #ifdef __GNUC__ 11 #define PETSC_DEVICE_ALIGNOF(...) _Alignof(__typeof__(__VA_ARGS__)) 12 #else 13 #include <stddef.h> // max_align_t 14 // Note we cannot just do _Alignof(expression) since clang warns that "'_Alignof' applied to an 15 // expression is a GNU extension", so we just default to max_align_t which is ultra safe 16 #define PETSC_DEVICE_ALIGNOF(...) _Alignof(max_align_t) 17 #endif // __GNUC__ 18 #else 19 #define PETSC_DEVICE_ALIGNOF(...) PETSC_MEMALIGN 20 #endif 21 22 /* SUBMANSEC = Sys */ 23 24 // REVIEW ME: this should probably go somewhere better, configure-time? 25 #define PETSC_HAVE_HOST 1 26 27 /* logging support */ 28 PETSC_EXTERN PetscClassId PETSC_DEVICE_CLASSID; 29 PETSC_EXTERN PetscClassId PETSC_DEVICE_CONTEXT_CLASSID; 30 31 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void); 32 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void); 33 PETSC_EXTERN PetscErrorCode PetscGetMemType(const void *, PetscMemType *); 34 35 /* PetscDevice */ 36 #if PetscDefined(HAVE_CXX) 37 PETSC_EXTERN PetscErrorCode PetscDeviceCreate(PetscDeviceType, PetscInt, PetscDevice *); 38 PETSC_EXTERN PetscErrorCode PetscDeviceDestroy(PetscDevice *); 39 PETSC_EXTERN PetscErrorCode PetscDeviceConfigure(PetscDevice); 40 PETSC_EXTERN PetscErrorCode PetscDeviceView(PetscDevice, PetscViewer); 41 PETSC_EXTERN PetscErrorCode PetscDeviceGetType(PetscDevice, PetscDeviceType *); 42 PETSC_EXTERN PetscErrorCode PetscDeviceGetDeviceId(PetscDevice, PetscInt *); 43 PETSC_EXTERN PetscDeviceType PETSC_DEVICE_DEFAULT(void); 44 PETSC_EXTERN PetscErrorCode PetscDeviceSetDefaultDeviceType(PetscDeviceType); 45 PETSC_EXTERN PetscErrorCode PetscDeviceInitialize(PetscDeviceType); 46 PETSC_EXTERN PetscBool PetscDeviceInitialized(PetscDeviceType); 47 #else 48 #define PetscDeviceCreate(PetscDeviceType, PetscInt, dev) (*(dev) = PETSC_NULLPTR, 0) 49 #define PetscDeviceDestroy(dev) (*(dev) = PETSC_NULLPTR, 0) 50 #define PetscDeviceConfigure(PetscDevice) 0 51 #define PetscDeviceView(PetscDevice, PetscViewer) 0 52 #define PetscDeviceGetType(PetscDevice, type) (*(type) = PETSC_DEVICE_DEFAULT(), 0) 53 #define PetscDeviceGetDeviceId(PetscDevice, id) (*(id) = 0) 54 #define PETSC_DEVICE_DEFAULT() PETSC_DEVICE_HOST 55 #define PetscDeviceSetDefaultDeviceType(PetscDeviceType) 0 56 #define PetscDeviceInitialize(PetscDeviceType) 0 57 #define PetscDeviceInitialized(dtype) ((dtype) == PETSC_DEVICE_HOST) 58 #endif /* PetscDefined(HAVE_CXX) */ 59 60 /* PetscDeviceContext */ 61 #if PetscDefined(HAVE_CXX) 62 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *); 63 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *); 64 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext, PetscStreamType); 65 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext, PetscStreamType *); 66 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext, PetscDevice); 67 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext, PetscDevice *); 68 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext, PetscDeviceType *); 69 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext); 70 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext, PetscDeviceContext *); 71 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext, PetscBool *); 72 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext, PetscDeviceContext); 73 PETSC_EXTERN PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext, PetscStreamType, PetscInt, PetscDeviceContext **); 74 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext, PetscInt, PetscDeviceContext **); 75 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext, PetscInt, PetscDeviceContextJoinMode, PetscDeviceContext **); 76 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext); 77 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm, PetscDeviceContext); 78 PETSC_EXTERN PetscErrorCode PetscDeviceContextView(PetscDeviceContext, PetscViewer); 79 PETSC_EXTERN PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext, PetscObject, PetscViewer); 80 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *); 81 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext); 82 #else 83 #define PetscDeviceContextCreate(dctx) (*(dctx) = PETSC_NULLPTR, 0) 84 #define PetscDeviceContextDestroy(dctx) (*(dctx) = PETSC_NULLPTR, 0) 85 #define PetscDeviceContextSetStreamType(PetscDeviceContext, PetscStreamType) 0 86 #define PetscDeviceContextGetStreamType(PetscDeviceContext, type) (*(type) = PETSC_STREAM_GLOBAL_BLOCKING, 0) 87 #define PetscDeviceContextSetDevice(PetscDeviceContext, PetscDevice) 0 88 #define PetscDeviceContextGetDevice(PetscDeviceContext, device) (*(device) = PETSC_NULLPTR, 0) 89 #define PetscDeviceContextGetDeviceType(PetscDeviceContext, type) (*(type) = PETSC_DEVICE_DEFAULT()) 90 #define PetscDeviceContextSetUp(PetscDeviceContext) 0 91 #define PetscDeviceContextDuplicate(PetscDeviceContextl, PetscDeviceContextr) (*(PetscDeviceContextr) = PETSC_NULLPTR, 0) 92 #define PetscDeviceContextQueryIdle(PetscDeviceContext, idle) (*(idle) = PETSC_TRUE, 0) 93 #define PetscDeviceContextWaitForContext(PetscDeviceContextl, PetscDeviceContextr) 0 94 #define PetscDeviceContextForkWithStreamType(PetscDeviceContextp, PetscStreamType, PetscInt, PetscDeviceContextc) (*(PetscDeviceContextc) = PETSC_NULLPTR, 0) 95 #define PetscDeviceContextFork(PetscDeviceContextp, PetscInt, PetscDeviceContextc) (*(PetscDeviceContextc) = PETSC_NULLPTR, 0) 96 #define PetscDeviceContextJoin(PetscDeviceContextp, PetscInt, PetscDeviceContextJoinMode, PetscDeviceContextc) (*(PetscDeviceContextc) = PETSC_NULLPTR, 0) 97 #define PetscDeviceContextSynchronize(PetscDeviceContext) 0 98 #define PetscDeviceContextSetFromOptions(MPI_Comm, PetscDeviceContext) 0 99 #define PetscDeviceContextView(PetscDeviceContext, PetscViewer) 0 100 #define PetscDeviceContextViewFromOptions(PetscDeviceContext, PetscObject, PetscViewer) 0 101 #define PetscDeviceContextGetCurrentContext(dctx) (*(dctx) = PETSC_NULLPTR, 0) 102 #define PetscDeviceContextSetCurrentContext(PetscDeviceContext) 0 103 #endif /* PetscDefined(HAVE_CXX) */ 104 105 /* memory */ 106 #if PetscDefined(HAVE_CXX) 107 PETSC_EXTERN PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext, PetscBool, PetscMemType, size_t, size_t, void **PETSC_RESTRICT); 108 PETSC_EXTERN PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext, void *PETSC_RESTRICT); 109 PETSC_EXTERN PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext, void *PETSC_RESTRICT, const void *PETSC_RESTRICT, size_t); 110 PETSC_EXTERN PetscErrorCode PetscDeviceMemset(PetscDeviceContext, void *PETSC_RESTRICT, PetscInt, size_t); 111 #else 112 #include <string.h> // memset() 113 #define PetscDeviceAllocate_Private(PetscDeviceContext, clear, PetscMemType, size, alignment, ptr) PetscMallocA(1, (clear), __LINE__, PETSC_FUNCTION_NAME, __FILE__, (size), (ptr)) 114 #define PetscDeviceDeallocate_Private(PetscDeviceContext, ptr) PetscFree((ptr)) 115 #define PetscDeviceMemcpy(PetscDeviceContext, dest, src, size) PetscMemcpy((dest), (src), (size)) 116 #define PetscDeviceMemset(PetscDeviceContext, ptr, v, size) ((void)memset((ptr), (unsigned char)(v), (size)), 0) 117 #endif /* PetscDefined(HAVE_CXX) */ 118 119 /*MC 120 PetscDeviceMalloc - Allocate device-aware memory 121 122 Synopsis: 123 #include <petscdevice.h> 124 PetscErrorCode PetscDeviceMalloc(PetscDeviceContext dctx, PetscMemType mtype, size_t n, Type **ptr) 125 126 Not Collective, Asynchronous, Auto-dependency aware 127 128 Input Parameters: 129 + dctx - The `PetscDeviceContext` used to allocate the memory 130 . mtype - The type of memory to allocate 131 - n - The amount (in elements) to allocate 132 133 Output Parameter: 134 . ptr - The pointer to store the result in 135 136 Notes: 137 Memory allocated with this function must be freed with `PetscDeviceFree()`. 138 139 If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`. 140 141 This routine falls back to using `PetscMalloc1()` if PETSc was not configured with device 142 support. The user should note that `mtype` is ignored in this case, as `PetscMalloc1()` 143 allocates only host memory. 144 145 This routine uses the `sizeof()` of the memory type requested to determine the total memory 146 to be allocated, therefore you should not multiply the number of elements requested by the 147 `sizeof()` the type\: 148 149 .vb 150 PetscInt *arr; 151 152 // correct 153 PetscDeviceMalloc(dctx,PETSC_MEMTYPE_DEVICE,n,&arr); 154 155 // incorrect 156 PetscDeviceMalloc(dctx,PETSC_MEMTYPE_DEVICE,n*sizeof(*arr),&arr); 157 .ve 158 159 Note result stored `ptr` is immediately valid and the user may freely inspect or manipulate 160 its value on function return, i.e.\: 161 162 .vb 163 PetscInt *ptr; 164 165 PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, 20, &ptr); 166 167 PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize 168 169 ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization 170 .ve 171 172 DAG representation: 173 .vb 174 time -> 175 176 -> dctx - |= CALL =| -\- dctx --> 177 \- ptr -> 178 .ve 179 180 Level: beginner 181 182 .N ASYNC_API 183 184 .seealso: `PetscDeviceFree()`, `PetscDeviceCalloc()`, `PetscDeviceArrayCopy()`, 185 `PetscDeviceArrayZero()` 186 M*/ 187 #define PetscDeviceMalloc(dctx, mtype, n, ptr) PetscDeviceAllocate_Private((dctx), PETSC_FALSE, (mtype), (size_t)(n) * sizeof(**(ptr)), PETSC_DEVICE_ALIGNOF(**(ptr)), (void **)(ptr)) 188 189 /*MC 190 PetscDeviceCalloc - Allocate zeroed device-aware memory 191 192 Synopsis: 193 #include <petscdevice.h> 194 PetscErrorCode PetscDeviceCalloc(PetscDeviceContext dctx, PetscMemType mtype, size_t n, Type **ptr) 195 196 Not Collective, Asynchronous, Auto-dependency aware 197 198 Input Parameters: 199 + dctx - The `PetscDeviceContext` used to allocate the memory 200 . mtype - The type of memory to allocate 201 - n - The amount (in elements) to allocate 202 203 Output Parameter: 204 . ptr - The pointer to store the result in 205 206 Notes: 207 Has identical usage to `PetscDeviceMalloc()` except that the memory is zeroed before it is 208 returned. See `PetscDeviceMalloc()` for further discussion. 209 210 This routine falls back to using `PetscCalloc1()` if PETSc was not configured with device 211 support. The user should note that `mtype` is ignored in this case, as `PetscCalloc1()` 212 allocates only host memory. 213 214 Level: beginner 215 216 .N ASYNC_API 217 218 .seealso: `PetscDeviceFree()`, `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`, 219 `PetscDeviceArrayZero()` 220 M*/ 221 #define PetscDeviceCalloc(dctx, mtype, n, ptr) PetscDeviceAllocate_Private((dctx), PETSC_TRUE, (mtype), (size_t)(n) * sizeof(**(ptr)), PETSC_DEVICE_ALIGNOF(**(ptr)), (void **)(ptr)) 222 223 /*MC 224 PetscDeviceFree - Free device-aware memory 225 226 Synopsis: 227 #include <petscdevice.h> 228 PetscErrorCode PetscDeviceFree(PetscDeviceContext dctx, void *ptr) 229 230 Not Collective, Asynchronous, Auto-dependency aware 231 232 Input Parameters: 233 + dctx - The `PetscDeviceContext` used to free the memory 234 - ptr - The pointer to free 235 236 Notes: 237 `ptr` may be `NULL`, and is set to `PETSC_NULLPTR` on successful deallocation. 238 239 `ptr` must have been allocated using `PetscDeviceMalloc()`, `PetscDeviceCalloc()`. 240 241 This routine falls back to using `PetscFree()` if PETSc was not configured with device 242 support. The user should note that `PetscFree()` frees only host memory. 243 244 DAG representation: 245 .vb 246 time -> 247 248 -> dctx -/- |= CALL =| - dctx -> 249 -> ptr -/ 250 .ve 251 252 Level: beginner 253 254 .N ASYNC_API 255 256 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()` 257 M*/ 258 #define PetscDeviceFree(dctx, ptr) (PetscDeviceDeallocate_Private((dctx), (ptr)) || ((ptr) = PETSC_NULLPTR, 0)) 259 260 /*MC 261 PetscDeviceArrayCopy - Copy memory in a device-aware manner 262 263 Synopsis: 264 #include <petscdevice.h> 265 PetscErrorCode PetscDeviceArrayCopy(PetscDeviceContext dctx, void *dest, const void *src, size_t n) 266 267 Not Collective, Asynchronous, Auto-dependency aware 268 269 Input Parameters: 270 + dctx - The `PetscDeviceContext` used to copy the memory 271 . dest - The pointer to copy to 272 . src - The pointer to copy from 273 - n - The amount (in elements) to copy 274 275 Notes: 276 Both `dest` and `src` must have been allocated using any of `PetscDeviceMalloc()`, 277 `PetscDeviceCalloc()`. 278 279 This uses the `sizeof()` of the `src` memory type requested to determine the total memory to 280 be copied, therefore you should not multiply the number of elements by the `sizeof()` the 281 type\: 282 283 .vb 284 PetscInt *to,*from; 285 286 // correct 287 PetscDeviceArrayCopy(dctx,to,from,n); 288 289 // incorrect 290 PetscDeviceArrayCopy(dctx,to,from,n*sizeof(*from)); 291 .ve 292 293 See `PetscDeviceMemcpy()` for further discussion. 294 295 Level: beginner 296 297 .N ASYNC_API 298 299 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, `PetscDeviceFree()`, 300 `PetscDeviceArrayZero()`, `PetscDeviceMemcpy()` 301 M*/ 302 #define PetscDeviceArrayCopy(dctx, dest, src, n) PetscDeviceMemcpy((dctx), (dest), (src), (size_t)(n) * sizeof(*(src))) 303 304 /*MC 305 PetscDeviceArrayZero - Zero memory in a device-aware manner 306 307 Synopsis: 308 #include <petscdevice.h> 309 PetscErrorCode PetscDeviceArrayZero(PetscDeviceContext dctx, void *ptr, size_t n) 310 311 Not Collective, Asynchronous, Auto-dependency aware 312 313 Input Parameters: 314 + dctx - The `PetscDeviceContext` used to zero the memory 315 . ptr - The pointer to the memory 316 - n - The amount (in elements) to zero 317 318 Notes: 319 `ptr` must have been allocated using `PetscDeviceMalloc()` or `PetscDeviceCalloc()`. 320 321 This uses the `sizeof()` of the memory type requested to determine the total memory to be 322 zeroed, therefore you should not multiply the number of elements by the `sizeof()` the type\: 323 324 .vb 325 PetscInt *ptr; 326 327 // correct 328 PetscDeviceArrayZero(dctx,ptr,n); 329 330 // incorrect 331 PetscDeviceArrayZero(dctx,ptr,n*sizeof(*ptr)); 332 .ve 333 334 See `PetscDeviceMemset()` for further discussion. 335 336 Level: beginner 337 338 .N ASYNC_API 339 340 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, `PetscDeviceFree()`, 341 `PetscDeviceArrayCopy()`, `PetscDeviceMemset()` 342 M*/ 343 #define PetscDeviceArrayZero(dctx, ptr, n) PetscDeviceMemset((dctx), (ptr), 0, (size_t)(n) * sizeof(*(ptr))) 344 345 #endif /* PETSCDEVICE_H */ 346