1 #pragma once 2 3 #include <petscdevicetypes.h> 4 #include <petscviewertypes.h> 5 6 #if PETSC_CPP_VERSION >= 11 // C++11 7 #define PETSC_DEVICE_ALIGNOF(...) alignof(decltype(__VA_ARGS__)) 8 #elif PETSC_C_VERSION >= 11 // C11 9 #ifdef __GNUC__ 10 #define PETSC_DEVICE_ALIGNOF(...) _Alignof(__typeof__(__VA_ARGS__)) 11 #else 12 #include <stddef.h> // max_align_t 13 // Note we cannot just do _Alignof(expression) since clang warns that "'_Alignof' applied to an 14 // expression is a GNU extension", so we just default to max_align_t which is ultra safe 15 #define PETSC_DEVICE_ALIGNOF(...) _Alignof(max_align_t) 16 #endif // __GNUC__ 17 #else 18 #define PETSC_DEVICE_ALIGNOF(...) PETSC_MEMALIGN 19 #endif 20 21 /* MANSEC = Sys */ 22 /* SUBMANSEC = Device */ 23 24 // REVIEW ME: this should probably go somewhere better, configure-time? 25 #define PETSC_HAVE_HOST 1 26 27 /* logging support */ 28 PETSC_EXTERN PetscClassId PETSC_DEVICE_CLASSID; 29 PETSC_EXTERN PetscClassId PETSC_DEVICE_CONTEXT_CLASSID; 30 31 PETSC_EXTERN PetscErrorCode PetscDeviceInitializePackage(void); 32 PETSC_EXTERN PetscErrorCode PetscDeviceFinalizePackage(void); 33 PETSC_EXTERN PetscErrorCode PetscGetMemType(const void *, PetscMemType *); 34 35 /* PetscDevice */ 36 PETSC_EXTERN PetscErrorCode PetscDeviceCreate(PetscDeviceType, PetscInt, PetscDevice *); 37 PETSC_EXTERN PetscErrorCode PetscDeviceDestroy(PetscDevice *); 38 PETSC_EXTERN PetscErrorCode PetscDeviceConfigure(PetscDevice); 39 PETSC_EXTERN PetscErrorCode PetscDeviceView(PetscDevice, PetscViewer); 40 PETSC_EXTERN PetscErrorCode PetscDeviceGetType(PetscDevice, PetscDeviceType *); 41 PETSC_EXTERN PetscErrorCode PetscDeviceGetDeviceId(PetscDevice, PetscInt *); 42 PETSC_EXTERN PetscDeviceType PETSC_DEVICE_DEFAULT(void); 43 PETSC_EXTERN PetscErrorCode PetscDeviceSetDefaultDeviceType(PetscDeviceType); 44 PETSC_EXTERN PetscErrorCode PetscDeviceInitialize(PetscDeviceType); 45 PETSC_EXTERN PetscBool PetscDeviceInitialized(PetscDeviceType); 46 47 /* PetscDeviceContext */ 48 PETSC_EXTERN PetscErrorCode PetscDeviceContextCreate(PetscDeviceContext *); 49 PETSC_EXTERN PetscErrorCode PetscDeviceContextDestroy(PetscDeviceContext *); 50 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetStreamType(PetscDeviceContext, PetscStreamType); 51 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamType(PetscDeviceContext, PetscStreamType *); 52 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetDevice(PetscDeviceContext, PetscDevice); 53 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDevice(PetscDeviceContext, PetscDevice *); 54 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetDeviceType(PetscDeviceContext, PetscDeviceType *); 55 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetUp(PetscDeviceContext); 56 PETSC_EXTERN PetscErrorCode PetscDeviceContextDuplicate(PetscDeviceContext, PetscDeviceContext *); 57 PETSC_EXTERN PetscErrorCode PetscDeviceContextQueryIdle(PetscDeviceContext, PetscBool *); 58 PETSC_EXTERN PetscErrorCode PetscDeviceContextWaitForContext(PetscDeviceContext, PetscDeviceContext); 59 PETSC_EXTERN PetscErrorCode PetscDeviceContextForkWithStreamType(PetscDeviceContext, PetscStreamType, PetscInt, PetscDeviceContext **); 60 PETSC_EXTERN PetscErrorCode PetscDeviceContextFork(PetscDeviceContext, PetscInt, PetscDeviceContext **); 61 PETSC_EXTERN PetscErrorCode PetscDeviceContextJoin(PetscDeviceContext, PetscInt, PetscDeviceContextJoinMode, PetscDeviceContext **); 62 PETSC_EXTERN PetscErrorCode PetscDeviceContextSynchronize(PetscDeviceContext); 63 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetFromOptions(MPI_Comm, PetscDeviceContext); 64 PETSC_EXTERN PetscErrorCode PetscDeviceContextView(PetscDeviceContext, PetscViewer); 65 PETSC_EXTERN PetscErrorCode PetscDeviceContextViewFromOptions(PetscDeviceContext, PetscObject, const char[]); 66 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetCurrentContext(PetscDeviceContext *); 67 PETSC_EXTERN PetscErrorCode PetscDeviceContextSetCurrentContext(PetscDeviceContext); 68 PETSC_EXTERN PetscErrorCode PetscDeviceContextGetStreamHandle(PetscDeviceContext, void **); 69 70 /* memory */ 71 PETSC_EXTERN PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext, PetscBool, PetscMemType, size_t, size_t, void **PETSC_RESTRICT); 72 PETSC_EXTERN PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext, void *PETSC_RESTRICT); 73 PETSC_EXTERN PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext, void *PETSC_RESTRICT, const void *PETSC_RESTRICT, size_t); 74 PETSC_EXTERN PetscErrorCode PetscDeviceMemset(PetscDeviceContext, void *PETSC_RESTRICT, PetscInt, size_t); 75 76 /*MC 77 PetscDeviceMalloc - Allocate device-aware memory 78 79 Synopsis: 80 #include <petscdevice.h> 81 PetscErrorCode PetscDeviceMalloc(PetscDeviceContext dctx, PetscMemType mtype, size_t n, Type **ptr) 82 83 Not Collective, Asynchronous, Auto-dependency aware 84 85 Input Parameters: 86 + dctx - The `PetscDeviceContext` used to allocate the memory 87 . mtype - The type of memory to allocate 88 - n - The amount (in elements) to allocate 89 90 Output Parameter: 91 . ptr - The pointer to store the result in 92 93 Level: beginner 94 95 Notes: 96 Memory allocated with this function must be freed with `PetscDeviceFree()`. 97 98 If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`. 99 100 This routine falls back to using `PetscMalloc1()` if PETSc was not configured with device 101 support. The user should note that `mtype` is ignored in this case, as `PetscMalloc1()` 102 allocates only host memory. 103 104 This routine uses the `sizeof()` of the memory type requested to determine the total memory 105 to be allocated, therefore you should not multiply the number of elements requested by the 106 `sizeof()` the type\: 107 108 .vb 109 PetscInt *arr; 110 111 // correct 112 PetscDeviceMalloc(dctx,PETSC_MEMTYPE_DEVICE,n,&arr); 113 114 // incorrect 115 PetscDeviceMalloc(dctx,PETSC_MEMTYPE_DEVICE,n*sizeof(*arr),&arr); 116 .ve 117 118 Note result stored `ptr` is immediately valid and the user may freely inspect or manipulate 119 its value on function return, i.e.\: 120 121 .vb 122 PetscInt *ptr; 123 124 PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, 20, &ptr); 125 126 PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize 127 128 ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization 129 .ve 130 131 DAG representation: 132 .vb 133 time -> 134 135 -> dctx - |= CALL =| -\- dctx --> 136 \- ptr -> 137 .ve 138 139 .N ASYNC_API 140 141 .seealso: `PetscDeviceFree()`, `PetscDeviceCalloc()`, `PetscDeviceArrayCopy()`, 142 `PetscDeviceArrayZero()` 143 M*/ 144 #define PetscDeviceMalloc(dctx, mtype, n, ptr) PetscDeviceAllocate_Private((dctx), PETSC_FALSE, (mtype), (size_t)(n) * sizeof(**(ptr)), PETSC_DEVICE_ALIGNOF(**(ptr)), (void **)(ptr)) 145 146 /*MC 147 PetscDeviceCalloc - Allocate zeroed device-aware memory 148 149 Synopsis: 150 #include <petscdevice.h> 151 PetscErrorCode PetscDeviceCalloc(PetscDeviceContext dctx, PetscMemType mtype, size_t n, Type **ptr) 152 153 Not Collective, Asynchronous, Auto-dependency aware 154 155 Input Parameters: 156 + dctx - The `PetscDeviceContext` used to allocate the memory 157 . mtype - The type of memory to allocate 158 - n - The amount (in elements) to allocate 159 160 Output Parameter: 161 . ptr - The pointer to store the result in 162 163 Level: beginner 164 165 Notes: 166 Has identical usage to `PetscDeviceMalloc()` except that the memory is zeroed before it is 167 returned. See `PetscDeviceMalloc()` for further discussion. 168 169 This routine falls back to using `PetscCalloc1()` if PETSc was not configured with device 170 support. The user should note that `mtype` is ignored in this case, as `PetscCalloc1()` 171 allocates only host memory. 172 173 .N ASYNC_API 174 175 .seealso: `PetscDeviceFree()`, `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`, 176 `PetscDeviceArrayZero()` 177 M*/ 178 #define PetscDeviceCalloc(dctx, mtype, n, ptr) PetscDeviceAllocate_Private((dctx), PETSC_TRUE, (mtype), (size_t)(n) * sizeof(**(ptr)), PETSC_DEVICE_ALIGNOF(**(ptr)), (void **)(ptr)) 179 180 /*MC 181 PetscDeviceFree - Free device-aware memory obtained with `PetscDeviceMalloc()` or `PetscDeviceCalloc()` 182 183 Synopsis: 184 #include <petscdevice.h> 185 PetscErrorCode PetscDeviceFree(PetscDeviceContext dctx, void *ptr) 186 187 Not Collective, Asynchronous, Auto-dependency aware 188 189 Input Parameters: 190 + dctx - The `PetscDeviceContext` used to free the memory 191 - ptr - The pointer to free, may be `NULL` 192 193 Level: beginner 194 195 Notes: 196 `ptr` is set to `PETSC_NULLPTR` on successful deallocation. 197 198 `ptr` must have been allocated using `PetscDeviceMalloc()`, `PetscDeviceCalloc()` not `PetscMalloc()` or related routines 199 200 This routine falls back to using `PetscFree()` if PETSc was not configured with device 201 support. The user should note that `PetscFree()` frees only host memory. 202 203 DAG representation: 204 .vb 205 time -> 206 207 -> dctx -/- |= CALL =| - dctx -> 208 -> ptr -/ 209 .ve 210 211 .N ASYNC_API 212 213 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()` 214 M*/ 215 #define PetscDeviceFree(dctx, ptr) ((PetscErrorCode)(PetscDeviceDeallocate_Private((dctx), (ptr)) || ((ptr) = PETSC_NULLPTR, PETSC_SUCCESS))) 216 217 /*MC 218 PetscDeviceArrayCopy - Copy memory in a device-aware manner 219 220 Synopsis: 221 #include <petscdevice.h> 222 PetscErrorCode PetscDeviceArrayCopy(PetscDeviceContext dctx, void *dest, const void *src, size_t n) 223 224 Not Collective, Asynchronous, Auto-dependency aware 225 226 Input Parameters: 227 + dctx - The `PetscDeviceContext` used to copy the memory 228 . dest - The pointer to copy to 229 . src - The pointer to copy from 230 - n - The amount (in elements) to copy 231 232 Notes: 233 Both `dest` and `src` must have been allocated using `PetscDeviceMalloc()` or 234 `PetscDeviceCalloc()`. 235 236 This uses the `sizeof()` of the `src` memory type requested to determine the total memory to 237 be copied, therefore you should not multiply the number of elements by the `sizeof()` the 238 type\: 239 240 .vb 241 PetscInt *to,*from; 242 243 // correct 244 PetscDeviceArrayCopy(dctx,to,from,n); 245 246 // incorrect 247 PetscDeviceArrayCopy(dctx,to,from,n*sizeof(*from)); 248 .ve 249 250 See `PetscDeviceMemcpy()` for further discussion. 251 252 Level: beginner 253 254 .N ASYNC_API 255 256 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, `PetscDeviceFree()`, 257 `PetscDeviceArrayZero()`, `PetscDeviceMemcpy()` 258 M*/ 259 #define PetscDeviceArrayCopy(dctx, dest, src, n) PetscDeviceMemcpy((dctx), (dest), (src), (size_t)(n) * sizeof(*(src))) 260 261 /*MC 262 PetscDeviceArrayZero - Zero memory in a device-aware manner 263 264 Synopsis: 265 #include <petscdevice.h> 266 PetscErrorCode PetscDeviceArrayZero(PetscDeviceContext dctx, void *ptr, size_t n) 267 268 Not Collective, Asynchronous, Auto-dependency aware 269 270 Input Parameters: 271 + dctx - The `PetscDeviceContext` used to zero the memory 272 . ptr - The pointer to the memory 273 - n - The amount (in elements) to zero 274 275 Level: beginner 276 277 Notes: 278 `ptr` must have been allocated using `PetscDeviceMalloc()` or `PetscDeviceCalloc()`. 279 280 This uses the `sizeof()` of the memory type requested to determine the total memory to be 281 zeroed, therefore you should not multiply the number of elements by the `sizeof()` the type\: 282 283 .vb 284 PetscInt *ptr; 285 286 // correct 287 PetscDeviceArrayZero(dctx,ptr,n); 288 289 // incorrect 290 PetscDeviceArrayZero(dctx,ptr,n*sizeof(*ptr)); 291 .ve 292 293 See `PetscDeviceMemset()` for further discussion. 294 295 .N ASYNC_API 296 297 .seealso: `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, `PetscDeviceFree()`, 298 `PetscDeviceArrayCopy()`, `PetscDeviceMemset()` 299 M*/ 300 #define PetscDeviceArrayZero(dctx, ptr, n) PetscDeviceMemset((dctx), (ptr), 0, (size_t)(n) * sizeof(*(ptr))) 301