1 #pragma once
2
3 #include <petscdevice.h>
4 #include <petsc/private/petscimpl.h>
5
#if defined(PETSC_HAVE_CUPM)
// The real CUDA/HIP architecture the code is run with; used for log view and error diagnosis
PETSC_INTERN int PetscDeviceCUPMRuntimeArch;
#endif
9
10 /* logging support */
11 PETSC_INTERN PetscLogEvent CUBLAS_HANDLE_CREATE;
12 PETSC_INTERN PetscLogEvent CUSOLVER_HANDLE_CREATE;
13 PETSC_INTERN PetscLogEvent HIPSOLVER_HANDLE_CREATE;
14 PETSC_INTERN PetscLogEvent HIPBLAS_HANDLE_CREATE;
15
16 PETSC_INTERN PetscLogEvent DCONTEXT_Create;
17 PETSC_INTERN PetscLogEvent DCONTEXT_Destroy;
18 PETSC_INTERN PetscLogEvent DCONTEXT_ChangeStream;
19 PETSC_INTERN PetscLogEvent DCONTEXT_SetDevice;
20 PETSC_INTERN PetscLogEvent DCONTEXT_SetUp;
21 PETSC_INTERN PetscLogEvent DCONTEXT_Duplicate;
22 PETSC_INTERN PetscLogEvent DCONTEXT_QueryIdle;
23 PETSC_INTERN PetscLogEvent DCONTEXT_WaitForCtx;
24 PETSC_INTERN PetscLogEvent DCONTEXT_Fork;
25 PETSC_INTERN PetscLogEvent DCONTEXT_Join;
26 PETSC_INTERN PetscLogEvent DCONTEXT_Sync;
27 PETSC_INTERN PetscLogEvent DCONTEXT_Mark;
28
/* enum conversion helpers: plain C casts when compiled as C, static_cast<> in C++ for some
 * additional type-safety */
#if !defined(__cplusplus)
  #define PetscStreamTypeCast(...)     ((PetscStreamType)(__VA_ARGS__))
  #define PetscDeviceTypeCast(...)     ((PetscDeviceType)(__VA_ARGS__))
  #define PetscDeviceInitTypeCast(...) ((PetscDeviceInitType)(__VA_ARGS__))
#else
  #define PetscStreamTypeCast(...)     static_cast<PetscStreamType>(__VA_ARGS__)
  #define PetscDeviceTypeCast(...)     static_cast<PetscDeviceType>(__VA_ARGS__)
  #define PetscDeviceInitTypeCast(...) static_cast<PetscDeviceInitType>(__VA_ARGS__)
#endif
39
40 #if defined(PETSC_CLANG_STATIC_ANALYZER)
// Declaration-only stand-ins for the validation macros; this branch is selected when
// PETSC_CLANG_STATIC_ANALYZER is defined. No definitions are provided here.
template <typename DeviceTypeT>
void PetscValidDeviceType(DeviceTypeT, int);

template <typename DeviceTypeT1, typename DeviceTypeT2>
void PetscCheckCompatibleDeviceTypes(DeviceTypeT1, int, DeviceTypeT2, int);

template <typename DeviceT>
void PetscValidDevice(DeviceT, int);

template <typename AttributeT>
void PetscValidDeviceAttribute(AttributeT, int);

template <typename DeviceT1, typename DeviceT2>
void PetscCheckCompatibleDevices(DeviceT1, int, DeviceT2, int);

template <typename StreamTypeT>
void PetscValidStreamType(StreamTypeT, int);

template <typename ContextT>
void PetscValidDeviceContext(ContextT, int);

template <typename ContextT1, typename ContextT2>
void PetscCheckCompatibleDeviceContexts(ContextT1, int, ContextT2, int);
57 #elif PetscDefined(DEVICELANGUAGE_CXX) && (PetscDefined(USE_DEBUG) || PetscDefined(DEVICE_KEEP_ERROR_CHECKING_MACROS))
/*
  PetscValidDeviceType - check that a value is a known PetscDeviceType that is configured

  dtype - the value to check, converted with PetscDeviceTypeCast()
  argno - position of the argument in the caller's argument list, used in the error messages

  Errors:
  PETSC_ERR_ARG_UNKNOWN_TYPE - value outside [PETSC_DEVICE_HOST, PETSC_DEVICE_MAX]
  PETSC_ERR_ARG_INCOMP       - value is PETSC_DEVICE_MAX
  PETSC_ERR_SUP              - PetscDeviceConfiguredFor_Internal() rejects the value

  The enum is cast to int before being handed to the '%d' conversion so the variadic argument
  always matches the format, as PetscValidDeviceAttribute() already does.
*/
#define PetscValidDeviceType(dtype, argno) \
  do { \
    PetscDeviceType pvdt_dtype_ = PetscDeviceTypeCast(dtype); \
    int             pvdt_argno_ = (int)(argno); \
    PetscCheck(((int)pvdt_dtype_ >= (int)PETSC_DEVICE_HOST) && ((int)pvdt_dtype_ <= (int)PETSC_DEVICE_MAX), PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown PetscDeviceType '%d': Argument #%d", (int)pvdt_dtype_, pvdt_argno_); \
    if (PetscUnlikely(!PetscDeviceConfiguredFor_Internal(pvdt_dtype_))) { \
      /* PETSC_DEVICE_MAX is in range but is never a usable type */ \
      PetscCheck((int)pvdt_dtype_ != (int)PETSC_DEVICE_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Invalid PetscDeviceType '%s': Argument #%d", PetscDeviceTypes[pvdt_dtype_], pvdt_argno_); \
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, \
              "Not configured for PetscDeviceType '%s': Argument #%d;" \
              " run configure --help %s for available options", \
              PetscDeviceTypes[pvdt_dtype_], pvdt_argno_, PetscDeviceTypes[pvdt_dtype_]); \
    } \
  } while (0)
71
/*
  PetscCheckCompatibleDeviceTypes - check that two PetscDeviceTypes are both valid and equal

  dtype1, dtype2 - the values to compare, converted with PetscDeviceTypeCast()
  argno1, argno2 - positions of the corresponding arguments in the caller's argument list

  Each type is first validated with PetscValidDeviceType() using the caller-supplied argument
  number, so that a validation failure names the argument the caller actually passed rather
  than a fixed 1 or 2. Raises PETSC_ERR_ARG_NOTSAMETYPE if the two types differ.
*/
#define PetscCheckCompatibleDeviceTypes(dtype1, argno1, dtype2, argno2) \
  do { \
    PetscDeviceType pccdt_dtype1_ = PetscDeviceTypeCast(dtype1); \
    PetscDeviceType pccdt_dtype2_ = PetscDeviceTypeCast(dtype2); \
    /* evaluate the argument numbers once and as int, since they are printed with '%d' */ \
    int pccdt_argno1_ = (int)(argno1); \
    int pccdt_argno2_ = (int)(argno2); \
    PetscValidDeviceType(pccdt_dtype1_, pccdt_argno1_); \
    PetscValidDeviceType(pccdt_dtype2_, pccdt_argno2_); \
    PetscCheck(pccdt_dtype1_ == pccdt_dtype2_, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMETYPE, "PetscDeviceTypes are incompatible: Arguments #%d and #%d. Expected PetscDeviceType '%s' but have '%s' instead", pccdt_argno1_, pccdt_argno2_, PetscDeviceTypes[pccdt_dtype1_], PetscDeviceTypes[pccdt_dtype2_]); \
  } while (0)
80
/*
  PetscValidDevice - check that dev is a non-NULL PetscDevice with a valid type, a non-negative
  id and a non-negative reference count; argno is the argument position used in error messages
*/
#define PetscValidDevice(dev, argno) \
  do { \
    PetscDevice pvd_device_ = (dev); \
    int         pvd_arg_    = (int)(argno); \
    PetscAssertPointer(pvd_device_, pvd_arg_); \
    PetscValidDeviceType(pvd_device_->type, pvd_arg_); \
    PetscCheck(pvd_device_->id >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid PetscDevice: Argument #%d; id %" PetscInt_FMT " < 0", pvd_arg_, pvd_device_->id); \
    PetscCheck(pvd_device_->refcnt >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid PetscDevice: Argument #%d; negative reference count %" PetscInt_FMT, pvd_arg_, pvd_device_->refcnt); \
  } while (0)
90
/*
  PetscValidDeviceAttribute - check that dattr lies in [0, PETSC_DEVICE_ATTR_MAX) and report
  argno in the error message otherwise; PETSC_DEVICE_ATTR_MAX itself is reported separately as
  an invalid (rather than unknown) attribute
*/
#define PetscValidDeviceAttribute(dattr, argno) \
  do { \
    PetscDeviceAttribute pvda_value_ = (dattr); \
    int                  pvda_arg_   = (int)(argno); \
    PetscCheck(!((((int)pvda_value_) < 0) || (pvda_value_ > PETSC_DEVICE_ATTR_MAX)), PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown PetscDeviceAttribute '%d': Argument #%d", (int)pvda_value_, pvda_arg_); \
    PetscCheck(pvda_value_ != PETSC_DEVICE_ATTR_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Invalid PetscDeviceAttribute '%s': Argument #%d", PetscDeviceAttributes[pvda_value_], pvda_arg_); \
  } while (0)
98
/*
  PetscCheckCompatibleDevices - validate both devices, then require their types to be compatible

  For now this just checks strict equality of the device types, but this can be changed as some
  devices (e.g. kokkos and any cupm) should be compatible once implemented.
*/
#define PetscCheckCompatibleDevices(dev1, argno1, dev2, argno2) \
  do { \
    PetscDevice pccd_dev1_   = (dev1); \
    PetscDevice pccd_dev2_   = (dev2); \
    int         pccd_argno1_ = (int)(argno1); \
    int         pccd_argno2_ = (int)(argno2); \
    PetscValidDevice(pccd_dev1_, pccd_argno1_); \
    PetscValidDevice(pccd_dev2_, pccd_argno2_); \
    PetscCheckCompatibleDeviceTypes(pccd_dev1_->type, pccd_argno1_, pccd_dev2_->type, pccd_argno2_); \
  } while (0)
111
/*
  PetscValidStreamType - check that a value is a usable PetscStreamType

  stype - the value to check, converted with PetscStreamTypeCast()
  argno - position of the argument in the caller's argument list, used in the error messages

  Raises PETSC_ERR_ARG_UNKNOWN_TYPE if the value lies outside [0, PETSC_STREAM_MAX] and
  PETSC_ERR_ARG_INCOMP if it is PETSC_STREAM_MAX itself. The enum is cast to int before being
  handed to the '%d' conversion so the variadic argument always matches the format.
*/
#define PetscValidStreamType(stype, argno) \
  do { \
    PetscStreamType pvst_stype_ = PetscStreamTypeCast(stype); \
    int             pvst_argno_ = (int)(argno); \
    PetscCheck(((int)pvst_stype_ >= 0) && ((int)pvst_stype_ <= (int)PETSC_STREAM_MAX), PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown PetscStreamType '%d': Argument #%d", (int)pvst_stype_, pvst_argno_); \
    PetscCheck((int)pvst_stype_ != (int)PETSC_STREAM_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Invalid PetscStreamType '%s': Argument #%d", PetscStreamTypes[pvst_stype_], pvst_argno_); \
  } while (0)
119
/*
  PetscValidDeviceContext - check that dctx is a valid PetscDeviceContext object

  Verifies, in order: the object header (PETSC_DEVICE_CONTEXT_CLASSID), the stream type, the
  attached device (or, if there is none, that the context is not marked as set up), the object
  id (must be >= 1) and that numChildren does not exceed maxNumChildren. argno is the argument
  position used in error messages.
*/
#define PetscValidDeviceContext(dctx, argno) \
  do { \
    PetscDeviceContext pvdc_ctx_ = (dctx); \
    int                pvdc_arg_ = (int)(argno); \
    PetscValidHeaderSpecific(pvdc_ctx_, PETSC_DEVICE_CONTEXT_CLASSID, pvdc_arg_); \
    PetscValidStreamType(pvdc_ctx_->streamType, pvdc_arg_); \
    if (!pvdc_ctx_->device) { \
      /* a context without a device must not claim to be set up */ \
      PetscCheck(!pvdc_ctx_->setup, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, \
                 "Invalid PetscDeviceContext: Argument #%d; " \
                 "PetscDeviceContext is setup but has no PetscDevice", \
                 pvdc_arg_); \
    } else { \
      PetscValidDevice(pvdc_ctx_->device, pvdc_arg_); \
    } \
    PetscCheck(((PetscObject)pvdc_ctx_)->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid PetscDeviceContext: Argument #%d; id %" PetscInt64_FMT " < 1", pvdc_arg_, ((PetscObject)pvdc_ctx_)->id); \
    PetscCheck(pvdc_ctx_->numChildren <= pvdc_ctx_->maxNumChildren, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Invalid PetscDeviceContext: Argument #%d; number of children %" PetscInt_FMT " > max number of children %" PetscInt_FMT, pvdc_arg_, \
               pvdc_ctx_->numChildren, pvdc_ctx_->maxNumChildren); \
  } while (0)
138
/*
  PetscCheckCompatibleDeviceContexts - validate both contexts and, only when both have a device
  attached, additionally require those devices to be compatible
*/
#define PetscCheckCompatibleDeviceContexts(dctx1, argno1, dctx2, argno2) \
  do { \
    PetscDeviceContext pccdc_dctx1_  = (dctx1); \
    PetscDeviceContext pccdc_dctx2_  = (dctx2); \
    int                pccdc_argno1_ = (int)(argno1); \
    int                pccdc_argno2_ = (int)(argno2); \
    PetscValidDeviceContext(pccdc_dctx1_, pccdc_argno1_); \
    PetscValidDeviceContext(pccdc_dctx2_, pccdc_argno2_); \
    if (pccdc_dctx1_->device && pccdc_dctx2_->device) { \
      PetscCheckCompatibleDevices(pccdc_dctx1_->device, pccdc_argno1_, pccdc_dctx2_->device, pccdc_argno2_); \
    } \
  } while (0)
147 #else /* PetscDefined(USE_DEBUG) */
/*
  Validation disabled: every check macro expands to an empty statement and evaluates none of
  its arguments. All of them use the same do { } while (0) form so that each invocation is
  exactly one statement: it requires the trailing semicolon, is safe as the sole body of an
  if/else, and does not leave a stray ';' behind.
*/
#define PetscValidDeviceType(dtype, argno) \
  do { \
  } while (0)
#define PetscCheckCompatibleDeviceTypes(dtype1, argno1, dtype2, argno2) \
  do { \
  } while (0)
#define PetscValidDeviceAttribute(dattr, argno) \
  do { \
  } while (0)
#define PetscValidDevice(dev, argno) \
  do { \
  } while (0)
#define PetscCheckCompatibleDevices(dev1, argno1, dev2, argno2) \
  do { \
  } while (0)
#define PetscValidStreamType(stype, argno) \
  do { \
  } while (0)
#define PetscValidDeviceContext(dctx, argno) \
  do { \
  } while (0)
#define PetscCheckCompatibleDeviceContexts(dctx1, argno1, dctx2, argno2) \
  do { \
  } while (0)
158 #endif /* PetscDefined(USE_DEBUG) */
159
/* upper limit on the number of devices; if someone is ready to rock with more than 128 GPUs
 * on hand then we're in real trouble */
#define PETSC_DEVICE_MAX_DEVICES 128
162
163 /*
164 the configure-time default device type, used as the initial the value of
165 PETSC_DEVICE_DEFAULT() as well as what it is restored to during PetscFinalize()
166 */
167 #if PetscDefined(HAVE_HIP)
168 #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_HIP
169 #elif PetscDefined(HAVE_CUDA)
170 #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_CUDA
171 #elif PetscDefined(HAVE_SYCL)
172 #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_SYCL
173 #else
174 #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_HOST
175 #endif
176
/* default device type of a PetscDeviceContext: same as the configure-time hardware default */
#define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE_TYPE PETSC_DEVICE_HARDWARE_DEFAULT_TYPE
// REMOVE ME (change)
/* default stream type of a PetscDeviceContext */
#define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE PETSC_STREAM_DEFAULT
180
181 typedef struct _DeviceOps *DeviceOps;
182 struct _DeviceOps {
183 /* the creation routine for the corresponding PetscDeviceContext, this is NOT intended
184 * to be called by the PetscDevice itself */
185 PetscErrorCode (*createcontext)(PetscDeviceContext);
186 PetscErrorCode (*configure)(PetscDevice);
187 PetscErrorCode (*view)(PetscDevice, PetscViewer);
188 PetscErrorCode (*getattribute)(PetscDevice, PetscDeviceAttribute, void *);
189 };
190
191 struct _n_PetscDevice {
192 struct _DeviceOps ops[1];
193 void *data; /* placeholder */
194 PetscInt refcnt; /* reference count for the device */
195 PetscInt id; /* unique id per created PetscDevice */
196 PetscInt deviceId; /* the id of the underlying device, i.e. the return of
197 * cudaGetDevice() for example */
198 PetscDeviceType type; /* type of device */
199 };
200
201 typedef struct _n_PetscEvent *PetscEvent;
202 struct _n_PetscEvent {
203 PetscDeviceType dtype; // this cannot change for the lifetime of the event
204 PetscObjectId dctx_id; // id of last dctx to record this event
205 PetscObjectState dctx_state; // state of last dctx to record this event
206 void *data; // event handle
207 PetscErrorCode (*destroy)(PetscEvent);
208 };
209
210 typedef struct _DeviceContextOps *DeviceContextOps;
211 struct _DeviceContextOps {
212 PetscErrorCode (*destroy)(PetscDeviceContext);
213 PetscErrorCode (*changestreamtype)(PetscDeviceContext, PetscStreamType);
214 PetscErrorCode (*setup)(PetscDeviceContext);
215 PetscErrorCode (*query)(PetscDeviceContext, PetscBool *);
216 PetscErrorCode (*waitforcontext)(PetscDeviceContext, PetscDeviceContext);
217 PetscErrorCode (*synchronize)(PetscDeviceContext);
218 PetscErrorCode (*getblashandle)(PetscDeviceContext, void *);
219 PetscErrorCode (*getsolverhandle)(PetscDeviceContext, void *);
220 PetscErrorCode (*getstreamhandle)(PetscDeviceContext, void **);
221 PetscErrorCode (*begintimer)(PetscDeviceContext);
222 PetscErrorCode (*endtimer)(PetscDeviceContext, PetscLogDouble *);
223 PetscErrorCode (*getpower)(PetscDeviceContext, PetscLogDouble *);
224 PetscErrorCode (*beginenergymeter)(PetscDeviceContext);
225 PetscErrorCode (*endenergymeter)(PetscDeviceContext, PetscLogDouble *);
226 PetscErrorCode (*memalloc)(PetscDeviceContext, PetscBool, PetscMemType, size_t, size_t, void **); // optional
227 PetscErrorCode (*memfree)(PetscDeviceContext, PetscMemType, void **); // optional
228 PetscErrorCode (*memcopy)(PetscDeviceContext, void *PETSC_RESTRICT, const void *PETSC_RESTRICT, size_t, PetscDeviceCopyMode); // optional
229 PetscErrorCode (*memset)(PetscDeviceContext, PetscMemType, void *, PetscInt, size_t); // optional
230 PetscErrorCode (*createevent)(PetscDeviceContext, PetscEvent); // optional
231 PetscErrorCode (*recordevent)(PetscDeviceContext, PetscEvent); // optional
232 PetscErrorCode (*waitforevent)(PetscDeviceContext, PetscEvent); // optional
233 };
234
235 struct _p_PetscDeviceContext {
236 PETSCHEADER(struct _DeviceContextOps);
237 PetscDevice device; /* the device this context stems from */
238 void *data; /* solver contexts, event, stream */
239 PetscObjectId *childIDs; /* array containing ids of contexts currently forked from this one */
240 PetscInt numChildren; /* how many children does this context expect to destroy */
241 PetscInt maxNumChildren; /* how many children can this context have room for without realloc'ing */
242 PetscStreamType streamType; /* how should this contexts stream behave around other streams? */
243 PetscBool setup;
244 PetscBool usersetdevice;
245 };
246
247 // ===================================================================================
248 // PetscDevice Internal Functions
249 // ===================================================================================
250 PETSC_INTERN PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm);
251 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType, PetscDevice *);
252
PetscDeviceReference_Internal(PetscDevice device)253 static inline PetscErrorCode PetscDeviceReference_Internal(PetscDevice device)
254 {
255 PetscFunctionBegin;
256 if (PetscDefined(DEVICELANGUAGE_CXX)) ++device->refcnt;
257 PetscFunctionReturn(PETSC_SUCCESS);
258 }
259
260 #if PetscDefined(DEVICELANGUAGE_CXX)
PetscDeviceDereference_Internal(PetscDevice device)261 static inline PetscErrorCode PetscDeviceDereference_Internal(PetscDevice device)
262 {
263 PetscFunctionBegin;
264 --device->refcnt;
265 PetscAssert(device->refcnt >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "PetscDevice has negative reference count %" PetscInt_FMT, device->refcnt);
266 PetscFunctionReturn(PETSC_SUCCESS);
267 }
268
PetscDeviceCheckDeviceCount_Internal(PetscInt count)269 static inline PetscErrorCode PetscDeviceCheckDeviceCount_Internal(PetscInt count)
270 {
271 PetscFunctionBegin;
272 PetscAssert(count < PETSC_DEVICE_MAX_DEVICES, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Detected %" PetscInt_FMT " devices, which is larger than maximum supported number of devices %d", count, PETSC_DEVICE_MAX_DEVICES);
273 PetscFunctionReturn(PETSC_SUCCESS);
274 }
275 #endif /* PETSC_DEVICELANGUAGE_CXX for PetscDevice Internal Functions */
276
/* Convenience form of PetscDeviceGetDefaultForType_Internal(): calls it with the automatically
 * selected default PetscDeviceType, PETSC_DEVICE_DEFAULT() */
#define PetscDeviceGetDefault_Internal(device) PetscDeviceGetDefaultForType_Internal(PETSC_DEVICE_DEFAULT(), (device))
280
PetscDeviceConfiguredFor_Internal(PetscDeviceType type)281 static inline PETSC_CONSTEXPR_14 PetscBool PetscDeviceConfiguredFor_Internal(PetscDeviceType type)
282 {
283 switch (type) {
284 case PETSC_DEVICE_HOST:
285 return PETSC_TRUE;
286 /* casts are needed in C++ */
287 case PETSC_DEVICE_CUDA:
288 return (PetscBool)PetscDefined(HAVE_CUDA);
289 case PETSC_DEVICE_HIP:
290 return (PetscBool)PetscDefined(HAVE_HIP);
291 case PETSC_DEVICE_SYCL:
292 return (PetscBool)PetscDefined(HAVE_SYCL);
293 case PETSC_DEVICE_MAX:
294 return PETSC_FALSE;
295 /* Do not add default case! Will make compiler warn on new additions to PetscDeviceType! */
296 }
297 PetscUnreachable();
298 return PETSC_FALSE;
299 }
300
301 // ===================================================================================
302 // PetscDeviceContext Internal Functions
303 // ===================================================================================
304 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *);
305 #if PetscDefined(DEVICELANGUAGE_CXX)
PetscDeviceContextGetBLASHandle_Internal(PetscDeviceContext dctx,void * handle)306 static inline PetscErrorCode PetscDeviceContextGetBLASHandle_Internal(PetscDeviceContext dctx, void *handle)
307 {
308 PetscFunctionBegin;
309 /* we do error checking here as this routine is an entry-point */
310 PetscValidDeviceContext(dctx, 1);
311 PetscUseTypeMethod(dctx, getblashandle, handle);
312 PetscFunctionReturn(PETSC_SUCCESS);
313 }
314
PetscDeviceContextGetSOLVERHandle_Internal(PetscDeviceContext dctx,void * handle)315 static inline PetscErrorCode PetscDeviceContextGetSOLVERHandle_Internal(PetscDeviceContext dctx, void *handle)
316 {
317 PetscFunctionBegin;
318 /* we do error checking here as this routine is an entry-point */
319 PetscValidDeviceContext(dctx, 1);
320 PetscUseTypeMethod(dctx, getsolverhandle, handle);
321 PetscFunctionReturn(PETSC_SUCCESS);
322 }
323
PetscDeviceContextGetStreamHandle_Internal(PetscDeviceContext dctx,void ** handle)324 static inline PetscErrorCode PetscDeviceContextGetStreamHandle_Internal(PetscDeviceContext dctx, void **handle)
325 {
326 PetscFunctionBegin;
327 /* we do error checking here as this routine is an entry-point */
328 PetscValidDeviceContext(dctx, 1);
329 PetscAssertPointer(handle, 2);
330 PetscUseTypeMethod(dctx, getstreamhandle, handle);
331 PetscFunctionReturn(PETSC_SUCCESS);
332 }
333
PetscDeviceContextBeginTimer_Internal(PetscDeviceContext dctx)334 static inline PetscErrorCode PetscDeviceContextBeginTimer_Internal(PetscDeviceContext dctx)
335 {
336 PetscFunctionBegin;
337 /* we do error checking here as this routine is an entry-point */
338 PetscValidDeviceContext(dctx, 1);
339 PetscUseTypeMethod(dctx, begintimer);
340 PetscFunctionReturn(PETSC_SUCCESS);
341 }
342
PetscDeviceContextEndTimer_Internal(PetscDeviceContext dctx,PetscLogDouble * elapsed)343 static inline PetscErrorCode PetscDeviceContextEndTimer_Internal(PetscDeviceContext dctx, PetscLogDouble *elapsed)
344 {
345 PetscFunctionBegin;
346 /* we do error checking here as this routine is an entry-point */
347 PetscValidDeviceContext(dctx, 1);
348 PetscAssertPointer(elapsed, 2);
349 PetscUseTypeMethod(dctx, endtimer, elapsed);
350 PetscFunctionReturn(PETSC_SUCCESS);
351 }
352
353 #if PetscDefined(HAVE_CUDA_VERSION_12_2PLUS)
PetscDeviceContextGetPower_Internal(PetscDeviceContext dctx,PetscLogDouble * power)354 static inline PetscErrorCode PetscDeviceContextGetPower_Internal(PetscDeviceContext dctx, PetscLogDouble *power)
355 {
356 PetscFunctionBegin;
357 PetscValidDeviceContext(dctx, 1);
358 PetscAssertPointer(power, 2);
359 PetscUseTypeMethod(dctx, getpower, power);
360 PetscFunctionReturn(PETSC_SUCCESS);
361 }
362 #endif
363
PetscDeviceContextBeginEnergyMeter_Internal(PetscDeviceContext dctx)364 static inline PetscErrorCode PetscDeviceContextBeginEnergyMeter_Internal(PetscDeviceContext dctx)
365 {
366 PetscFunctionBegin;
367 /* we do error checking here as this routine is an entry-point */
368 PetscValidDeviceContext(dctx, 1);
369 PetscUseTypeMethod(dctx, beginenergymeter);
370 PetscFunctionReturn(PETSC_SUCCESS);
371 }
372
PetscDeviceContextEndEnergyMeter_Internal(PetscDeviceContext dctx,PetscLogDouble * energy)373 static inline PetscErrorCode PetscDeviceContextEndEnergyMeter_Internal(PetscDeviceContext dctx, PetscLogDouble *energy)
374 {
375 PetscFunctionBegin;
376 /* we do error checking here as this routine is an entry-point */
377 PetscValidDeviceContext(dctx, 1);
378 PetscAssertPointer(energy, 2);
379 PetscUseTypeMethod(dctx, endenergymeter, energy);
380 PetscFunctionReturn(PETSC_SUCCESS);
381 }
382 #endif /* PETSC_DEVICELANGUAGE_CXX for PetscDeviceContext Internal Functions */
383
384 /* note, only does assertion checking in debug mode */
PetscDeviceContextGetCurrentContextAssertType_Internal(PetscDeviceContext * dctx,PetscDeviceType type)385 static inline PetscErrorCode PetscDeviceContextGetCurrentContextAssertType_Internal(PetscDeviceContext *dctx, PetscDeviceType type)
386 {
387 PetscFunctionBegin;
388 PetscCall(PetscDeviceContextGetCurrentContext(dctx));
389 if (PetscDefined(USE_DEBUG)) {
390 PetscDeviceType dtype;
391
392 PetscValidDeviceType(type, 2);
393 PetscCall(PetscDeviceContextGetDeviceType(*dctx, &dtype));
394 PetscCheckCompatibleDeviceTypes(dtype, 1, type, 2);
395 } else (void)type;
396 PetscFunctionReturn(PETSC_SUCCESS);
397 }
398
PetscDeviceContextGetOptionalNullContext_Internal(PetscDeviceContext * dctx)399 static inline PetscErrorCode PetscDeviceContextGetOptionalNullContext_Internal(PetscDeviceContext *dctx)
400 {
401 PetscFunctionBegin;
402 PetscAssertPointer(dctx, 1);
403 if (!*dctx) PetscCall(PetscDeviceContextGetNullContext_Internal(dctx));
404 PetscValidDeviceContext(*dctx, 1);
405 PetscFunctionReturn(PETSC_SUCCESS);
406 }
407
408 /* Experimental API -- it will eventually become public */
409 PETSC_EXTERN PetscErrorCode PetscDeviceRegisterMemory(const void *PETSC_RESTRICT, PetscMemType, size_t);
410 PETSC_EXTERN PetscErrorCode PetscDeviceGetAttribute(PetscDevice, PetscDeviceAttribute, void *);
411 #if PetscDefined(DEVICELANGUAGE_CXX)
412 PETSC_EXTERN PetscErrorCode PetscDeviceContextMarkIntentFromID(PetscDeviceContext, PetscObjectId, PetscMemoryAccessMode, const char name[]);
413 #endif
414 // Used for testing purposes, internal use ONLY
415 PETSC_EXTERN PetscErrorCode PetscGetMarkedObjectMap_Internal(size_t *, PetscObjectId **, PetscMemoryAccessMode **, size_t **, PetscEvent ***);
416 PETSC_EXTERN PetscErrorCode PetscRestoreMarkedObjectMap_Internal(size_t, PetscObjectId **, PetscMemoryAccessMode **, size_t **, PetscEvent ***);
417 #if PetscDefined(DEVICELANGUAGE_CXX) && defined(__cplusplus)
418 namespace
419 {
420
PetscDeviceContextMarkIntentFromID(PetscDeviceContext dctx,PetscObject obj,PetscMemoryAccessMode mode,const char name[])421 inline PetscErrorCode PetscDeviceContextMarkIntentFromID(PetscDeviceContext dctx, PetscObject obj, PetscMemoryAccessMode mode, const char name[])
422 {
423 PetscFunctionBegin;
424 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, obj->id, mode, name));
425 PetscFunctionReturn(PETSC_SUCCESS);
426 }
427
428 } // anonymous namespace
429 #endif
430
431 PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_HOST(PetscDeviceContext);
432 #if PetscDefined(HAVE_CUDA)
433 PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_CUDA(PetscDeviceContext);
434 #endif
435 #if PetscDefined(HAVE_HIP)
436 PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_HIP(PetscDeviceContext);
437 #endif
438 #if PetscDefined(HAVE_SYCL)
439 PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_SYCL(PetscDeviceContext);
440 #endif
441
PetscDeviceContextSynchronizeIfWithBarrier_Internal(PetscDeviceContext dctx)442 static inline PetscErrorCode PetscDeviceContextSynchronizeIfWithBarrier_Internal(PetscDeviceContext dctx)
443 {
444 PetscStreamType stream_type;
445
446 PetscFunctionBegin;
447 PetscCall(PetscDeviceContextGetStreamType(dctx, &stream_type));
448 if (stream_type == PETSC_STREAM_DEFAULT_WITH_BARRIER || stream_type == PETSC_STREAM_NONBLOCKING_WITH_BARRIER) PetscCall(PetscDeviceContextSynchronize(dctx));
449 PetscFunctionReturn(PETSC_SUCCESS);
450 }
451
452 #if PetscDefined(HAVE_CUDA)
453
/*
  PetscCallNVML - evaluate an NVML call and raise PETSC_ERR_LIB, reporting the enclosing function
  name and nvmlErrorString(), if it does not return NVML_SUCCESS
*/
#define PetscCallNVML(...) \
  do { \
    nvmlReturn_t petsc_nvml_status_ = __VA_ARGS__; \
    PetscCheck(petsc_nvml_status_ == NVML_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s, error string: %s", __func__, nvmlErrorString(petsc_nvml_status_)); \
  } while (0)
459
460 #endif
461