xref: /petsc/include/petsc/private/deviceimpl.h (revision 5fa70555f2cfa5f8527759fb2fd8b5523acdf153)
1 #pragma once
2 
3 #include <petscdevice.h>
4 #include <petsc/private/petscimpl.h>
5 
6 #if defined(PETSC_HAVE_CUPM)
7 PETSC_INTERN int PetscDeviceCUPMRuntimeArch; // The real CUDA/HIP arch the code is run with. For log view and error diagnosis
8 #endif
9 
10 /* logging support */
11 PETSC_INTERN PetscLogEvent CUBLAS_HANDLE_CREATE;
12 PETSC_INTERN PetscLogEvent CUSOLVER_HANDLE_CREATE;
13 PETSC_INTERN PetscLogEvent HIPSOLVER_HANDLE_CREATE;
14 PETSC_INTERN PetscLogEvent HIPBLAS_HANDLE_CREATE;
15 
16 PETSC_INTERN PetscLogEvent DCONTEXT_Create;
17 PETSC_INTERN PetscLogEvent DCONTEXT_Destroy;
18 PETSC_INTERN PetscLogEvent DCONTEXT_ChangeStream;
19 PETSC_INTERN PetscLogEvent DCONTEXT_SetDevice;
20 PETSC_INTERN PetscLogEvent DCONTEXT_SetUp;
21 PETSC_INTERN PetscLogEvent DCONTEXT_Duplicate;
22 PETSC_INTERN PetscLogEvent DCONTEXT_QueryIdle;
23 PETSC_INTERN PetscLogEvent DCONTEXT_WaitForCtx;
24 PETSC_INTERN PetscLogEvent DCONTEXT_Fork;
25 PETSC_INTERN PetscLogEvent DCONTEXT_Join;
26 PETSC_INTERN PetscLogEvent DCONTEXT_Sync;
27 PETSC_INTERN PetscLogEvent DCONTEXT_Mark;
28 
29 /* type cast macros for some additional type-safety in C++ land */
30 #if defined(__cplusplus)
31   #define PetscStreamTypeCast(...)     static_cast<PetscStreamType>(__VA_ARGS__)
32   #define PetscDeviceTypeCast(...)     static_cast<PetscDeviceType>(__VA_ARGS__)
33   #define PetscDeviceInitTypeCast(...) static_cast<PetscDeviceInitType>(__VA_ARGS__)
34 #else
35   #define PetscStreamTypeCast(...)     ((PetscStreamType)(__VA_ARGS__))
36   #define PetscDeviceTypeCast(...)     ((PetscDeviceType)(__VA_ARGS__))
37   #define PetscDeviceInitTypeCast(...) ((PetscDeviceInitType)(__VA_ARGS__))
38 #endif
39 
40 #if defined(PETSC_CLANG_STATIC_ANALYZER)
41 template <typename T>
42 extern void PetscValidDeviceType(T, int);
43 template <typename T, typename U>
44 extern void PetscCheckCompatibleDeviceTypes(T, int, U, int);
45 template <typename T>
46 extern void PetscValidDevice(T, int);
47 template <typename T>
48 extern void PetscValidDeviceAttribute(T, int);
49 template <typename T, typename U>
50 extern void PetscCheckCompatibleDevices(T, int, U, int);
51 template <typename T>
52 extern void PetscValidStreamType(T, int);
53 template <typename T>
54 extern void PetscValidDeviceContext(T, int);
55 template <typename T, typename U>
56 extern void PetscCheckCompatibleDeviceContexts(T, int, U, int);
57 #elif PetscDefined(DEVICELANGUAGE_CXX) && (PetscDefined(USE_DEBUG) || PetscDefined(DEVICE_KEEP_ERROR_CHECKING_MACROS))
/*
  PetscValidDeviceType - check that dtype is a usable PetscDeviceType

  dtype is converted with PetscDeviceTypeCast() and evaluated exactly once. The check fails
  with
  - PETSC_ERR_ARG_UNKNOWN_TYPE if the value lies outside [PETSC_DEVICE_HOST, PETSC_DEVICE_MAX],
  - PETSC_ERR_ARG_INCOMP if it is the PETSC_DEVICE_MAX sentinel,
  - PETSC_ERR_SUP if PetscDeviceConfiguredFor_Internal() reports the type as not configured.

  argno is the position of the argument in the calling routine; it is only used to build the
  error message. The enum value is cast to int before being handed to the '%d' conversion, as
  PetscValidDeviceAttribute() does.
*/
  #define PetscValidDeviceType(dtype, argno) \
    do { \
      PetscDeviceType pvdt_dtype_ = PetscDeviceTypeCast(dtype); \
      int             pvdt_argno_ = (int)(argno); \
      PetscCheck(((int)pvdt_dtype_ >= (int)PETSC_DEVICE_HOST) && ((int)pvdt_dtype_ <= (int)PETSC_DEVICE_MAX), PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown PetscDeviceType '%d': Argument #%d", (int)pvdt_dtype_, pvdt_argno_); \
      if (PetscUnlikely(!PetscDeviceConfiguredFor_Internal(pvdt_dtype_))) { \
        PetscCheck((int)pvdt_dtype_ != (int)PETSC_DEVICE_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Invalid PetscDeviceType '%s': Argument #%d", PetscDeviceTypes[pvdt_dtype_], pvdt_argno_); \
        SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, \
                "Not configured for PetscDeviceType '%s': Argument #%d;" \
                " run configure --help %s for available options", \
                PetscDeviceTypes[pvdt_dtype_], pvdt_argno_, PetscDeviceTypes[pvdt_dtype_]); \
      } \
    } while (0)
71 
/*
  PetscCheckCompatibleDeviceTypes - check that two PetscDeviceTypes are valid and identical

  Both types are first validated with PetscValidDeviceType(), then compared for strict
  equality; a mismatch raises PETSC_ERR_ARG_NOTSAMETYPE. argno1 and argno2 are the positions
  of the two arguments in the calling routine. They are captured once and forwarded to the
  validity checks as well, so that every diagnostic names the caller's argument numbers
  (previously the validity checks always reported arguments 1 and 2).
*/
  #define PetscCheckCompatibleDeviceTypes(dtype1, argno1, dtype2, argno2) \
    do { \
      PetscDeviceType pccdt_dtype1_ = PetscDeviceTypeCast(dtype1); \
      PetscDeviceType pccdt_dtype2_ = PetscDeviceTypeCast(dtype2); \
      int             pccdt_argno1_ = (int)(argno1); \
      int             pccdt_argno2_ = (int)(argno2); \
      PetscValidDeviceType(pccdt_dtype1_, pccdt_argno1_); \
      PetscValidDeviceType(pccdt_dtype2_, pccdt_argno2_); \
      PetscCheck(pccdt_dtype1_ == pccdt_dtype2_, PETSC_COMM_SELF, PETSC_ERR_ARG_NOTSAMETYPE, "PetscDeviceTypes are incompatible: Arguments #%d and #%d. Expected PetscDeviceType '%s' but have '%s' instead", pccdt_argno1_, pccdt_argno2_, PetscDeviceTypes[pccdt_dtype1_], PetscDeviceTypes[pccdt_dtype2_]); \
    } while (0)
80 
/*
  PetscValidDevice - sanity-check a PetscDevice handle

  Verifies, in order, that the handle passes PetscAssertPointer(), that its type passes
  PetscValidDeviceType(), that its id is non-negative and that its reference count is
  non-negative. dev and argno are each evaluated once; argno is the position of the argument
  in the calling routine and only appears in the error messages.
*/
  #define PetscValidDevice(dev, argno) \
    do { \
      PetscDevice pvd_device_ = (dev); \
      int         pvd_arg_    = (int)(argno); \
      PetscAssertPointer(pvd_device_, pvd_arg_); \
      PetscValidDeviceType(pvd_device_->type, pvd_arg_); \
      PetscCheck(!(pvd_device_->id < 0), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid PetscDevice: Argument #%d; id %" PetscInt_FMT " < 0", pvd_arg_, pvd_device_->id); \
      PetscCheck(!(pvd_device_->refcnt < 0), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid PetscDevice: Argument #%d; negative reference count %" PetscInt_FMT, pvd_arg_, pvd_device_->refcnt); \
    } while (0)
90 
/*
  PetscValidDeviceAttribute - check that dattr is a usable PetscDeviceAttribute

  Raises PETSC_ERR_ARG_UNKNOWN_TYPE if the value lies outside [0, PETSC_DEVICE_ATTR_MAX] and
  PETSC_ERR_ARG_INCOMP if it is the PETSC_DEVICE_ATTR_MAX sentinel itself. argno is the
  position of the argument in the calling routine and only appears in the error messages.
*/
  #define PetscValidDeviceAttribute(dattr, argno) \
    do { \
      PetscDeviceAttribute pvda_value_ = (dattr); \
      int                  pvda_arg_   = (int)(argno); \
      PetscCheck((0 <= ((int)pvda_value_)) && (pvda_value_ <= PETSC_DEVICE_ATTR_MAX), PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown PetscDeviceAttribute '%d': Argument #%d", (int)pvda_value_, pvda_arg_); \
      PetscCheck(pvda_value_ != PETSC_DEVICE_ATTR_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Invalid PetscDeviceAttribute '%s': Argument #%d", PetscDeviceAttributes[pvda_value_], pvda_arg_); \
    } while (0)
98 
/*
  PetscCheckCompatibleDevices - check that two PetscDevices are valid and mutually compatible

  Each device is validated with PetscValidDevice(), then their types are compared with
  PetscCheckCompatibleDeviceTypes(). For now this amounts to strict equality of the device
  types, but it may be relaxed later since some devices (e.g. kokkos and any cupm) should be
  compatible once implemented. argno1 and argno2 are the positions of the arguments in the
  calling routine.
*/
  #define PetscCheckCompatibleDevices(dev1, argno1, dev2, argno2) \
    do { \
      PetscDevice pccd_first_      = (dev1); \
      PetscDevice pccd_second_     = (dev2); \
      int         pccd_first_arg_  = (int)(argno1); \
      int         pccd_second_arg_ = (int)(argno2); \
      PetscValidDevice(pccd_first_, pccd_first_arg_); \
      PetscValidDevice(pccd_second_, pccd_second_arg_); \
      PetscCheckCompatibleDeviceTypes(pccd_first_->type, pccd_first_arg_, pccd_second_->type, pccd_second_arg_); \
    } while (0)
111 
/*
  PetscValidStreamType - check that stype is a usable PetscStreamType

  stype is converted with PetscStreamTypeCast() and evaluated exactly once. Raises
  PETSC_ERR_ARG_UNKNOWN_TYPE if the value lies outside [0, PETSC_STREAM_MAX] and
  PETSC_ERR_ARG_INCOMP if it is the PETSC_STREAM_MAX sentinel itself. argno is the position of
  the argument in the calling routine and only appears in the error messages. The enum value
  is cast to int before being handed to the '%d' conversion, as PetscValidDeviceAttribute()
  does.
*/
  #define PetscValidStreamType(stype, argno) \
    do { \
      PetscStreamType pvst_stype_ = PetscStreamTypeCast(stype); \
      int             pvst_argno_ = (int)(argno); \
      PetscCheck(((int)pvst_stype_ >= 0) && ((int)pvst_stype_ <= (int)PETSC_STREAM_MAX), PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown PetscStreamType '%d': Argument #%d", (int)pvst_stype_, pvst_argno_); \
      PetscCheck((int)pvst_stype_ != (int)PETSC_STREAM_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Invalid PetscStreamType '%s': Argument #%d", PetscStreamTypes[pvst_stype_], pvst_argno_); \
    } while (0)
119 
/*
  PetscValidDeviceContext - sanity-check a PetscDeviceContext

  Verifies, in order:
  - the object header, against PETSC_DEVICE_CONTEXT_CLASSID,
  - the stream type, with PetscValidStreamType(),
  - the attached device with PetscValidDevice() if there is one; a context without a device
    must not be flagged as set up,
  - that the object id is at least 1,
  - that numChildren does not exceed maxNumChildren.

  dctx and argno are each evaluated once; argno is the position of the argument in the
  calling routine and only appears in the error messages.
*/
  #define PetscValidDeviceContext(dctx, argno) \
    do { \
      PetscDeviceContext pvdc_ctx_ = (dctx); \
      int                pvdc_arg_ = (int)(argno); \
      PetscValidHeaderSpecific(pvdc_ctx_, PETSC_DEVICE_CONTEXT_CLASSID, pvdc_arg_); \
      PetscValidStreamType(pvdc_ctx_->streamType, pvdc_arg_); \
      if (pvdc_ctx_->device) { \
        PetscValidDevice(pvdc_ctx_->device, pvdc_arg_); \
      } else { \
        PetscCheck(!pvdc_ctx_->setup, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Invalid PetscDeviceContext: Argument #%d; PetscDeviceContext is setup but has no PetscDevice", pvdc_arg_); \
      } \
      PetscCheck(((PetscObject)pvdc_ctx_)->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid PetscDeviceContext: Argument #%d; id %" PetscInt64_FMT " < 1", pvdc_arg_, ((PetscObject)pvdc_ctx_)->id); \
      PetscCheck(!(pvdc_ctx_->numChildren > pvdc_ctx_->maxNumChildren), PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Invalid PetscDeviceContext: Argument #%d; number of children %" PetscInt_FMT " > max number of children %" PetscInt_FMT, pvdc_arg_, pvdc_ctx_->numChildren, \
                 pvdc_ctx_->maxNumChildren); \
    } while (0)
138 
/*
  PetscCheckCompatibleDeviceContexts - check that two PetscDeviceContexts are valid and
  mutually compatible

  Each context is validated with PetscValidDeviceContext(). The devices are compared with
  PetscCheckCompatibleDevices() only when both contexts have a device attached. argno1 and
  argno2 are the positions of the arguments in the calling routine.
*/
  #define PetscCheckCompatibleDeviceContexts(dctx1, argno1, dctx2, argno2) \
    do { \
      PetscDeviceContext pccdc_first_      = (dctx1); \
      PetscDeviceContext pccdc_second_     = (dctx2); \
      int                pccdc_first_arg_  = (int)(argno1); \
      int                pccdc_second_arg_ = (int)(argno2); \
      PetscValidDeviceContext(pccdc_first_, pccdc_first_arg_); \
      PetscValidDeviceContext(pccdc_second_, pccdc_second_arg_); \
      if (pccdc_first_->device && pccdc_second_->device) { \
        PetscCheckCompatibleDevices(pccdc_first_->device, pccdc_first_arg_, pccdc_second_->device, pccdc_second_arg_); \
      } \
    } while (0)
147 #else /* PetscDefined(USE_DEBUG) */
/*
  Error checking is compiled out in this configuration. Every checking macro still expands
  to exactly one (empty) statement, so that each of them requires a trailing semicolon and is
  safe as the sole body of an if/else, just like its checking counterpart. The arguments are
  never evaluated.
*/
  #define PetscValidDeviceType(dtype, argno) \
    do { \
    } while (0)
  #define PetscCheckCompatibleDeviceTypes(dtype1, argno1, dtype2, argno2) \
    do { \
    } while (0)
  #define PetscValidDeviceAttribute(dattr, argno) \
    do { \
    } while (0)
  #define PetscValidDevice(dev, argno) \
    do { \
    } while (0)
  #define PetscCheckCompatibleDevices(dev1, argno1, dev2, argno2) \
    do { \
    } while (0)
  #define PetscValidStreamType(stype, argno) \
    do { \
    } while (0)
  #define PetscValidDeviceContext(dctx, argno) \
    do { \
    } while (0)
  #define PetscCheckCompatibleDeviceContexts(dctx1, argno1, dctx2, argno2) \
    do { \
    } while (0)
158 #endif /* PetscDefined(USE_DEBUG) */
159 
160 /* if someone is ready to rock with more than 128 GPUs on hand then we're in real trouble */
161 #define PETSC_DEVICE_MAX_DEVICES 128
162 
163 /*
164   the configure-time default device type, used as the initial value of
165   PETSC_DEVICE_DEFAULT() as well as what it is restored to during PetscFinalize()
166 */
167 #if PetscDefined(HAVE_HIP)
168   #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_HIP
169 #elif PetscDefined(HAVE_CUDA)
170   #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_CUDA
171 #elif PetscDefined(HAVE_SYCL)
172   #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_SYCL
173 #else
174   #define PETSC_DEVICE_HARDWARE_DEFAULT_TYPE PETSC_DEVICE_HOST
175 #endif
176 
177 #define PETSC_DEVICE_CONTEXT_DEFAULT_DEVICE_TYPE PETSC_DEVICE_HARDWARE_DEFAULT_TYPE
178 // REMOVE ME (change)
179 #define PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE PETSC_STREAM_DEFAULT
180 
/*
  DeviceOps - table of function pointers implementing the operations of a PetscDevice

  One instance is embedded by value in every struct _n_PetscDevice (its `ops` member). Each
  entry returns a PetscErrorCode.
  NOTE(review): apart from createcontext, the role of each entry is inferred from its name
  and signature only (configure a device, view it through a PetscViewer, query a
  PetscDeviceAttribute into a caller-provided buffer) -- confirm against the implementations.
*/
181 typedef struct _DeviceOps *DeviceOps;
182 struct _DeviceOps {
183   /* the creation routine for the corresponding PetscDeviceContext, this is NOT intended
184    * to be called by the PetscDevice itself */
185   PetscErrorCode (*createcontext)(PetscDeviceContext);
186   PetscErrorCode (*configure)(PetscDevice);
187   PetscErrorCode (*view)(PetscDevice, PetscViewer);
188   PetscErrorCode (*getattribute)(PetscDevice, PetscDeviceAttribute, void *);
189 };
190 
/*
  _n_PetscDevice - implementation struct behind the PetscDevice handle

  Holds the operations table by value (ops[1], so `device->ops->fn` works as with a pointer),
  an opaque data pointer, a reference count, and the identifying ids and type.
  PetscValidDevice() in this header requires id >= 0 and refcnt >= 0; refcnt is modified by
  PetscDeviceReference_Internal() and PetscDeviceDereference_Internal().
*/
191 struct _n_PetscDevice {
192   struct _DeviceOps ops[1];
193   void             *data;     /* placeholder */
194   PetscInt          refcnt;   /* reference count for the device */
195   PetscInt          id;       /* unique id per created PetscDevice */
196   PetscInt          deviceId; /* the id of the underlying device, i.e. the return of
197                                * cudaGetDevice() for example */
198   PetscDeviceType   type;     /* type of device */
199 };
200 
/*
  _n_PetscEvent - implementation struct behind the PetscEvent handle

  Bookkeeping for an event recorded by a PetscDeviceContext: the device type the event
  belongs to, the id and state of the last context that recorded it, an opaque handle, and
  the callback used to destroy it. Events are consumed by the createevent, recordevent and
  waitforevent entries of struct _DeviceContextOps.
*/
201 typedef struct _n_PetscEvent *PetscEvent;
202 struct _n_PetscEvent {
203   PetscDeviceType  dtype;      // this cannot change for the lifetime of the event
204   PetscObjectId    dctx_id;    // id of last dctx to record this event
205   PetscObjectState dctx_state; // state of last dctx to record this event
206   void            *data;       // event handle
207   PetscErrorCode (*destroy)(PetscEvent);
208 };
209 
/*
  DeviceContextOps - table of function pointers implementing a PetscDeviceContext

  This is the ops type installed in struct _p_PetscDeviceContext through PETSCHEADER(). The
  *_Internal wrappers in this header dispatch to getblashandle, getsolverhandle,
  getstreamhandle, begintimer, endtimer, getpower, beginenergymeter and endenergymeter via
  PetscUseTypeMethod(). Entries tagged "optional" below are marked as such by the original
  authors.
  NOTE(review): units and exact semantics of the PetscLogDouble outputs (elapsed time, power,
  energy) are not visible from this header -- confirm against the implementations.
*/
210 typedef struct _DeviceContextOps *DeviceContextOps;
211 struct _DeviceContextOps {
212   PetscErrorCode (*destroy)(PetscDeviceContext);
213   PetscErrorCode (*changestreamtype)(PetscDeviceContext, PetscStreamType);
214   PetscErrorCode (*setup)(PetscDeviceContext);
215   PetscErrorCode (*query)(PetscDeviceContext, PetscBool *);
216   PetscErrorCode (*waitforcontext)(PetscDeviceContext, PetscDeviceContext);
217   PetscErrorCode (*synchronize)(PetscDeviceContext);
218   PetscErrorCode (*getblashandle)(PetscDeviceContext, void *);
219   PetscErrorCode (*getsolverhandle)(PetscDeviceContext, void *);
220   PetscErrorCode (*getstreamhandle)(PetscDeviceContext, void **);
221   PetscErrorCode (*begintimer)(PetscDeviceContext);
222   PetscErrorCode (*endtimer)(PetscDeviceContext, PetscLogDouble *);
223   PetscErrorCode (*getpower)(PetscDeviceContext, PetscLogDouble *);
224   PetscErrorCode (*beginenergymeter)(PetscDeviceContext);
225   PetscErrorCode (*endenergymeter)(PetscDeviceContext, PetscLogDouble *);
226   PetscErrorCode (*memalloc)(PetscDeviceContext, PetscBool, PetscMemType, size_t, size_t, void **);                             // optional
227   PetscErrorCode (*memfree)(PetscDeviceContext, PetscMemType, void **);                                                         // optional
228   PetscErrorCode (*memcopy)(PetscDeviceContext, void *PETSC_RESTRICT, const void *PETSC_RESTRICT, size_t, PetscDeviceCopyMode); // optional
229   PetscErrorCode (*memset)(PetscDeviceContext, PetscMemType, void *, PetscInt, size_t);                                         // optional
230   PetscErrorCode (*createevent)(PetscDeviceContext, PetscEvent);                                                                // optional
231   PetscErrorCode (*recordevent)(PetscDeviceContext, PetscEvent);                                                                // optional
232   PetscErrorCode (*waitforevent)(PetscDeviceContext, PetscEvent);                                                               // optional
233 };
234 
/*
  _p_PetscDeviceContext - implementation struct behind the PetscDeviceContext handle

  Starts with PETSCHEADER() so that it can be cast to PetscObject (PetscValidDeviceContext()
  in this header does so to read the object id). PetscValidDeviceContext() additionally
  requires numChildren <= maxNumChildren, and that a context with no device is not flagged
  as set up.
*/
235 struct _p_PetscDeviceContext {
236   PETSCHEADER(struct _DeviceContextOps);
237   PetscDevice     device;         /* the device this context stems from */
238   void           *data;           /* solver contexts, event, stream */
239   PetscObjectId  *childIDs;       /* array containing ids of contexts currently forked from this one */
240   PetscInt        numChildren;    /* how many children does this context expect to destroy */
241   PetscInt        maxNumChildren; /* how many children can this context have room for without realloc'ing */
242   PetscStreamType streamType;     /* how should this contexts stream behave around other streams? */
243   PetscBool       setup;          /* has the context been set up? if so it must have a device */
244   PetscBool       usersetdevice;  /* NOTE(review): presumably set when the user chose the device explicitly -- confirm */
245 };
246 
247 // ===================================================================================
248 //                            PetscDevice Internal Functions
249 // ===================================================================================
250 PETSC_INTERN PetscErrorCode                PetscDeviceInitializeFromOptions_Internal(MPI_Comm);
251 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType, PetscDevice *);
252 
PetscDeviceReference_Internal(PetscDevice device)253 static inline PetscErrorCode PetscDeviceReference_Internal(PetscDevice device)
254 {
255   PetscFunctionBegin;
256   if (PetscDefined(DEVICELANGUAGE_CXX)) ++device->refcnt;
257   PetscFunctionReturn(PETSC_SUCCESS);
258 }
259 
260 #if PetscDefined(DEVICELANGUAGE_CXX)
PetscDeviceDereference_Internal(PetscDevice device)261 static inline PetscErrorCode PetscDeviceDereference_Internal(PetscDevice device)
262 {
263   PetscFunctionBegin;
264   --device->refcnt;
265   PetscAssert(device->refcnt >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "PetscDevice has negative reference count %" PetscInt_FMT, device->refcnt);
266   PetscFunctionReturn(PETSC_SUCCESS);
267 }
268 
PetscDeviceCheckDeviceCount_Internal(PetscInt count)269 static inline PetscErrorCode PetscDeviceCheckDeviceCount_Internal(PetscInt count)
270 {
271   PetscFunctionBegin;
272   PetscAssert(count < PETSC_DEVICE_MAX_DEVICES, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Detected %" PetscInt_FMT " devices, which is larger than maximum supported number of devices %d", count, PETSC_DEVICE_MAX_DEVICES);
273   PetscFunctionReturn(PETSC_SUCCESS);
274 }
275 #endif /* PETSC_DEVICELANGUAGE_CXX for PetscDevice Internal Functions */
276 
277 /* Convenience form of PetscDeviceGetDefaultForType_Internal(): calls it with the
278  * automatically selected default PetscDeviceType, PETSC_DEVICE_DEFAULT() */
279 #define PetscDeviceGetDefault_Internal(device) PetscDeviceGetDefaultForType_Internal(PETSC_DEVICE_DEFAULT(), device)
280 
PetscDeviceConfiguredFor_Internal(PetscDeviceType type)281 static inline PETSC_CONSTEXPR_14 PetscBool PetscDeviceConfiguredFor_Internal(PetscDeviceType type)
282 {
283   switch (type) {
284   case PETSC_DEVICE_HOST:
285     return PETSC_TRUE;
286     /* casts are needed in C++ */
287   case PETSC_DEVICE_CUDA:
288     return (PetscBool)PetscDefined(HAVE_CUDA);
289   case PETSC_DEVICE_HIP:
290     return (PetscBool)PetscDefined(HAVE_HIP);
291   case PETSC_DEVICE_SYCL:
292     return (PetscBool)PetscDefined(HAVE_SYCL);
293   case PETSC_DEVICE_MAX:
294     return PETSC_FALSE;
295     /* Do not add default case! Will make compiler warn on new additions to PetscDeviceType! */
296   }
297   PetscUnreachable();
298   return PETSC_FALSE;
299 }
300 
301 // ===================================================================================
302 //                     PetscDeviceContext Internal Functions
303 // ===================================================================================
304 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscDeviceContextGetNullContext_Internal(PetscDeviceContext *);
305 #if PetscDefined(DEVICELANGUAGE_CXX)
PetscDeviceContextGetBLASHandle_Internal(PetscDeviceContext dctx,void * handle)306 static inline PetscErrorCode PetscDeviceContextGetBLASHandle_Internal(PetscDeviceContext dctx, void *handle)
307 {
308   PetscFunctionBegin;
309   /* we do error checking here as this routine is an entry-point */
310   PetscValidDeviceContext(dctx, 1);
311   PetscUseTypeMethod(dctx, getblashandle, handle);
312   PetscFunctionReturn(PETSC_SUCCESS);
313 }
314 
PetscDeviceContextGetSOLVERHandle_Internal(PetscDeviceContext dctx,void * handle)315 static inline PetscErrorCode PetscDeviceContextGetSOLVERHandle_Internal(PetscDeviceContext dctx, void *handle)
316 {
317   PetscFunctionBegin;
318   /* we do error checking here as this routine is an entry-point */
319   PetscValidDeviceContext(dctx, 1);
320   PetscUseTypeMethod(dctx, getsolverhandle, handle);
321   PetscFunctionReturn(PETSC_SUCCESS);
322 }
323 
PetscDeviceContextGetStreamHandle_Internal(PetscDeviceContext dctx,void ** handle)324 static inline PetscErrorCode PetscDeviceContextGetStreamHandle_Internal(PetscDeviceContext dctx, void **handle)
325 {
326   PetscFunctionBegin;
327   /* we do error checking here as this routine is an entry-point */
328   PetscValidDeviceContext(dctx, 1);
329   PetscAssertPointer(handle, 2);
330   PetscUseTypeMethod(dctx, getstreamhandle, handle);
331   PetscFunctionReturn(PETSC_SUCCESS);
332 }
333 
PetscDeviceContextBeginTimer_Internal(PetscDeviceContext dctx)334 static inline PetscErrorCode PetscDeviceContextBeginTimer_Internal(PetscDeviceContext dctx)
335 {
336   PetscFunctionBegin;
337   /* we do error checking here as this routine is an entry-point */
338   PetscValidDeviceContext(dctx, 1);
339   PetscUseTypeMethod(dctx, begintimer);
340   PetscFunctionReturn(PETSC_SUCCESS);
341 }
342 
PetscDeviceContextEndTimer_Internal(PetscDeviceContext dctx,PetscLogDouble * elapsed)343 static inline PetscErrorCode PetscDeviceContextEndTimer_Internal(PetscDeviceContext dctx, PetscLogDouble *elapsed)
344 {
345   PetscFunctionBegin;
346   /* we do error checking here as this routine is an entry-point */
347   PetscValidDeviceContext(dctx, 1);
348   PetscAssertPointer(elapsed, 2);
349   PetscUseTypeMethod(dctx, endtimer, elapsed);
350   PetscFunctionReturn(PETSC_SUCCESS);
351 }
352 
353   #if PetscDefined(HAVE_CUDA_VERSION_12_2PLUS)
PetscDeviceContextGetPower_Internal(PetscDeviceContext dctx,PetscLogDouble * power)354 static inline PetscErrorCode PetscDeviceContextGetPower_Internal(PetscDeviceContext dctx, PetscLogDouble *power)
355 {
356   PetscFunctionBegin;
357   PetscValidDeviceContext(dctx, 1);
358   PetscAssertPointer(power, 2);
359   PetscUseTypeMethod(dctx, getpower, power);
360   PetscFunctionReturn(PETSC_SUCCESS);
361 }
362   #endif
363 
PetscDeviceContextBeginEnergyMeter_Internal(PetscDeviceContext dctx)364 static inline PetscErrorCode PetscDeviceContextBeginEnergyMeter_Internal(PetscDeviceContext dctx)
365 {
366   PetscFunctionBegin;
367   /* we do error checking here as this routine is an entry-point */
368   PetscValidDeviceContext(dctx, 1);
369   PetscUseTypeMethod(dctx, beginenergymeter);
370   PetscFunctionReturn(PETSC_SUCCESS);
371 }
372 
PetscDeviceContextEndEnergyMeter_Internal(PetscDeviceContext dctx,PetscLogDouble * energy)373 static inline PetscErrorCode PetscDeviceContextEndEnergyMeter_Internal(PetscDeviceContext dctx, PetscLogDouble *energy)
374 {
375   PetscFunctionBegin;
376   /* we do error checking here as this routine is an entry-point */
377   PetscValidDeviceContext(dctx, 1);
378   PetscAssertPointer(energy, 2);
379   PetscUseTypeMethod(dctx, endenergymeter, energy);
380   PetscFunctionReturn(PETSC_SUCCESS);
381 }
382 #endif /* PETSC_DEVICELANGUAGE_CXX for PetscDeviceContext Internal Functions */
383 
384 /* note, only does assertion checking in debug mode */
PetscDeviceContextGetCurrentContextAssertType_Internal(PetscDeviceContext * dctx,PetscDeviceType type)385 static inline PetscErrorCode PetscDeviceContextGetCurrentContextAssertType_Internal(PetscDeviceContext *dctx, PetscDeviceType type)
386 {
387   PetscFunctionBegin;
388   PetscCall(PetscDeviceContextGetCurrentContext(dctx));
389   if (PetscDefined(USE_DEBUG)) {
390     PetscDeviceType dtype;
391 
392     PetscValidDeviceType(type, 2);
393     PetscCall(PetscDeviceContextGetDeviceType(*dctx, &dtype));
394     PetscCheckCompatibleDeviceTypes(dtype, 1, type, 2);
395   } else (void)type;
396   PetscFunctionReturn(PETSC_SUCCESS);
397 }
398 
PetscDeviceContextGetOptionalNullContext_Internal(PetscDeviceContext * dctx)399 static inline PetscErrorCode PetscDeviceContextGetOptionalNullContext_Internal(PetscDeviceContext *dctx)
400 {
401   PetscFunctionBegin;
402   PetscAssertPointer(dctx, 1);
403   if (!*dctx) PetscCall(PetscDeviceContextGetNullContext_Internal(dctx));
404   PetscValidDeviceContext(*dctx, 1);
405   PetscFunctionReturn(PETSC_SUCCESS);
406 }
407 
408 /* Experimental API -- it will eventually become public */
409 PETSC_EXTERN PetscErrorCode PetscDeviceRegisterMemory(const void *PETSC_RESTRICT, PetscMemType, size_t);
410 PETSC_EXTERN PetscErrorCode PetscDeviceGetAttribute(PetscDevice, PetscDeviceAttribute, void *);
411 #if PetscDefined(DEVICELANGUAGE_CXX)
412 PETSC_EXTERN PetscErrorCode PetscDeviceContextMarkIntentFromID(PetscDeviceContext, PetscObjectId, PetscMemoryAccessMode, const char name[]);
413 #endif
414 // Used for testing purposes, internal use ONLY
415 PETSC_EXTERN PetscErrorCode PetscGetMarkedObjectMap_Internal(size_t *, PetscObjectId **, PetscMemoryAccessMode **, size_t **, PetscEvent ***);
416 PETSC_EXTERN PetscErrorCode PetscRestoreMarkedObjectMap_Internal(size_t, PetscObjectId **, PetscMemoryAccessMode **, size_t **, PetscEvent ***);
417 #if PetscDefined(DEVICELANGUAGE_CXX) && defined(__cplusplus)
418 namespace
419 {
420 
PetscDeviceContextMarkIntentFromID(PetscDeviceContext dctx,PetscObject obj,PetscMemoryAccessMode mode,const char name[])421 inline PetscErrorCode PetscDeviceContextMarkIntentFromID(PetscDeviceContext dctx, PetscObject obj, PetscMemoryAccessMode mode, const char name[])
422 {
423   PetscFunctionBegin;
424   PetscCall(PetscDeviceContextMarkIntentFromID(dctx, obj->id, mode, name));
425   PetscFunctionReturn(PETSC_SUCCESS);
426 }
427 
428 } // anonymous namespace
429 #endif
430 
431 PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_HOST(PetscDeviceContext);
432 #if PetscDefined(HAVE_CUDA)
433 PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_CUDA(PetscDeviceContext);
434 #endif
435 #if PetscDefined(HAVE_HIP)
436 PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_HIP(PetscDeviceContext);
437 #endif
438 #if PetscDefined(HAVE_SYCL)
439 PETSC_INTERN PetscErrorCode PetscDeviceContextCreate_SYCL(PetscDeviceContext);
440 #endif
441 
PetscDeviceContextSynchronizeIfWithBarrier_Internal(PetscDeviceContext dctx)442 static inline PetscErrorCode PetscDeviceContextSynchronizeIfWithBarrier_Internal(PetscDeviceContext dctx)
443 {
444   PetscStreamType stream_type;
445 
446   PetscFunctionBegin;
447   PetscCall(PetscDeviceContextGetStreamType(dctx, &stream_type));
448   if (stream_type == PETSC_STREAM_DEFAULT_WITH_BARRIER || stream_type == PETSC_STREAM_NONBLOCKING_WITH_BARRIER) PetscCall(PetscDeviceContextSynchronize(dctx));
449   PetscFunctionReturn(PETSC_SUCCESS);
450 }
451 
452 #if PetscDefined(HAVE_CUDA)
453 
/*
  PetscCallNVML - evaluate an NVML call and raise a PETSc error if it did not succeed

  The expression is evaluated once. If its result differs from NVML_SUCCESS, PETSC_ERR_LIB is
  raised with the name of the enclosing function and the text from nvmlErrorString().
*/
  #define PetscCallNVML(...) \
    do { \
      const nvmlReturn_t petsc_nvml_status_ = (__VA_ARGS__); \
      PetscCheck(NVML_SUCCESS == petsc_nvml_status_, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s, error string: %s", __func__, nvmlErrorString(petsc_nvml_status_)); \
    } while (0)
459 
460 #endif
461