xref: /petsc/include/petsclog.h (revision 69eda9da2cab3444df2acd68fbbc2fdf1947414f)
1 /*
2     Defines profile/logging in PETSc.
3 */
4 #ifndef PETSCLOG_H
5 #define PETSCLOG_H
6 
7 #include <petscsys.h>
8 #include <petsctime.h>
9 
10 /* SUBMANSEC = Sys */
11 
12 /* General logging of information; different from event logging */
13 PETSC_EXTERN PetscErrorCode PetscInfo_Private(const char[], PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(3, 4);
/*
   PetscInfo(A, ...) - when PETSC_USE_INFO is defined, forwards the object A (cast to PetscObject),
   the name of the calling function, and the printf-style arguments to PetscInfo_Private().
   Otherwise it expands to the constant 0 and none of its arguments are evaluated.

   A is parenthesized before the cast so that an expression argument (e.g. a conditional) is
   cast as a whole rather than only its first operand.
*/
#if defined(PETSC_USE_INFO)
  #define PetscInfo(A, ...) PetscInfo_Private(PETSC_FUNCTION_NAME, ((PetscObject)(A)), __VA_ARGS__)
#else
  #define PetscInfo(A, ...) 0
#endif
19 
20 #define PetscInfo1(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
21 #define PetscInfo2(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
22 #define PetscInfo3(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
23 #define PetscInfo4(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
24 #define PetscInfo5(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
25 #define PetscInfo6(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
26 #define PetscInfo7(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
27 #define PetscInfo8(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
28 #define PetscInfo9(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
29 
30 /*E
31   PetscInfoCommFlag - Describes the method by which to filter `PetscInfo()` by communicator size
32 
33   Used as an input for `PetscInfoSetFilterCommSelf()`
34 
35 + `PETSC_INFO_COMM_ALL` - Default uninitialized value. `PetscInfo()` will not filter based on
36 communicator size (i.e. will print for all communicators)
37 . `PETSC_INFO_COMM_NO_SELF` - `PetscInfo()` will NOT print for communicators with size = 1 (i.e. *_COMM_SELF)
38 - `PETSC_INFO_COMM_ONLY_SELF` - `PetscInfo()` will ONLY print for communicators with size = 1
39 
40   Level: intermediate
41 
42 .seealso: `PetscInfo()`, `PetscInfoSetFromOptions()`, `PetscInfoSetFilterCommSelf()`
43 E*/
typedef enum {
  PETSC_INFO_COMM_ALL = -1,     /* default: no filtering by communicator size */
  PETSC_INFO_COMM_NO_SELF = 0,  /* do not print for communicators of size 1 */
  PETSC_INFO_COMM_ONLY_SELF = 1 /* print only for communicators of size 1 */
} PetscInfoCommFlag;
49 
50 PETSC_EXTERN const char *const PetscInfoCommFlags[];
51 PETSC_EXTERN PetscErrorCode    PetscInfoDeactivateClass(PetscClassId);
52 PETSC_EXTERN PetscErrorCode    PetscInfoActivateClass(PetscClassId);
53 PETSC_EXTERN PetscErrorCode    PetscInfoEnabled(PetscClassId, PetscBool *);
54 PETSC_EXTERN PetscErrorCode    PetscInfoAllow(PetscBool);
55 PETSC_EXTERN PetscErrorCode    PetscInfoSetFile(const char[], const char[]);
56 PETSC_EXTERN PetscErrorCode    PetscInfoGetFile(char **, FILE **);
57 PETSC_EXTERN PetscErrorCode    PetscInfoSetClasses(PetscBool, PetscInt, const char *const *);
58 PETSC_EXTERN PetscErrorCode    PetscInfoGetClass(const char *, PetscBool *);
59 PETSC_EXTERN PetscErrorCode    PetscInfoGetInfo(PetscBool *, PetscBool *, PetscBool *, PetscBool *, PetscInfoCommFlag *);
60 PETSC_EXTERN PetscErrorCode    PetscInfoProcessClass(const char[], PetscInt, const PetscClassId[]);
61 PETSC_EXTERN PetscErrorCode    PetscInfoSetFilterCommSelf(PetscInfoCommFlag);
62 PETSC_EXTERN PetscErrorCode    PetscInfoSetFromOptions(PetscOptions);
63 PETSC_EXTERN PetscErrorCode    PetscInfoDestroy(void);
64 PETSC_EXTERN PetscBool         PetscLogPrintInfo; /* if true, indicates PetscInfo() is turned on */
65 
66 /*MC
    PetscLogEvent - id used to identify PETSc or user events which time portions (blocks) of executable
     code.
69 
70     Level: intermediate
71 
72 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStage`
73 M*/
74 typedef int PetscLogEvent;
75 
76 /*MC
77     PetscLogStage - id used to identify user stages (phases, sections) of runs - for logging
78 
79     Level: intermediate
80 
81 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEvent`
82 M*/
83 typedef int PetscLogStage;
84 
85 #define PETSC_EVENT 1311311
86 PETSC_EXTERN PetscLogEvent PETSC_LARGEST_EVENT;
87 
/*
   Handle multithreading

   With PETSC_HAVE_THREADSAFETY, PETSC_TLS is the language's thread-local storage keyword and
   PetscAddLogDouble()/PetscAddLogDoubleCnt() are external functions. Otherwise PETSC_TLS expands
   to nothing and both are expression macros that evaluate to 0:

     PetscAddLogDouble(a, b, c)          - adds c to *a and to *b
     PetscAddLogDoubleCnt(a, b, c, d, e) - adds 1 to *a and *c, and e to *b and *d

   The macro forms expand their value argument (c, respectively e) twice, so it must not have
   side effects.
*/
#if defined(PETSC_HAVE_THREADSAFETY)
  #if defined(__cplusplus)
    #define PETSC_TLS thread_local
  #else
    #define PETSC_TLS _Thread_local
  #endif
  #define PETSC_EXTERN_TLS extern PETSC_TLS PETSC_VISIBILITY_PUBLIC
PETSC_EXTERN PetscErrorCode PetscAddLogDouble(PetscLogDouble *, PetscLogDouble *, PetscLogDouble);
PETSC_EXTERN PetscErrorCode PetscAddLogDoubleCnt(PetscLogDouble *, PetscLogDouble *, PetscLogDouble *, PetscLogDouble *, PetscLogDouble);
#else
  #define PETSC_EXTERN_TLS PETSC_EXTERN
  #define PETSC_TLS
  /* Fully parenthesized so the expansion stays one operand next to !, &&, ?: and similar operators */
  #define PetscAddLogDouble(a, b, c)          ((*(a) += (c), 0) || (*(b) += (c), 0))
  #define PetscAddLogDoubleCnt(a, b, c, d, e) (PetscAddLogDouble(a, c, 1) || PetscAddLogDouble(b, d, e))
#endif
104 
105 /* We must make the following structures available to access the event
106      activation flags in the PetscLogEventBegin/End() macros. These are not part of the PETSc public
107      API and are not intended to be used by other parts of PETSc or by users.
108 
109      The code that manipulates these structures is in src/sys/logging/utils.
110 */
111 typedef struct _n_PetscIntStack *PetscIntStack;
112 
113 /* -----------------------------------------------------------------------------------------------------*/
114 /*
115     PetscClassRegInfo, PetscClassPerfInfo - Each class has two data structures associated with it. The first has
116        static information about it, the second collects statistics on how many objects of the class are created,
117        how much memory they use, etc.
118 
119     PetscClassRegLog, PetscClassPerfLog - arrays of the PetscClassRegInfo and PetscClassPerfInfo for all classes.
120 */
/* Static information about one registered class: its name and its class id */
typedef struct {
  char        *name;    /* The class name */
  PetscClassId classid; /* The integer identifying this class */
} PetscClassRegInfo;
125 
/* Statistics collected for one class: object creation/destruction counts and memory totals */
typedef struct {
  PetscClassId   id;           /* The integer identifying this class */
  int            creations;    /* The number of objects of this class created */
  int            destructions; /* The number of objects of this class destroyed */
  PetscLogDouble mem;          /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */
  PetscLogDouble descMem;      /* The total memory allocated by descendants of these objects; this is completely wrong and should possibly be removed */
} PetscClassPerfInfo;
133 
/* Array of PetscClassRegInfo records, one per registered class */
typedef struct _n_PetscClassRegLog *PetscClassRegLog;
struct _n_PetscClassRegLog {
  int                numClasses; /* The number of classes registered */
  int                maxClasses; /* The maximum number of classes */
  PetscClassRegInfo *classInfo;  /* The structure for class information (classids are monotonically increasing) */
};
140 
/* Array of PetscClassPerfInfo records, one per logged class */
typedef struct _n_PetscClassPerfLog *PetscClassPerfLog;
struct _n_PetscClassPerfLog {
  int                 numClasses; /* The number of logging classes */
  int                 maxClasses; /* The maximum number of classes */
  PetscClassPerfInfo *classInfo;  /* The structure for class information (classids are monotonically increasing) */
};
147 /* -----------------------------------------------------------------------------------------------------*/
148 /*
149     PetscEventRegInfo, PetscEventPerfInfo - Each event has two data structures associated with it. The first has
150        static information about it, the second collects statistics on how many times the event is used, how
151        much time it takes, etc.
152 
153     PetscEventRegLog, PetscEventPerfLog - an array of all PetscEventRegInfo and PetscEventPerfInfo for all events. There is one
154       of these for each stage.
155 
156 */
/* Static information about one registered event; the tool-specific members exist only when that tool is configured */
typedef struct {
  char        *name;       /* The name of this event */
  PetscClassId classid;    /* The class the event is associated with */
  PetscBool    collective; /* Flag this event as collective */
#if defined(PETSC_HAVE_TAU_PERFSTUBS)
  void *timer; /* Associated external tool timer for this event */
#endif
#if defined(PETSC_HAVE_MPE)
  int mpe_id_begin; /* MPE IDs that define the event */
  int mpe_id_end;
#endif
} PetscEventRegInfo;
169 
/*
   Performance statistics accumulated for one event. The same structure is also embedded in
   PetscStageInfo (member perfInfo) to hold the statistics of a whole stage.
*/
typedef struct {
  int            id;                  /* The integer identifying this event */
  PetscBool      active;              /* The flag to activate logging */
  PetscBool      visible;             /* The flag to print info in summary */
  int            depth;               /* The nesting depth of the event call */
  int            count;               /* The number of times this event was executed */
  PetscLogDouble flops;               /* The flops used in this event */
  PetscLogDouble flops2;              /* The square of flops used in this event */
  PetscLogDouble flopsTmp;            /* The accumulator for flops used in this event */
  PetscLogDouble time;                /* The time taken for this event */
  PetscLogDouble time2;               /* The square of time taken for this event */
  PetscLogDouble timeTmp;             /* The accumulator for time taken for this event */
  PetscLogDouble syncTime;            /* The synchronization barrier time */
  PetscLogDouble dof[8];              /* The number of degrees of freedom associated with this event */
  PetscLogDouble errors[8];           /* The errors (user-defined) associated with this event */
  PetscLogDouble numMessages;         /* The number of messages in this event */
  PetscLogDouble messageLength;       /* The total message lengths in this event */
  PetscLogDouble numReductions;       /* The number of reductions in this event */
  PetscLogDouble memIncrease;         /* How much the resident memory has increased in this event */
  PetscLogDouble mallocIncrease;      /* How much the maximum malloced space has increased in this event */
  PetscLogDouble mallocSpace;         /* How much space was malloced and kept during this event */
  PetscLogDouble mallocIncreaseEvent; /* Maximum of the high water mark within the event minus memory available at the end of the event */
#if defined(PETSC_HAVE_DEVICE)
  PetscLogDouble CpuToGpuCount; /* The total number of CPU to GPU copies */
  PetscLogDouble GpuToCpuCount; /* The total number of GPU to CPU copies */
  PetscLogDouble CpuToGpuSize;  /* The total size of CPU to GPU copies */
  PetscLogDouble GpuToCpuSize;  /* The total size of GPU to CPU copies */
  PetscLogDouble GpuFlops;      /* The flops done on a GPU in this event */
  PetscLogDouble GpuTime;       /* The time spent on a GPU in this event */
#endif
} PetscEventPerfInfo;
201 
/* Array of PetscEventRegInfo records, one per registered event; PetscStageLog holds one of these (member eventLog) */
typedef struct _n_PetscEventRegLog *PetscEventRegLog;
struct _n_PetscEventRegLog {
  int                numEvents; /* The number of registered events */
  int                maxEvents; /* The maximum number of events */
  PetscEventRegInfo *eventInfo; /* The registration information for each event */
};
208 
/* Array of PetscEventPerfInfo records, one per event; every PetscStageInfo holds its own (member eventLog) */
typedef struct _n_PetscEventPerfLog *PetscEventPerfLog;
struct _n_PetscEventPerfLog {
  int                 numEvents; /* The number of logging events */
  int                 maxEvents; /* The maximum number of events */
  PetscEventPerfInfo *eventInfo; /* The performance information for each event */
};
215 /* ------------------------------------------------------------------------------------------------------------*/
216 /*
217    PetscStageInfo - Contains all the information about a particular stage.
218 
219    PetscStageLog - An array of PetscStageInfo for each registered stage. There is a single one of these in the code.
220 */
/* Everything recorded for one stage: its name, its own totals, and the per-event and per-class logs */
typedef struct _PetscStageInfo {
  char              *name;     /* The stage name */
  PetscBool          used;     /* The stage was pushed on this processor */
  PetscEventPerfInfo perfInfo; /* The stage performance information */
  PetscEventPerfLog  eventLog; /* The event information for this stage */
  PetscClassPerfLog  classLog; /* The class information for this stage */
#if defined(PETSC_HAVE_TAU_PERFSTUBS)
  void *timer; /* Associated external tool timer for this stage */
#endif
} PetscStageInfo;
231 
/* The stage log: the array of stages plus the registration logs for events and classes */
typedef struct _n_PetscStageLog *PetscStageLog;
struct _n_PetscStageLog {
  int              numStages; /* The number of registered stages */
  int              maxStages; /* The maximum number of stages */
  PetscIntStack    stack;     /* The stack for active stages */
  int              curStage;  /* The current stage (only used in macros so we don't call PetscIntStackTop) */
  PetscStageInfo  *stageInfo; /* The information for each stage */
  PetscEventRegLog eventLog;  /* The registered events */
  PetscClassRegLog classLog;  /* The registered classes */
};
242 /* -----------------------------------------------------------------------------------------------------*/
243 
244 PETSC_DEPRECATED_FUNCTION("PetscLogObjectParent() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectParent(PetscObject o, PetscObject p)
245 {
246   (void)o;
247   (void)p;
248   return 0;
249 }
250 
251 PETSC_DEPRECATED_FUNCTION("PetscLogObjectMemory() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectMemory(PetscObject o, PetscLogDouble m)
252 {
253   (void)o;
254   (void)m;
255   return 0;
256 }
257 
258 #if defined(PETSC_USE_LOG) /* --- Logging is turned on --------------------------------*/
259 PETSC_EXTERN PetscStageLog  petsc_stageLog;
260 PETSC_EXTERN PetscErrorCode PetscLogGetStageLog(PetscStageLog *);
261 PETSC_EXTERN PetscErrorCode PetscStageLogGetCurrent(PetscStageLog, int *);
262 PETSC_EXTERN PetscErrorCode PetscStageLogGetEventPerfLog(PetscStageLog, int, PetscEventPerfLog *);
263 
264 PETSC_EXTERN PetscErrorCode PetscGetFlops(PetscLogDouble *);
265 
266   #if defined(PETSC_HAVE_MPE)
267 PETSC_EXTERN PetscErrorCode PetscLogMPEBegin(void);
268 PETSC_EXTERN PetscErrorCode PetscLogMPEDump(const char[]);
269   #endif
270 
271 PETSC_EXTERN PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
272 PETSC_EXTERN PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
273 PETSC_EXTERN PetscErrorCode (*PetscLogPHC)(PetscObject);
274 PETSC_EXTERN PetscErrorCode (*PetscLogPHD)(PetscObject);
275 
276   #define PetscLogObjectParents(p, n, d) PetscMacroReturnStandard(for (int _i = 0; _i < (n); ++_i) PetscCall(PetscLogObjectParent((PetscObject)(p), (PetscObject)(d)[_i]));)
277   #define PetscLogObjectCreate(h)        ((PetscLogPHC) ? (*PetscLogPHC)((PetscObject)(h)) : 0)
278   #define PetscLogObjectDestroy(h)       ((PetscLogPHD) ? (*PetscLogPHD)((PetscObject)(h)) : 0)
279 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3);
280 
281 /* Initialization functions */
282 PETSC_EXTERN PetscErrorCode PetscLogDefaultBegin(void);
283 PETSC_EXTERN PetscErrorCode PetscLogAllBegin(void);
284 PETSC_EXTERN PetscErrorCode PetscLogNestedBegin(void);
285 PETSC_EXTERN PetscErrorCode PetscLogTraceBegin(FILE *);
286 PETSC_EXTERN PetscErrorCode PetscLogActions(PetscBool);
287 PETSC_EXTERN PetscErrorCode PetscLogObjects(PetscBool);
288 PETSC_EXTERN PetscErrorCode PetscLogSetThreshold(PetscLogDouble, PetscLogDouble *);
289 PETSC_EXTERN PetscErrorCode PetscLogSet(PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject));
290 
291 /* Output functions */
292 PETSC_EXTERN PetscErrorCode PetscLogView(PetscViewer);
293 PETSC_EXTERN PetscErrorCode PetscLogViewFromOptions(void);
294 PETSC_EXTERN PetscErrorCode PetscLogDump(const char[]);
295 
296 /* Status checking functions */
297 PETSC_EXTERN PetscErrorCode PetscLogIsActive(PetscBool *);
298 
299 /* Stage functions */
300 PETSC_EXTERN PetscErrorCode PetscLogStageRegister(const char[], PetscLogStage *);
301 PETSC_EXTERN PetscErrorCode PetscLogStagePush(PetscLogStage);
302 PETSC_EXTERN PetscErrorCode PetscLogStagePop(void);
303 PETSC_EXTERN PetscErrorCode PetscLogStageSetActive(PetscLogStage, PetscBool);
304 PETSC_EXTERN PetscErrorCode PetscLogStageGetActive(PetscLogStage, PetscBool *);
305 PETSC_EXTERN PetscErrorCode PetscLogStageSetVisible(PetscLogStage, PetscBool);
306 PETSC_EXTERN PetscErrorCode PetscLogStageGetVisible(PetscLogStage, PetscBool *);
307 PETSC_EXTERN PetscErrorCode PetscLogStageGetId(const char[], PetscLogStage *);
308 
309 /* Event functions */
310 PETSC_EXTERN PetscErrorCode PetscLogEventRegister(const char[], PetscClassId, PetscLogEvent *);
311 PETSC_EXTERN PetscErrorCode PetscLogEventSetCollective(PetscLogEvent, PetscBool);
312 PETSC_EXTERN PetscErrorCode PetscLogEventIncludeClass(PetscClassId);
313 PETSC_EXTERN PetscErrorCode PetscLogEventExcludeClass(PetscClassId);
314 PETSC_EXTERN PetscErrorCode PetscLogEventActivate(PetscLogEvent);
315 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivate(PetscLogEvent);
316 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent);
317 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent);
318 PETSC_EXTERN PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent, PetscBool);
319 PETSC_EXTERN PetscErrorCode PetscLogEventActivateClass(PetscClassId);
320 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivateClass(PetscClassId);
321 PETSC_EXTERN PetscErrorCode PetscLogEventGetId(const char[], PetscLogEvent *);
322 PETSC_EXTERN PetscErrorCode PetscLogEventGetPerfInfo(int, PetscLogEvent, PetscEventPerfInfo *);
323 PETSC_EXTERN PetscErrorCode PetscLogEventSetDof(PetscLogEvent, PetscInt, PetscLogDouble);
324 PETSC_EXTERN PetscErrorCode PetscLogEventSetError(PetscLogEvent, PetscInt, PetscLogDouble);
325 PETSC_EXTERN PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent);
326 PETSC_EXTERN PetscErrorCode PetscLogPopCurrentEvent_Internal(void);
327 
328 PETSC_EXTERN PetscBool PetscLogMemory;
329 
330 PETSC_EXTERN PetscBool      PetscLogSyncOn; /* true if logging synchronization is enabled */
331 PETSC_EXTERN PetscErrorCode PetscLogEventSynchronize(PetscLogEvent, MPI_Comm);
332 
  /*
     Event macros. Each one first checks, in this order, that the relevant callback pointer is set,
     that the current stage (petsc_stageLog->curStage) is active, and that event e is active in
     that stage's event log. If any check fails the macro evaluates to 0 and calls nothing.

     e is expanded several times, so it must not have side effects.
  */

  /* When the checks pass (the callback tested is PetscLogPLB), evaluates to PetscLogEventSynchronize(e, comm) */
  #define PetscLogEventSync(e, comm) \
    (((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? PetscLogEventSynchronize((e), (comm)) : 0))

  /*
     When the checks pass, calls (*PetscLogPLB)(e, 0, o1, o2, o3, o4) and, only if that returns 0,
     PetscLogPushCurrentEvent_Internal(e). The two calls are joined with ||, so the result is 0 when
     both return 0 and 1 otherwise (the original nonzero code is not preserved).
  */
  #define PetscLogEventBegin(e, o1, o2, o3, o4) \
    ((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (((*PetscLogPLB)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPushCurrentEvent_Internal(e)) : 0)

  /*
     Counterpart of PetscLogEventBegin(): tests and calls PetscLogPLE instead of PetscLogPLB, and
     follows a 0 return with PetscLogPopCurrentEvent_Internal(). The result is again 0 or 1.
  */
  #define PetscLogEventEnd(e, o1, o2, o3, o4) \
    ((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (((*PetscLogPLE)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPopCurrentEvent_Internal()) : 0)
341 
342 PETSC_EXTERN PetscErrorCode PetscLogEventGetFlops(PetscLogEvent, PetscLogDouble *);
343 PETSC_EXTERN PetscErrorCode PetscLogEventZeroFlops(PetscLogEvent);
344 
/* Global counters: total flops, and counts and lengths of MPI messages, reductions, gathers, scatters and waits */
346 PETSC_EXTERN PetscLogDouble petsc_TotalFlops;
347 PETSC_EXTERN PetscLogDouble petsc_irecv_ct;
348 PETSC_EXTERN PetscLogDouble petsc_isend_ct;
349 PETSC_EXTERN PetscLogDouble petsc_recv_ct;
350 PETSC_EXTERN PetscLogDouble petsc_send_ct;
351 PETSC_EXTERN PetscLogDouble petsc_irecv_len;
352 PETSC_EXTERN PetscLogDouble petsc_isend_len;
353 PETSC_EXTERN PetscLogDouble petsc_recv_len;
354 PETSC_EXTERN PetscLogDouble petsc_send_len;
355 PETSC_EXTERN PetscLogDouble petsc_allreduce_ct;
356 PETSC_EXTERN PetscLogDouble petsc_gather_ct;
357 PETSC_EXTERN PetscLogDouble petsc_scatter_ct;
358 PETSC_EXTERN PetscLogDouble petsc_wait_ct;
359 PETSC_EXTERN PetscLogDouble petsc_wait_any_ct;
360 PETSC_EXTERN PetscLogDouble petsc_wait_all_ct;
361 PETSC_EXTERN PetscLogDouble petsc_sum_of_waits_ct;
362 
363 /* Thread local storage */
364 PETSC_EXTERN_TLS PetscLogDouble petsc_TotalFlops_th;
365 PETSC_EXTERN_TLS PetscLogDouble petsc_irecv_ct_th;
366 PETSC_EXTERN_TLS PetscLogDouble petsc_isend_ct_th;
367 PETSC_EXTERN_TLS PetscLogDouble petsc_recv_ct_th;
368 PETSC_EXTERN_TLS PetscLogDouble petsc_send_ct_th;
369 PETSC_EXTERN_TLS PetscLogDouble petsc_irecv_len_th;
370 PETSC_EXTERN_TLS PetscLogDouble petsc_isend_len_th;
371 PETSC_EXTERN_TLS PetscLogDouble petsc_recv_len_th;
372 PETSC_EXTERN_TLS PetscLogDouble petsc_send_len_th;
373 PETSC_EXTERN_TLS PetscLogDouble petsc_allreduce_ct_th;
374 PETSC_EXTERN_TLS PetscLogDouble petsc_gather_ct_th;
375 PETSC_EXTERN_TLS PetscLogDouble petsc_scatter_ct_th;
376 PETSC_EXTERN_TLS PetscLogDouble petsc_wait_ct_th;
377 PETSC_EXTERN_TLS PetscLogDouble petsc_wait_any_ct_th;
378 PETSC_EXTERN_TLS PetscLogDouble petsc_wait_all_ct_th;
379 PETSC_EXTERN_TLS PetscLogDouble petsc_sum_of_waits_ct_th;
380 
381   /*
382    Flop counting:  We count each arithmetic operation (e.g., addition, multiplication) separately.
383 
384    For the complex numbers version, note that
385        1 complex addition = 2 flops
386        1 complex multiplication = 6 flops,
387    where we define 1 flop as that for a double precision scalar.  We roughly approximate
388    flop counting for complex numbers by multiplying the total flops by 4; this corresponds
389    to the assumption that we're counting mostly additions and multiplications -- and
390    roughly the same number of each.  More accurate counting could be done by distinguishing
391    among the various arithmetic operations.
392  */
393 
394   #if defined(PETSC_USE_COMPLEX)
395     #define PETSC_FLOPS_PER_OP 4.0
396   #else
397     #define PETSC_FLOPS_PER_OP 1.0
398   #endif
399 
400 /*@C
401        PetscLogFlops - Log how many flops are performed in a calculation
402 
403    Input Parameter:
404 .   flops - the number of flops
405 
406    Level: intermediate
407 
408    Note:
409      To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double,
410      not an integer. Use `PetscLogFlops`(4.0*n) not `PetscLogFlops`(4*n)
411 
412 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`
413 @*/
414 static inline PetscErrorCode PetscLogFlops(PetscLogDouble n)
415 {
416   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops");
417   return PetscAddLogDouble(&petsc_TotalFlops, &petsc_TotalFlops_th, PETSC_FLOPS_PER_OP * n);
418 }
419 
420   /*
421      These are used internally in the PETSc routines to keep a count of MPI messages and
422    their sizes.
423 
424      This does not work for MPI-Uni because our include/petsc/mpiuni/mpi.h file
   uses macros to define the MPI operations.
426 
427      It does not work correctly from HP-UX because it processes the
428    macros in a way that sometimes it double counts, hence
429    PETSC_HAVE_BROKEN_RECURSIVE_MACRO
430 
431      It does not work with Windows because winmpich lacks MPI_Type_size()
432 */
433   #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO) && !defined(PETSC_HAVE_MPI_MISSING_TYPESIZE)
434 /*
435    Logging of MPI activities
436 */
437 static inline PetscErrorCode PetscMPITypeSize(PetscInt count, MPI_Datatype type, PetscLogDouble *length, PetscLogDouble *length_th)
438 {
439   PetscMPIInt typesize;
440 
441   if (type == MPI_DATATYPE_NULL) return 0;
442   PetscCallMPI(MPI_Type_size(type, &typesize));
443   return PetscAddLogDouble(length, length_th, (PetscLogDouble)(count * typesize));
444 }
445 
446 static inline PetscErrorCode PetscMPITypeSizeComm(MPI_Comm comm, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length, PetscLogDouble *length_th)
447 {
448   PetscMPIInt    typesize, size, p;
449   PetscLogDouble l;
450 
451   if (type == MPI_DATATYPE_NULL) return 0;
452   PetscCallMPI(MPI_Comm_size(comm, &size));
453   PetscCallMPI(MPI_Type_size(type, &typesize));
454   for (p = 0, l = 0.0; p < size; ++p) l += (PetscLogDouble)(counts[p] * typesize);
455   return PetscAddLogDouble(length, length_th, l);
456 }
457 
458 /*
459     Returns 1 if the communicator is parallel else zero
460 */
461 static inline int PetscMPIParallelComm(MPI_Comm comm)
462 {
463   PetscMPIInt size;
464   MPI_Comm_size(comm, &size);
465   return size > 1;
466 }
467 
    /*
       Point-to-point logging wrappers.

       Each macro increments the matching message counter by way of PetscAddLogDouble(), adds the
       message length through PetscMPITypeSize(), and then makes the actual MPI call. Where the
       macro has the same name as the MPI routine, the inner use of that name is not expanded again
       by the preprocessor and so refers to the routine itself.

       The pieces are joined with ||: the expression is 0 when every piece returns 0 and 1 as soon
       as one returns nonzero, in which case the remaining pieces are not evaluated.
    */
    #define MPI_Irecv(buf, count, datatype, source, tag, comm, request) \
      (PetscAddLogDouble(&petsc_irecv_ct, &petsc_irecv_ct_th, 1) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len), &(petsc_irecv_len_th)) || MPI_Irecv((buf), (count), (datatype), (source), (tag), (comm), (request)))

    #define MPI_Irecv_c(buf, count, datatype, source, tag, comm, request) \
      (PetscAddLogDouble(&petsc_irecv_ct, &petsc_irecv_ct_th, 1) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len), &(petsc_irecv_len_th)) || MPI_Irecv_c((buf), (count), (datatype), (source), (tag), (comm), (request)))

    #define MPI_Isend(buf, count, datatype, dest, tag, comm, request) \
      (PetscAddLogDouble(&petsc_isend_ct, &petsc_isend_ct_th, 1) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len), &(petsc_isend_len_th)) || MPI_Isend((buf), (count), (datatype), (dest), (tag), (comm), (request)))

    #define MPI_Isend_c(buf, count, datatype, dest, tag, comm, request) \
      (PetscAddLogDouble(&petsc_isend_ct, &petsc_isend_ct_th, 1) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len), &(petsc_isend_len_th)) || MPI_Isend_c((buf), (count), (datatype), (dest), (tag), (comm), (request)))

    /*
       The next three expand to MPI_Startall() / MPI_Start(); count and datatype are used only for
       logging. The Startall variants add number to the message counter and skip the MPI_Startall()
       call when number is 0.
    */
    #define MPI_Startall_irecv(count, datatype, number, requests) \
      (PetscAddLogDouble(&petsc_irecv_ct, &petsc_irecv_ct_th, number) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len), &(petsc_irecv_len_th)) || ((number) && MPI_Startall((number), (requests))))

    #define MPI_Startall_isend(count, datatype, number, requests) \
      (PetscAddLogDouble(&petsc_isend_ct, &petsc_isend_ct_th, number) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len), &(petsc_isend_len_th)) || ((number) && MPI_Startall((number), (requests))))

    #define MPI_Start_isend(count, datatype, requests) (PetscAddLogDouble(&petsc_isend_ct, &petsc_isend_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_isend_len), (&petsc_isend_len_th)) || MPI_Start((requests)))

    /* Blocking receive and send: counted in petsc_recv_ct / petsc_send_ct, lengths in petsc_recv_len / petsc_send_len */
    #define MPI_Recv(buf, count, datatype, source, tag, comm, status) \
      (PetscAddLogDouble(&petsc_recv_ct, &petsc_recv_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len), (&petsc_recv_len_th)) || MPI_Recv((buf), (count), (datatype), (source), (tag), (comm), (status)))

    #define MPI_Recv_c(buf, count, datatype, source, tag, comm, status) \
      (PetscAddLogDouble(&petsc_recv_ct, &petsc_recv_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len), &(petsc_recv_len_th)) || MPI_Recv_c((buf), (count), (datatype), (source), (tag), (comm), (status)))

    #define MPI_Send(buf, count, datatype, dest, tag, comm) \
      (PetscAddLogDouble(&petsc_send_ct, &petsc_send_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Send((buf), (count), (datatype), (dest), (tag), (comm)))

    #define MPI_Send_c(buf, count, datatype, dest, tag, comm) \
      (PetscAddLogDouble(&petsc_send_ct, &petsc_send_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Send_c((buf), (count), (datatype), (dest), (tag), (comm)))
499 
    /*
       Wait logging wrappers. Each one adds 1 to its own counter (petsc_wait_ct, petsc_wait_any_ct or
       petsc_wait_all_ct) and also updates petsc_sum_of_waits_ct: by 1 for MPI_Wait and MPI_Waitany,
       by count for MPI_Waitall. The MPI call is made last; the pieces are joined with ||, so the
       expression evaluates to 0 or 1.
    */
    #define MPI_Wait(request, status) (PetscAddLogDouble(&petsc_wait_ct, &petsc_wait_ct_th, 1) || PetscAddLogDouble(&petsc_sum_of_waits_ct, &petsc_sum_of_waits_ct_th, 1) || MPI_Wait((request), (status)))

    #define MPI_Waitany(a, b, c, d) (PetscAddLogDouble(&petsc_wait_any_ct, &petsc_wait_any_ct_th, 1) || PetscAddLogDouble(&petsc_sum_of_waits_ct, &petsc_sum_of_waits_ct_th, 1) || MPI_Waitany((a), (b), (c), (d)))

    #define MPI_Waitall(count, array_of_requests, array_of_statuses) \
      (PetscAddLogDouble(&petsc_wait_all_ct, &petsc_wait_all_ct_th, 1) || PetscAddLogDouble(&petsc_sum_of_waits_ct, &petsc_sum_of_waits_ct_th, count) || MPI_Waitall((count), (array_of_requests), (array_of_statuses)))
506 
    /*
       Collective logging wrappers. The pieces of each macro are joined with ||, so the expression
       evaluates to 0 or 1 and the MPI call is skipped if an earlier piece returns nonzero.

       Counted in petsc_allreduce_ct, by PetscMPIParallelComm(comm) (1 for a communicator of size > 1,
       otherwise 0): MPI_Allreduce, MPI_Bcast, MPI_Reduce_scatter_block, MPI_Alltoall, MPI_Alltoallv,
       MPI_Ialltoall and MPI_Ialltoallv. The all-to-all variants also add the send length to
       petsc_send_len.
    */
    #define MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm) (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || MPI_Allreduce((sendbuf), (recvbuf), (count), (datatype), (op), (comm)))

    #define MPI_Bcast(buffer, count, datatype, root, comm) (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || MPI_Bcast((buffer), (count), (datatype), (root), (comm)))

    #define MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm) \
      (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || MPI_Reduce_scatter_block((sendbuf), (recvbuf), (recvcount), (datatype), (op), (comm)))

    #define MPI_Alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) \
      (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Alltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm)))

    #define MPI_Alltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm) \
      (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Alltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm)))

    /* Counted in petsc_gather_ct by PetscMPIParallelComm(comm); no message length is recorded */
    #define MPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) \
      (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, PetscMPIParallelComm(comm)) || MPI_Allgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm)))

    #define MPI_Allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm) \
      (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, PetscMPIParallelComm(comm)) || MPI_Allgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm)))

    /* Counted in petsc_gather_ct by 1 regardless of communicator size; the send length is added to petsc_send_len */
    #define MPI_Gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \
      (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, 1) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Gather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))

    #define MPI_Gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm) \
      (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, 1) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Gatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm)))

    /* Counted in petsc_scatter_ct by 1 regardless of communicator size; the receive length is added to petsc_recv_len */
    #define MPI_Scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \
      (PetscAddLogDouble(&petsc_scatter_ct, &petsc_scatter_ct_th, 1) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len), &(petsc_recv_len_th)) || MPI_Scatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))

    #define MPI_Scatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm) \
      (PetscAddLogDouble(&petsc_scatter_ct, &petsc_scatter_ct_th, 1) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len), &(petsc_recv_len_th)) || MPI_Scatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))

    /* Nonblocking all-to-all: logged exactly like the blocking MPI_Alltoall / MPI_Alltoallv wrappers */
    #define MPI_Ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \
      (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Ialltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request)))

    #define MPI_Ialltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm, request) \
      (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Ialltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm), (request)))
543 
544     #define MPI_Iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \
545       (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, PetscMPIParallelComm(comm)) || MPI_Iallgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request)))
546 
547     #define MPI_Iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm, request) \
548       (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, PetscMPIParallelComm(comm)) || MPI_Iallgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm), (request)))
549 
550     #define MPI_Igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
551       (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, 1) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Igather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)))
552 
553     #define MPI_Igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm, request) \
554       (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, 1) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Igatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm), (request)))
555 
556     #define MPI_Iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
557       (PetscAddLogDouble(&petsc_scatter_ct, &petsc_scatter_ct_th, 1) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len), (&petsc_recv_len_th)) || MPI_Iscatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)))
558 
559     #define MPI_Iscatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
560       (PetscAddLogDouble(&petsc_scatter_ct, &petsc_scatter_ct_th, 1) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len), (&petsc_recv_len_th)) || MPI_Iscatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)))
561 
562   #else
563 
564     #define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests)))
565 
566     #define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests)))
567 
568     #define MPI_Start_isend(count, datatype, requests) (MPI_Start((requests)))
569 
570   #endif /* !MPIUNI_H && ! PETSC_HAVE_BROKEN_RECURSIVE_MACRO */
571 
572 #else /* ---Logging is turned off --------------------------------------------*/
573 
574   #define PetscLogMemory PETSC_FALSE
575 
576   #define PetscLogFlops(n) 0
577   #define PetscGetFlops(a) (*(a) = 0.0, 0)
578 
579   #define PetscLogStageRegister(a, b)   0
580   #define PetscLogStagePush(a)          0
581   #define PetscLogStagePop()            0
582   #define PetscLogStageSetActive(a, b)  0
583   #define PetscLogStageGetActive(a, b)  0
584   #define PetscLogStageGetVisible(a, b) 0
585   #define PetscLogStageSetVisible(a, b) 0
586   #define PetscLogStageGetId(a, b)      (*(b) = 0, 0)
587 
588   #define PetscLogEventRegister(a, b, c)    0
589   #define PetscLogEventSetCollective(a, b)  0
590   #define PetscLogEventIncludeClass(a)      0
591   #define PetscLogEventExcludeClass(a)      0
592   #define PetscLogEventActivate(a)          0
593   #define PetscLogEventDeactivate(a)        0
594   #define PetscLogEventDeactivatePush(a)    0
595   #define PetscLogEventDeactivatePop(a)     0
596   #define PetscLogEventActivateClass(a)     0
597   #define PetscLogEventDeactivateClass(a)   0
598   #define PetscLogEventSetActiveAll(a, b)   0
599   #define PetscLogEventGetId(a, b)          (*(b) = 0, 0)
600   #define PetscLogEventGetPerfInfo(a, b, c) 0
601   #define PetscLogEventSetDof(a, b, c)      0
602   #define PetscLogEventSetError(a, b, c)    0
603 
604   #define PetscLogPLB 0
605   #define PetscLogPLE 0
606   #define PetscLogPHC 0
607   #define PetscLogPHD 0
608 
609   #define PetscLogObjectParents(p, n, c) 0
610   #define PetscLogObjectCreate(h)        0
611   #define PetscLogObjectDestroy(h)       0
612 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3);
613 
614   #define PetscLogDefaultBegin()     0
615   #define PetscLogAllBegin()         0
616   #define PetscLogNestedBegin()      0
617   #define PetscLogTraceBegin(file)   0
618   #define PetscLogActions(a)         0
619   #define PetscLogObjects(a)         0
620   #define PetscLogSetThreshold(a, b) 0
621   #define PetscLogSet(lb, le)        0
622   #define PetscLogIsActive(flag)     (*(flag) = PETSC_FALSE, 0)
623 
624   #define PetscLogView(viewer)      0
625   #define PetscLogViewFromOptions() 0
626   #define PetscLogDump(c)           0
627 
628   #define PetscLogEventSync(e, comm)                            0
629   #define PetscLogEventBegin(e, o1, o2, o3, o4)                 0
630   #define PetscLogEventEnd(e, o1, o2, o3, o4)                   0
631 
632   /* If PETSC_USE_LOG is NOT defined, these still need to be! */
633   #define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall(number, requests))
634   #define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall(number, requests))
635   #define MPI_Start_isend(count, datatype, requests)            MPI_Start(requests)
636 
637 #endif /* PETSC_USE_LOG */
638 
/*
   PetscPreLoadBegin - opens a preloading region; must be closed with PetscPreLoadEnd().

   The expansion deliberately leaves a `do {` and a `for (...) {` open. It declares the locals
   PetscPreLoading (initialised from flag, then possibly overridden by the -preload option),
   PetscPreLoadMax, PetscPreLoadIt and _stageNum, which the companion macros refer to by name.
   The loop body runs once when PetscPreLoading is false and twice when it is true. At the top of
   every pass the stage called name is registered (first pass) or looked up (later passes),
   marked active only if there is a single pass or this is not the first one, and pushed.

   Because PetscCall() is used, the macro can only appear in a function returning PetscErrorCode.
*/
#define PetscPreLoadBegin(flag, name) \
  do { \
    PetscBool     PetscPreLoading = flag; \
    int           PetscPreLoadMax; \
    int           PetscPreLoadIt; \
    PetscLogStage _stageNum; \
    PetscCall(PetscOptionsGetBool(NULL, NULL, "-preload", &PetscPreLoading, NULL)); \
    PetscPreLoadMax = (int)(PetscPreLoading); \
    if (PetscPreLoading) PetscPreLoadingUsed = PETSC_TRUE; \
    for (PetscPreLoadIt = 0; PetscPreLoadIt <= PetscPreLoadMax; PetscPreLoadIt++) { \
      PetscPreLoadingOn = PetscPreLoading; \
      PetscCall(PetscBarrier(NULL)); \
      if (PetscPreLoadIt > 0) { \
        PetscCall(PetscLogStageGetId(name, &_stageNum)); \
      } else { \
        PetscCall(PetscLogStageRegister(name, &_stageNum)); \
      } \
      PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \
      PetscCall(PetscLogStagePush(_stageNum));
654 
/*
   PetscPreLoadEnd - closes the region opened by PetscPreLoadBegin().

   Pops the current stage, clears the local PetscPreLoading flag so the next pass is not treated
   as a preload pass, and supplies the closing braces of the `for` loop and the `do { ... } while (0)`
   that PetscPreLoadBegin() left open. The caller provides the trailing semicolon.
*/
#define PetscPreLoadEnd() \
  PetscCall(PetscLogStagePop()); \
  PetscPreLoading = PETSC_FALSE; \
  } /* for (PetscPreLoadIt ...) */ \
  } /* do */ \
  while (0)
661 
/*
   PetscPreLoadStage - switches to another named stage inside a PetscPreLoadBegin()/PetscPreLoadEnd() region.

   Pops the current stage, then registers name (first pass) or looks it up (later passes), applies the
   same activity rule as PetscPreLoadBegin(), and pushes it. Relies on the locals PetscPreLoadIt,
   PetscPreLoadMax and _stageNum declared by PetscPreLoadBegin().
*/
#define PetscPreLoadStage(name) \
  do { \
    PetscCall(PetscLogStagePop()); \
    if (PetscPreLoadIt > 0) { \
      PetscCall(PetscLogStageGetId(name, &_stageNum)); \
    } else { \
      PetscCall(PetscLogStageRegister(name, &_stageNum)); \
    } \
    PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \
    PetscCall(PetscLogStagePush(_stageNum)); \
  } while (0)
670 
671 /* some vars for logging */
672 PETSC_EXTERN PetscBool PetscPreLoadingUsed; /* true if we are or have done preloading */
673 PETSC_EXTERN PetscBool PetscPreLoadingOn;   /* true if we are currently in a preloading calculation */
674 
675 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_DEVICE)
676 
677 /* Global GPU counters */
678 PETSC_EXTERN PetscLogDouble petsc_ctog_ct;
679 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct;
680 PETSC_EXTERN PetscLogDouble petsc_ctog_sz;
681 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz;
682 PETSC_EXTERN PetscLogDouble petsc_ctog_ct_scalar;
683 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct_scalar;
684 PETSC_EXTERN PetscLogDouble petsc_ctog_sz_scalar;
685 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz_scalar;
686 PETSC_EXTERN PetscLogDouble petsc_gflops;
687 PETSC_EXTERN PetscLogDouble petsc_gtime;
688 
689 /* Thread local storage */
690 PETSC_EXTERN_TLS PetscLogDouble petsc_ctog_ct_th;
691 PETSC_EXTERN_TLS PetscLogDouble petsc_gtoc_ct_th;
692 PETSC_EXTERN_TLS PetscLogDouble petsc_ctog_sz_th;
693 PETSC_EXTERN_TLS PetscLogDouble petsc_gtoc_sz_th;
694 PETSC_EXTERN_TLS PetscLogDouble petsc_ctog_ct_scalar_th;
695 PETSC_EXTERN_TLS PetscLogDouble petsc_gtoc_ct_scalar_th;
696 PETSC_EXTERN_TLS PetscLogDouble petsc_ctog_sz_scalar_th;
697 PETSC_EXTERN_TLS PetscLogDouble petsc_gtoc_sz_scalar_th;
698 PETSC_EXTERN_TLS PetscLogDouble petsc_gflops_th;
699 PETSC_EXTERN_TLS PetscLogDouble petsc_gtime_th;
700 
701 PETSC_EXTERN PetscErrorCode PetscLogGpuTime(void);
702 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeBegin(void);
703 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeEnd(void);
704 
705 /*@C
706        PetscLogGpuFlops - Log how many flops are performed in a calculation on the device
707 
708    Input Parameter:
709 .   flops - the number of flops
710 
711    Level: intermediate
712 
713    Notes:
714      To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double,
715      not an integer. Use `PetscLogFlops`(4.0*n) not `PetscLogFlops`(4*n)
716 
717      The values are also added to the total flop count for the MPI rank that is set with `PetscLogFlops()`; hence the number of flops
718      just on the CPU would be the value from set from `PetscLogFlops()` minus the value set from `PetscLogGpuFlops()`
719 
720 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogFlops()`, `PetscLogGpuTimeBegin()`, `PetscLogGpuTimeEnd()`
721 @*/
722 static inline PetscErrorCode PetscLogGpuFlops(PetscLogDouble n)
723 {
724   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops");
725   PetscAddLogDouble(&petsc_TotalFlops, &petsc_TotalFlops_th, PETSC_FLOPS_PER_OP * n);
726   PetscAddLogDouble(&petsc_gflops, &petsc_gflops_th, PETSC_FLOPS_PER_OP * n);
727   return 0;
728 }
729 
730 static inline PetscErrorCode PetscLogGpuTimeAdd(PetscLogDouble t)
731 {
732   PetscAddLogDouble(&petsc_gtime, &petsc_gtime_th, t);
733   return 0;
734 }
735 
736 static inline PetscErrorCode PetscLogCpuToGpu(PetscLogDouble size)
737 {
738   PetscAddLogDoubleCnt(&petsc_ctog_ct, &petsc_ctog_sz, &petsc_ctog_ct_th, &petsc_ctog_sz_th, size);
739   return 0;
740 }
741 
742 static inline PetscErrorCode PetscLogGpuToCpu(PetscLogDouble size)
743 {
744   PetscAddLogDoubleCnt(&petsc_gtoc_ct, &petsc_gtoc_sz, &petsc_gtoc_ct_th, &petsc_gtoc_sz_th, size);
745   return 0;
746 }
747 
748 static inline PetscErrorCode PetscLogCpuToGpuScalar(PetscLogDouble size)
749 {
750   PetscAddLogDoubleCnt(&petsc_ctog_ct_scalar, &petsc_ctog_sz_scalar, &petsc_ctog_ct_scalar_th, &petsc_ctog_sz_scalar_th, size);
751   return 0;
752 }
753 
754 static inline PetscErrorCode PetscLogGpuToCpuScalar(PetscLogDouble size)
755 {
756   PetscAddLogDoubleCnt(&petsc_gtoc_ct_scalar, &petsc_gtoc_sz_scalar, &petsc_gtoc_ct_scalar_th, &petsc_gtoc_sz_scalar_th, size);
757   return 0;
758 }
759 #else
760 
761   #define PetscLogCpuToGpu(a)       0
762   #define PetscLogGpuToCpu(a)       0
763   #define PetscLogCpuToGpuScalar(a) 0
764   #define PetscLogGpuToCpuScalar(a) 0
765   #define PetscLogGpuFlops(a)       0
766   #define PetscLogGpuTime()         0
767   #define PetscLogGpuTimeAdd(a)     0
768   #define PetscLogGpuTimeBegin()    0
769   #define PetscLogGpuTimeEnd()      0
770 
771 #endif /* PETSC_USE_LOG && PETSC_HAVE_DEVICE */
772 
773 /* remove TLS defines */
774 #undef PETSC_EXTERN_TLS
775 #undef PETSC_TLS
776 
777 #endif
778