xref: /petsc/include/petsclog.h (revision df4cd43f92eaa320656440c40edb1046daee8f75)
1 /*
2     Defines profile/logging in PETSc.
3 */
4 #ifndef PETSCLOG_H
5 #define PETSCLOG_H
6 
7 #include <petscsys.h>
8 #include <petsctime.h>
9 
10 /* SUBMANSEC = Sys */
11 
12 /* General logging of information; different from event logging */
13 PETSC_EXTERN PetscErrorCode PetscInfo_Private(const char[], PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(3, 4);
14 #if defined(PETSC_USE_INFO)
15   #define PetscInfo(A, ...) PetscInfo_Private(PETSC_FUNCTION_NAME, ((PetscObject)A), __VA_ARGS__)
16 #else
17   #define PetscInfo(A, ...) PETSC_SUCCESS
18 #endif
19 
/* Deprecated numbered variants of PetscInfo(): each expands to PETSC_DEPRECATED_MACRO() carrying a
   "Use PetscInfo()" message, followed by PetscInfo() with exactly the same arguments */
20 #define PetscInfo1(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
21 #define PetscInfo2(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
22 #define PetscInfo3(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
23 #define PetscInfo4(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
24 #define PetscInfo5(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
25 #define PetscInfo6(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
26 #define PetscInfo7(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
27 #define PetscInfo8(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
28 #define PetscInfo9(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
29 
30 /*E
31   PetscInfoCommFlag - Describes the method by which to filter `PetscInfo()` by communicator size
32 
33   Values:
34 + `PETSC_INFO_COMM_ALL` - Default uninitialized value. `PetscInfo()` will not filter based on
35                           communicator size (i.e. will print for all communicators)
36 . `PETSC_INFO_COMM_NO_SELF` - `PetscInfo()` will NOT print for communicators with size = 1 (i.e. *_COMM_SELF)
37 - `PETSC_INFO_COMM_ONLY_SELF` - `PetscInfo()` will ONLY print for communicators with size = 1
38 
39   Level: intermediate
40 
41   Note:
42   Used as an input for `PetscInfoSetFilterCommSelf()`
43 
44 .seealso: `PetscInfo()`, `PetscInfoSetFromOptions()`, `PetscInfoSetFilterCommSelf()`
45 E*/
46 typedef enum {
47   PETSC_INFO_COMM_ALL       = -1, /* no filtering by communicator size */
48   PETSC_INFO_COMM_NO_SELF   = 0,  /* do not print for communicators of size 1 */
49   PETSC_INFO_COMM_ONLY_SELF = 1   /* print only for communicators of size 1 */
50 } PetscInfoCommFlag;
51 
52 PETSC_EXTERN const char *const PetscInfoCommFlags[];
53 PETSC_EXTERN PetscErrorCode    PetscInfoDeactivateClass(PetscClassId);
54 PETSC_EXTERN PetscErrorCode    PetscInfoActivateClass(PetscClassId);
55 PETSC_EXTERN PetscErrorCode    PetscInfoEnabled(PetscClassId, PetscBool *);
56 PETSC_EXTERN PetscErrorCode    PetscInfoAllow(PetscBool);
57 PETSC_EXTERN PetscErrorCode    PetscInfoSetFile(const char[], const char[]);
58 PETSC_EXTERN PetscErrorCode    PetscInfoGetFile(char **, FILE **);
59 PETSC_EXTERN PetscErrorCode    PetscInfoSetClasses(PetscBool, PetscInt, const char *const *);
60 PETSC_EXTERN PetscErrorCode    PetscInfoGetClass(const char *, PetscBool *);
61 PETSC_EXTERN PetscErrorCode    PetscInfoGetInfo(PetscBool *, PetscBool *, PetscBool *, PetscBool *, PetscInfoCommFlag *);
62 PETSC_EXTERN PetscErrorCode    PetscInfoProcessClass(const char[], PetscInt, const PetscClassId[]);
63 PETSC_EXTERN PetscErrorCode    PetscInfoSetFilterCommSelf(PetscInfoCommFlag);
64 PETSC_EXTERN PetscErrorCode    PetscInfoSetFromOptions(PetscOptions);
65 PETSC_EXTERN PetscErrorCode    PetscInfoDestroy(void);
66 PETSC_EXTERN PetscBool         PetscLogPrintInfo; /* if true, indicates PetscInfo() is turned on */
67 
68 /*MC
69     PetscLogEvent - integer id used to identify PETSc or user events that time portions (blocks of executable
70      code) of a run.
71 
72     Level: intermediate
73 
74 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStage`
75 M*/
76 typedef int PetscLogEvent;
77 
78 /*MC
79     PetscLogStage - integer id used to identify user stages (phases, sections) of a run, for logging
80 
81     Level: intermediate
82 
83 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEvent`
84 M*/
85 typedef int PetscLogStage;
86 
87 #define PETSC_EVENT 1311311
88 PETSC_EXTERN PetscLogEvent PETSC_LARGEST_EVENT;
89 
90 /*
   Handle multithreading

   With PETSC_HAVE_THREADSAFETY, PETSC_TLS is the thread-local storage keyword (`thread_local` in C++,
   `_Thread_local` in C) and PetscAddLogDouble()/PetscAddLogDoubleCnt() are functions declared here.

   Otherwise PETSC_TLS is empty and both are macros:
     PetscAddLogDouble(a, b, c)          adds c to *a and to *b
     PetscAddLogDoubleCnt(a, b, c, d, e) adds 1 to *a and *c, and adds e to *b and *d
   Both macro forms evaluate their value argument (c, respectively e) twice, so it must not have side effects.
   NOTE(review): the second addition sits behind a || and so is only reached if PETSC_SUCCESS is zero -- confirm
   against the definition of PetscErrorCode.
*/
91 #if defined(PETSC_HAVE_THREADSAFETY)
92   #if defined(__cplusplus)
93     #define PETSC_TLS thread_local
94   #else
95     #define PETSC_TLS _Thread_local
96   #endif
97   #define PETSC_EXTERN_TLS extern PETSC_TLS PETSC_VISIBILITY_PUBLIC
98 PETSC_EXTERN PetscErrorCode PetscAddLogDouble(PetscLogDouble *, PetscLogDouble *, PetscLogDouble);
99 PETSC_EXTERN PetscErrorCode PetscAddLogDoubleCnt(PetscLogDouble *, PetscLogDouble *, PetscLogDouble *, PetscLogDouble *, PetscLogDouble);
100 #else
101   #define PETSC_EXTERN_TLS PETSC_EXTERN
102   #define PETSC_TLS
103   #define PetscAddLogDouble(a, b, c)          ((PetscErrorCode)((*(a) += (c), PETSC_SUCCESS) || ((*(b) += (c)), PETSC_SUCCESS)))
104   #define PetscAddLogDoubleCnt(a, b, c, d, e) ((PetscErrorCode)(PetscAddLogDouble(a, c, 1) || PetscAddLogDouble(b, d, e)))
105 #endif
106 
107 /* We must make the following structures available to access the event
108      activation flags in the PetscLogEventBegin/End() macros. These are not part of the PETSc public
109      API and are not intended to be used by other parts of PETSc or by users.
110 
111      The code that manipulates these structures is in src/sys/logging/utils.
112 */
113 typedef struct _n_PetscIntStack *PetscIntStack;
114 
115 /* -----------------------------------------------------------------------------------------------------*/
116 /*
117     PetscClassRegInfo, PetscClassPerfInfo - Each class has two data structures associated with it. The first has
118        static information about it, the second collects statistics on how many objects of the class are created,
119        how much memory they use, etc.
120 
121     PetscClassRegLog, PetscClassPerfLog - arrays of the PetscClassRegInfo and PetscClassPerfInfo for all classes.
122 */
/* Static (registration) information about a single class */
123 typedef struct {
124   char        *name;    /* The class name */
125   PetscClassId classid; /* The integer identifying this class */
126 } PetscClassRegInfo;
127 
/* Statistics collected for a single class: object creation/destruction counts and memory figures */
128 typedef struct {
129   PetscClassId   id;           /* The integer identifying this class */
130   int            creations;    /* The number of objects of this class created */
131   int            destructions; /* The number of objects of this class destroyed */
132   PetscLogDouble mem;          /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */
133   PetscLogDouble descMem;      /* The total memory allocated by descendants of these objects; this is completely wrong and should possibly be removed */
134 } PetscClassPerfInfo;
135 
/* Array of PetscClassRegInfo, one entry per registered class */
136 typedef struct _n_PetscClassRegLog *PetscClassRegLog;
137 struct _n_PetscClassRegLog {
138   int                numClasses; /* The number of classes registered */
139   int                maxClasses; /* The maximum number of classes */
140   PetscClassRegInfo *classInfo;  /* The structure for class information (classids are monotonically increasing) */
141 };
142 
/* Array of PetscClassPerfInfo, one entry per logged class */
143 typedef struct _n_PetscClassPerfLog *PetscClassPerfLog;
144 struct _n_PetscClassPerfLog {
145   int                 numClasses; /* The number of logging classes */
146   int                 maxClasses; /* The maximum number of classes */
147   PetscClassPerfInfo *classInfo;  /* The structure for class information (classids are monotonically increasing) */
148 };
149 /* -----------------------------------------------------------------------------------------------------*/
150 /*
151     PetscEventRegInfo, PetscEventPerfInfo - Each event has two data structures associated with it. The first has
152        static information about it, the second collects statistics on how many times the event is used, how
153        much time it takes, etc.
154 
155     PetscEventRegLog, PetscEventPerfLog - an array of all PetscEventRegInfo and PetscEventPerfInfo for all events. There is one
156       of these for each stage.
157 
158 */
/* Static (registration) information about a single event; the tool-specific fields exist only when
   PETSc is configured with the corresponding package (TAU perfstubs, MPE) */
159 typedef struct {
160   char        *name;       /* The name of this event */
161   PetscClassId classid;    /* The class the event is associated with */
162   PetscBool    collective; /* Flag this event as collective */
163 #if defined(PETSC_HAVE_TAU_PERFSTUBS)
164   void *timer; /* Associated external tool timer for this event */
165 #endif
166 #if defined(PETSC_HAVE_MPE)
167   int mpe_id_begin; /* MPE IDs that define the event */
168   int mpe_id_end;
169 #endif
170 } PetscEventRegInfo;
171 
/* Statistics collected for a single event (also used for the totals of a stage, see PetscStageInfo);
   the GPU fields exist only when PETSC_HAVE_DEVICE is defined */
172 typedef struct {
173   int            id;                  /* The integer identifying this event */
174   PetscBool      active;              /* The flag to activate logging */
175   PetscBool      visible;             /* The flag to print info in summary */
176   int            depth;               /* The nesting depth of the event call */
177   int            count;               /* The number of times this event was executed */
178   PetscLogDouble flops;               /* The flops used in this event */
179   PetscLogDouble flops2;              /* The square of flops used in this event */
180   PetscLogDouble flopsTmp;            /* The accumulator for flops used in this event */
181   PetscLogDouble time;                /* The time taken for this event */
182   PetscLogDouble time2;               /* The square of time taken for this event */
183   PetscLogDouble timeTmp;             /* The accumulator for time taken for this event */
184   PetscLogDouble syncTime;            /* The synchronization barrier time */
185   PetscLogDouble dof[8];              /* The number of degrees of freedom associated with this event */
186   PetscLogDouble errors[8];           /* The errors (user-defined) associated with this event */
187   PetscLogDouble numMessages;         /* The number of messages in this event */
188   PetscLogDouble messageLength;       /* The total message lengths in this event */
189   PetscLogDouble numReductions;       /* The number of reductions in this event */
190   PetscLogDouble memIncrease;         /* How much the resident memory has increased in this event */
191   PetscLogDouble mallocIncrease;      /* How much the maximum malloced space has increased in this event */
192   PetscLogDouble mallocSpace;         /* How much space was malloced and kept during this event */
193   PetscLogDouble mallocIncreaseEvent; /* Maximum of the high water mark within the event minus memory available at the end of the event */
194 #if defined(PETSC_HAVE_DEVICE)
195   PetscLogDouble CpuToGpuCount; /* The total number of CPU to GPU copies */
196   PetscLogDouble GpuToCpuCount; /* The total number of GPU to CPU copies */
197   PetscLogDouble CpuToGpuSize;  /* The total size of CPU to GPU copies */
198   PetscLogDouble GpuToCpuSize;  /* The total size of GPU to CPU copies */
199   PetscLogDouble GpuFlops;      /* The flops done on a GPU in this event */
200   PetscLogDouble GpuTime;       /* The time spent on a GPU in this event */
201 #endif
202 } PetscEventPerfInfo;
203 
/* Array of PetscEventRegInfo, one entry per registered event */
204 typedef struct _n_PetscEventRegLog *PetscEventRegLog;
205 struct _n_PetscEventRegLog {
206   int                numEvents; /* The number of registered events */
207   int                maxEvents; /* The maximum number of events */
208   PetscEventRegInfo *eventInfo; /* The registration information for each event */
209 };
210 
/* Array of PetscEventPerfInfo, one entry per event; each stage has its own (see PetscStageInfo.eventLog) */
211 typedef struct _n_PetscEventPerfLog *PetscEventPerfLog;
212 struct _n_PetscEventPerfLog {
213   int                 numEvents; /* The number of logging events */
214   int                 maxEvents; /* The maximum number of events */
215   PetscEventPerfInfo *eventInfo; /* The performance information for each event */
216 };
217 /* ------------------------------------------------------------------------------------------------------------*/
218 /*
219    PetscStageInfo - Contains all the information about a particular stage.
220 
221    PetscStageLog - An array of PetscStageInfo for each registered stage. There is a single one of these in the code.
222 */
/* Everything recorded for one stage: its own totals plus the per-event and per-class logs for that stage */
223 typedef struct _PetscStageInfo {
224   char              *name;     /* The stage name */
225   PetscBool          used;     /* The stage was pushed on this processor */
226   PetscEventPerfInfo perfInfo; /* The stage performance information */
227   PetscEventPerfLog  eventLog; /* The event information for this stage */
228   PetscClassPerfLog  classLog; /* The class information for this stage */
229 #if defined(PETSC_HAVE_TAU_PERFSTUBS)
230   void *timer; /* Associated external tool timer for this stage */
231 #endif
232 } PetscStageInfo;
233 
/* The log of all stages, together with the registration logs for events and classes */
234 typedef struct _n_PetscStageLog *PetscStageLog;
235 struct _n_PetscStageLog {
236   int              numStages; /* The number of registered stages */
237   int              maxStages; /* The maximum number of stages */
238   PetscIntStack    stack;     /* The stack for active stages */
239   int              curStage;  /* The current stage (only used in macros so we don't call PetscIntStackTop) */
240   PetscStageInfo  *stageInfo; /* The information for each stage */
241   PetscEventRegLog eventLog;  /* The registered events */
242   PetscClassRegLog classLog;  /* The registered classes */
243 };
244 /* -----------------------------------------------------------------------------------------------------*/
245 
246 PETSC_DEPRECATED_FUNCTION("PetscLogObjectParent() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectParent(PetscObject o, PetscObject p)
247 {
248   (void)o;
249   (void)p;
250   return PETSC_SUCCESS;
251 }
252 
253 PETSC_DEPRECATED_FUNCTION("PetscLogObjectMemory() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectMemory(PetscObject o, PetscLogDouble m)
254 {
255   (void)o;
256   (void)m;
257   return PETSC_SUCCESS;
258 }
259 
260 #if defined(PETSC_USE_LOG) /* --- Logging is turned on --------------------------------*/
261 PETSC_EXTERN PetscStageLog  petsc_stageLog;
262 PETSC_EXTERN PetscErrorCode PetscLogGetStageLog(PetscStageLog *);
263 PETSC_EXTERN PetscErrorCode PetscStageLogGetCurrent(PetscStageLog, int *);
264 PETSC_EXTERN PetscErrorCode PetscStageLogGetEventPerfLog(PetscStageLog, int, PetscEventPerfLog *);
265 
266 PETSC_EXTERN PetscErrorCode PetscGetFlops(PetscLogDouble *);
267 
268   #if defined(PETSC_HAVE_MPE)
269 PETSC_EXTERN PetscErrorCode PetscLogMPEBegin(void);
270 PETSC_EXTERN PetscErrorCode PetscLogMPEDump(const char[]);
271   #endif
272 
273 PETSC_EXTERN PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
274 PETSC_EXTERN PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
275 PETSC_EXTERN PetscErrorCode (*PetscLogPHC)(PetscObject);
276 PETSC_EXTERN PetscErrorCode (*PetscLogPHD)(PetscObject);
277 
/*
   PetscLogObjectParents(p, n, d) - calls PetscLogObjectParent(p, d[i]) for i = 0..n-1, checking each call with PetscCall()
   PetscLogObjectCreate(h)        - calls the PetscLogPHC hook on h if that function pointer is non-NULL, else PETSC_SUCCESS
   PetscLogObjectDestroy(h)       - calls the PetscLogPHD hook on h if that function pointer is non-NULL, else PETSC_SUCCESS
*/
278   #define PetscLogObjectParents(p, n, d) PetscMacroReturnStandard(for (int _i = 0; _i < (n); ++_i) PetscCall(PetscLogObjectParent((PetscObject)(p), (PetscObject)(d)[_i]));)
279   #define PetscLogObjectCreate(h)        ((PetscLogPHC) ? (*PetscLogPHC)((PetscObject)(h)) : PETSC_SUCCESS)
280   #define PetscLogObjectDestroy(h)       ((PetscLogPHD) ? (*PetscLogPHD)((PetscObject)(h)) : PETSC_SUCCESS)
281 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3);
282 
283 /* Initialization functions */
284 PETSC_EXTERN PetscErrorCode PetscLogDefaultBegin(void);
285 PETSC_EXTERN PetscErrorCode PetscLogAllBegin(void);
286 PETSC_EXTERN PetscErrorCode PetscLogNestedBegin(void);
287 PETSC_EXTERN PetscErrorCode PetscLogTraceBegin(FILE *);
288 PETSC_EXTERN PetscErrorCode PetscLogActions(PetscBool);
289 PETSC_EXTERN PetscErrorCode PetscLogObjects(PetscBool);
290 PETSC_EXTERN PetscErrorCode PetscLogSetThreshold(PetscLogDouble, PetscLogDouble *);
291 PETSC_EXTERN PetscErrorCode PetscLogSet(PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject));
292 
293 /* Output functions */
294 PETSC_EXTERN PetscErrorCode PetscLogView(PetscViewer);
295 PETSC_EXTERN PetscErrorCode PetscLogViewFromOptions(void);
296 PETSC_EXTERN PetscErrorCode PetscLogDump(const char[]);
297 
298 /* Status checking functions */
299 PETSC_EXTERN PetscErrorCode PetscLogIsActive(PetscBool *);
300 
301 /* Stage functions */
302 PETSC_EXTERN PetscErrorCode PetscLogStageRegister(const char[], PetscLogStage *);
303 PETSC_EXTERN PetscErrorCode PetscLogStagePush(PetscLogStage);
304 PETSC_EXTERN PetscErrorCode PetscLogStagePop(void);
305 PETSC_EXTERN PetscErrorCode PetscLogStageSetActive(PetscLogStage, PetscBool);
306 PETSC_EXTERN PetscErrorCode PetscLogStageGetActive(PetscLogStage, PetscBool *);
307 PETSC_EXTERN PetscErrorCode PetscLogStageSetVisible(PetscLogStage, PetscBool);
308 PETSC_EXTERN PetscErrorCode PetscLogStageGetVisible(PetscLogStage, PetscBool *);
309 PETSC_EXTERN PetscErrorCode PetscLogStageGetId(const char[], PetscLogStage *);
310 
311 /* Event functions */
312 PETSC_EXTERN PetscErrorCode PetscLogEventRegister(const char[], PetscClassId, PetscLogEvent *);
313 PETSC_EXTERN PetscErrorCode PetscLogEventSetCollective(PetscLogEvent, PetscBool);
314 PETSC_EXTERN PetscErrorCode PetscLogEventIncludeClass(PetscClassId);
315 PETSC_EXTERN PetscErrorCode PetscLogEventExcludeClass(PetscClassId);
316 PETSC_EXTERN PetscErrorCode PetscLogEventActivate(PetscLogEvent);
317 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivate(PetscLogEvent);
318 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent);
319 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent);
320 PETSC_EXTERN PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent, PetscBool);
321 PETSC_EXTERN PetscErrorCode PetscLogEventActivateClass(PetscClassId);
322 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivateClass(PetscClassId);
323 PETSC_EXTERN PetscErrorCode PetscLogEventGetId(const char[], PetscLogEvent *);
324 PETSC_EXTERN PetscErrorCode PetscLogEventGetPerfInfo(int, PetscLogEvent, PetscEventPerfInfo *);
325 PETSC_EXTERN PetscErrorCode PetscLogEventSetDof(PetscLogEvent, PetscInt, PetscLogDouble);
326 PETSC_EXTERN PetscErrorCode PetscLogEventSetError(PetscLogEvent, PetscInt, PetscLogDouble);
327 PETSC_EXTERN PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent);
328 PETSC_EXTERN PetscErrorCode PetscLogPopCurrentEvent_Internal(void);
329 
330 PETSC_EXTERN PetscBool PetscLogMemory;
331 
332 PETSC_EXTERN PetscBool      PetscLogSyncOn; /* true if logging synchronization is enabled */
333 PETSC_EXTERN PetscErrorCode PetscLogEventSynchronize(PetscLogEvent, MPI_Comm);
334 
/*
   The three macros below act only when (1) the relevant handler pointer is non-NULL (PetscLogPLB for Sync and Begin,
   PetscLogPLE for End), (2) the current stage petsc_stageLog->curStage is active and (3) event e is active in the
   current stage's event log; otherwise they evaluate to PETSC_SUCCESS without calling anything.

   PetscLogEventSync(e, comm)  - calls PetscLogEventSynchronize(e, comm)
   PetscLogEventBegin(e, ...)  - calls (*PetscLogPLB)(e, 0, o1, o2, o3, o4) and, if that returns zero, PetscLogPushCurrentEvent_Internal(e)
   PetscLogEventEnd(e, ...)    - calls (*PetscLogPLE)(e, 0, o1, o2, o3, o4) and, if that returns zero, PetscLogPopCurrentEvent_Internal()

   In Begin/End the two calls are joined with ||, so a failure is reported as the value 1 cast to PetscErrorCode rather
   than as the original error code. The argument e appears more than once in each expansion and must not have side effects.
*/
335   #define PetscLogEventSync(e, comm) \
336     ((PetscErrorCode)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? PetscLogEventSynchronize((e), (comm)) : PETSC_SUCCESS)))
337 
338   #define PetscLogEventBegin(e, o1, o2, o3, o4) \
339     ((PetscErrorCode)((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (PetscErrorCode)(((*PetscLogPLB)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPushCurrentEvent_Internal(e)) : PETSC_SUCCESS))
340 
341   #define PetscLogEventEnd(e, o1, o2, o3, o4) \
342     ((PetscErrorCode)((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (PetscErrorCode)(((*PetscLogPLE)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPopCurrentEvent_Internal()) : PETSC_SUCCESS))
343 
344 PETSC_EXTERN PetscErrorCode PetscLogEventGetFlops(PetscLogEvent, PetscLogDouble *);
345 PETSC_EXTERN PetscErrorCode PetscLogEventZeroFlops(PetscLogEvent);
346 
347 /* Global flop counter */
348 PETSC_EXTERN PetscLogDouble petsc_TotalFlops;
349 PETSC_EXTERN PetscLogDouble petsc_irecv_ct;
350 PETSC_EXTERN PetscLogDouble petsc_isend_ct;
351 PETSC_EXTERN PetscLogDouble petsc_recv_ct;
352 PETSC_EXTERN PetscLogDouble petsc_send_ct;
353 PETSC_EXTERN PetscLogDouble petsc_irecv_len;
354 PETSC_EXTERN PetscLogDouble petsc_isend_len;
355 PETSC_EXTERN PetscLogDouble petsc_recv_len;
356 PETSC_EXTERN PetscLogDouble petsc_send_len;
357 PETSC_EXTERN PetscLogDouble petsc_allreduce_ct;
358 PETSC_EXTERN PetscLogDouble petsc_gather_ct;
359 PETSC_EXTERN PetscLogDouble petsc_scatter_ct;
360 PETSC_EXTERN PetscLogDouble petsc_wait_ct;
361 PETSC_EXTERN PetscLogDouble petsc_wait_any_ct;
362 PETSC_EXTERN PetscLogDouble petsc_wait_all_ct;
363 PETSC_EXTERN PetscLogDouble petsc_sum_of_waits_ct;
364 
365 /* Thread local storage */
366 PETSC_EXTERN_TLS PetscLogDouble petsc_TotalFlops_th;
367 PETSC_EXTERN_TLS PetscLogDouble petsc_irecv_ct_th;
368 PETSC_EXTERN_TLS PetscLogDouble petsc_isend_ct_th;
369 PETSC_EXTERN_TLS PetscLogDouble petsc_recv_ct_th;
370 PETSC_EXTERN_TLS PetscLogDouble petsc_send_ct_th;
371 PETSC_EXTERN_TLS PetscLogDouble petsc_irecv_len_th;
372 PETSC_EXTERN_TLS PetscLogDouble petsc_isend_len_th;
373 PETSC_EXTERN_TLS PetscLogDouble petsc_recv_len_th;
374 PETSC_EXTERN_TLS PetscLogDouble petsc_send_len_th;
375 PETSC_EXTERN_TLS PetscLogDouble petsc_allreduce_ct_th;
376 PETSC_EXTERN_TLS PetscLogDouble petsc_gather_ct_th;
377 PETSC_EXTERN_TLS PetscLogDouble petsc_scatter_ct_th;
378 PETSC_EXTERN_TLS PetscLogDouble petsc_wait_ct_th;
379 PETSC_EXTERN_TLS PetscLogDouble petsc_wait_any_ct_th;
380 PETSC_EXTERN_TLS PetscLogDouble petsc_wait_all_ct_th;
381 PETSC_EXTERN_TLS PetscLogDouble petsc_sum_of_waits_ct_th;
382 
383   /*
384    Flop counting:  We count each arithmetic operation (e.g., addition, multiplication) separately.
385 
386    For the complex numbers version, note that
387        1 complex addition = 2 flops
388        1 complex multiplication = 6 flops,
389    where we define 1 flop as that for a double precision scalar.  We roughly approximate
390    flop counting for complex numbers by multiplying the total flops by 4; this corresponds
391    to the assumption that we're counting mostly additions and multiplications -- and
392    roughly the same number of each.  More accurate counting could be done by distinguishing
393    among the various arithmetic operations.
394  */
395 
396   #if defined(PETSC_USE_COMPLEX)
397     #define PETSC_FLOPS_PER_OP 4.0
398   #else
399     #define PETSC_FLOPS_PER_OP 1.0
400   #endif
401 
402 /*@C
403        PetscLogFlops - Log how many flops are performed in a calculation
404 
405    Input Parameter:
406 .   flops - the number of flops
407 
408    Level: intermediate
409 
410    Note:
411      To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double,
412      not an integer. Use `PetscLogFlops`(4.0*n) not `PetscLogFlops`(4*n)
413 
414 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`
415 @*/
416 static inline PetscErrorCode PetscLogFlops(PetscLogDouble n)
417 {
418   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops");
419   return PetscAddLogDouble(&petsc_TotalFlops, &petsc_TotalFlops_th, PETSC_FLOPS_PER_OP * n);
420 }
421 
422   /*
423      These are used internally in the PETSc routines to keep a count of MPI messages and
424    their sizes.
425 
426      This does not work for MPI-Uni because our include/petsc/mpiuni/mpi.h file
427    uses macros to defined the MPI operations.
428 
429      It does not work correctly from HP-UX because it processes the
430    macros in a way that sometimes it double counts, hence
431    PETSC_HAVE_BROKEN_RECURSIVE_MACRO
432 
433      It does not work with Windows because winmpich lacks MPI_Type_size()
434 */
435   #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO)
436 /*
437    Logging of MPI activities
438 */
439 static inline PetscErrorCode PetscMPITypeSize(PetscInt count, MPI_Datatype type, PetscLogDouble *length, PetscLogDouble *length_th)
440 {
441   PetscMPIInt typesize;
442 
443   if (type == MPI_DATATYPE_NULL) return PETSC_SUCCESS;
444   PetscCallMPI(MPI_Type_size(type, &typesize));
445   return PetscAddLogDouble(length, length_th, (PetscLogDouble)(count * typesize));
446 }
447 
448 static inline PetscErrorCode PetscMPITypeSizeComm(MPI_Comm comm, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length, PetscLogDouble *length_th)
449 {
450   PetscMPIInt    typesize, size, p;
451   PetscLogDouble l;
452 
453   if (type == MPI_DATATYPE_NULL) return PETSC_SUCCESS;
454   PetscCallMPI(MPI_Comm_size(comm, &size));
455   PetscCallMPI(MPI_Type_size(type, &typesize));
456   for (p = 0, l = 0.0; p < size; ++p) l += (PetscLogDouble)(counts[p] * typesize);
457   return PetscAddLogDouble(length, length_th, l);
458 }
459 
460 /*
461     Returns 1 if the communicator is parallel else zero
462 */
463 static inline int PetscMPIParallelComm(MPI_Comm comm)
464 {
465   PetscMPIInt size;
466   MPI_Comm_size(comm, &size);
467   return size > 1;
468 }
469 
/*
       Counting wrappers for the nonblocking point-to-point calls. Each macro adds 1 to the message counter
       (petsc_irecv_ct or petsc_isend_ct), adds the message length in bytes, computed by PetscMPITypeSize() from
       count and datatype, to the matching length counter (petsc_irecv_len or petsc_isend_len), and then calls the
       MPI routine of the same name. The steps are joined with ||, so the macro yields 0 when every step returns 0
       and 1 otherwise, not the MPI error code itself. count and datatype appear twice in each expansion.
*/
470     #define MPI_Irecv(buf, count, datatype, source, tag, comm, request) \
471       (PetscAddLogDouble(&petsc_irecv_ct, &petsc_irecv_ct_th, 1) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len), &(petsc_irecv_len_th)) || MPI_Irecv((buf), (count), (datatype), (source), (tag), (comm), (request)))
472 
473     #define MPI_Irecv_c(buf, count, datatype, source, tag, comm, request) \
474       (PetscAddLogDouble(&petsc_irecv_ct, &petsc_irecv_ct_th, 1) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len), &(petsc_irecv_len_th)) || MPI_Irecv_c((buf), (count), (datatype), (source), (tag), (comm), (request)))
475 
476     #define MPI_Isend(buf, count, datatype, dest, tag, comm, request) \
477       (PetscAddLogDouble(&petsc_isend_ct, &petsc_isend_ct_th, 1) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len), &(petsc_isend_len_th)) || MPI_Isend((buf), (count), (datatype), (dest), (tag), (comm), (request)))
478 
479     #define MPI_Isend_c(buf, count, datatype, dest, tag, comm, request) \
480       (PetscAddLogDouble(&petsc_isend_ct, &petsc_isend_ct_th, 1) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len), &(petsc_isend_len_th)) || MPI_Isend_c((buf), (count), (datatype), (dest), (tag), (comm), (request)))
481 
482     #define MPI_Startall_irecv(count, datatype, number, requests) \
483       (PetscAddLogDouble(&petsc_irecv_ct, &petsc_irecv_ct_th, number) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len), &(petsc_irecv_len_th)) || ((number) && MPI_Startall((number), (requests))))
484 
485     #define MPI_Startall_isend(count, datatype, number, requests) \
486       (PetscAddLogDouble(&petsc_isend_ct, &petsc_isend_ct_th, number) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len), &(petsc_isend_len_th)) || ((number) && MPI_Startall((number), (requests))))
487 
/*
       Counting wrappers for persistent-send start, blocking point-to-point calls and waits.

       MPI_Start_isend           - adds 1 to petsc_isend_ct and the message length to petsc_isend_len, then calls MPI_Start()
       MPI_Recv, MPI_Recv_c      - add 1 to petsc_recv_ct and the message length to petsc_recv_len, then call the MPI routine
       MPI_Send, MPI_Send_c      - add 1 to petsc_send_ct and the message length to petsc_send_len, then call the MPI routine
       MPI_Wait                  - adds 1 to petsc_wait_ct and 1 to petsc_sum_of_waits_ct, then calls MPI_Wait()
       MPI_Waitany               - adds 1 to petsc_wait_any_ct and 1 to petsc_sum_of_waits_ct, then calls MPI_Waitany()

       The steps are joined with ||, so each macro yields 0 when every step returns 0 and 1 otherwise.
*/
488     #define MPI_Start_isend(count, datatype, requests) (PetscAddLogDouble(&petsc_isend_ct, &petsc_isend_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_isend_len), (&petsc_isend_len_th)) || MPI_Start((requests)))
489 
490     #define MPI_Recv(buf, count, datatype, source, tag, comm, status) \
491       (PetscAddLogDouble(&petsc_recv_ct, &petsc_recv_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len), (&petsc_recv_len_th)) || MPI_Recv((buf), (count), (datatype), (source), (tag), (comm), (status)))
492 
493     #define MPI_Recv_c(buf, count, datatype, source, tag, comm, status) \
494       (PetscAddLogDouble(&petsc_recv_ct, &petsc_recv_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len), &(petsc_recv_len_th)) || MPI_Recv_c((buf), (count), (datatype), (source), (tag), (comm), (status)))
495 
496     #define MPI_Send(buf, count, datatype, dest, tag, comm) \
497       (PetscAddLogDouble(&petsc_send_ct, &petsc_send_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Send((buf), (count), (datatype), (dest), (tag), (comm)))
498 
499     #define MPI_Send_c(buf, count, datatype, dest, tag, comm) \
500       (PetscAddLogDouble(&petsc_send_ct, &petsc_send_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Send_c((buf), (count), (datatype), (dest), (tag), (comm)))
501 
502     #define MPI_Wait(request, status) (PetscAddLogDouble(&petsc_wait_ct, &petsc_wait_ct_th, 1) || PetscAddLogDouble(&petsc_sum_of_waits_ct, &petsc_sum_of_waits_ct_th, 1) || MPI_Wait((request), (status)))
503 
504     #define MPI_Waitany(a, b, c, d) (PetscAddLogDouble(&petsc_wait_any_ct, &petsc_wait_any_ct_th, 1) || PetscAddLogDouble(&petsc_sum_of_waits_ct, &petsc_sum_of_waits_ct_th, 1) || MPI_Waitany((a), (b), (c), (d)))
505 
506     #define MPI_Waitall(count, array_of_requests, array_of_statuses) \
507       (PetscAddLogDouble(&petsc_wait_all_ct, &petsc_wait_all_ct_th, 1) || PetscAddLogDouble(&petsc_sum_of_waits_ct, &petsc_sum_of_waits_ct_th, count) || MPI_Waitall((count), (array_of_requests), (array_of_statuses)))
508 
509     #define MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm) (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || MPI_Allreduce((sendbuf), (recvbuf), (count), (datatype), (op), (comm)))
510 
511     #define MPI_Bcast(buffer, count, datatype, root, comm) (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || MPI_Bcast((buffer), (count), (datatype), (root), (comm)))
512 
513     #define MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm) \
514       (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || MPI_Reduce_scatter_block((sendbuf), (recvbuf), (recvcount), (datatype), (op), (comm)))
515 
516     #define MPI_Alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) \
517       (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Alltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm)))
518 
519     #define MPI_Alltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm) \
520       (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Alltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm)))
521 
/*
   Logged versions of the blocking gather collectives; all of them update the petsc_gather_ct counters.

   The all-gather forms pass PetscMPIParallelComm(comm) as the increment. The rooted forms pass 1 and
   also hand the send count/type to PetscMPITypeSize() for the petsc_send_len counters. The MPI routine
   at the end of each || chain runs only if every bookkeeping call before it evaluated to zero.
*/
#define MPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) \
  (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, PetscMPIParallelComm(comm)) || \
   MPI_Allgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm)))

#define MPI_Allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm) \
  (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, PetscMPIParallelComm(comm)) || \
   MPI_Allgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm)))

#define MPI_Gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \
  (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, 1) || \
   PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || \
   MPI_Gather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))

#define MPI_Gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm) \
  (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, 1) || \
   PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || \
   MPI_Gatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm)))
533 
/*
   Logged versions of the blocking scatter collectives.

   Each passes 1 to PetscAddLogDouble() for the petsc_scatter_ct counters and the receive count/type
   to PetscMPITypeSize() for the petsc_recv_len counters; the MPI routine runs only if both
   bookkeeping calls evaluated to zero.
*/
#define MPI_Scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \
  (PetscAddLogDouble(&petsc_scatter_ct, &petsc_scatter_ct_th, 1) || \
   PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len), (&petsc_recv_len_th)) || \
   MPI_Scatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))

#define MPI_Scatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm) \
  (PetscAddLogDouble(&petsc_scatter_ct, &petsc_scatter_ct_th, 1) || \
   PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len), (&petsc_recv_len_th)) || \
   MPI_Scatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))
539 
/*
   Logged versions of the nonblocking collectives. The bookkeeping of each wrapper is the same as
   that of its blocking counterpart; only the trailing request argument is new.

   As with the blocking wrappers, each macro is an || chain whose last operand, the MPI routine
   itself, is evaluated only when every bookkeeping call before it evaluated to zero.
*/

/* all-to-all family: petsc_allreduce_ct and petsc_send_len counters */
#define MPI_Ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \
  (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || \
   PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || \
   MPI_Ialltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request)))

#define MPI_Ialltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm, request) \
  (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || \
   PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || \
   MPI_Ialltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm), (request)))

/* all-gather family: petsc_gather_ct counters only */
#define MPI_Iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \
  (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, PetscMPIParallelComm(comm)) || \
   MPI_Iallgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request)))

#define MPI_Iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm, request) \
  (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, PetscMPIParallelComm(comm)) || \
   MPI_Iallgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm), (request)))

/* rooted gathers: petsc_gather_ct (increment 1) and petsc_send_len counters */
#define MPI_Igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
  (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, 1) || \
   PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || \
   MPI_Igather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)))

#define MPI_Igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm, request) \
  (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, 1) || \
   PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || \
   MPI_Igatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm), (request)))

/* rooted scatters: petsc_scatter_ct (increment 1) and petsc_recv_len counters */
#define MPI_Iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
  (PetscAddLogDouble(&petsc_scatter_ct, &petsc_scatter_ct_th, 1) || \
   PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len), (&petsc_recv_len_th)) || \
   MPI_Iscatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)))

#define MPI_Iscatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
  (PetscAddLogDouble(&petsc_scatter_ct, &petsc_scatter_ct_th, 1) || \
   PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len), (&petsc_recv_len_th)) || \
   MPI_Iscatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)))
563 
564   #else
565 
/*
   Definitions used in this #else branch (the #endif below names the condition
   !MPIUNI_H && !PETSC_HAVE_BROKEN_RECURSIVE_MACRO); no counters are touched here.

   count and datatype are accepted but unused. The Startall forms call MPI_Startall() only when
   number is nonzero and, because of the &&, evaluate to 0 or 1 rather than to the value
   MPI_Startall() returned. number appears twice in the expansion, so it is evaluated twice
   whenever it is nonzero.
*/
566     #define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests)))
567 
568     #define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests)))
569 
/* Plain forwarding to MPI_Start(); the macro's value is whatever MPI_Start() returns */
570     #define MPI_Start_isend(count, datatype, requests) (MPI_Start((requests)))
571 
572   #endif /* !MPIUNI_H && ! PETSC_HAVE_BROKEN_RECURSIVE_MACRO */
573 
574 #else /* ---Logging is turned off --------------------------------------------*/
575 
/* With logging compiled out, PetscLogMemory is the constant PETSC_FALSE */
576   #define PetscLogMemory PETSC_FALSE
577 
/*
   PetscLogFlops() still evaluates its argument once (the value is cast to void and discarded);
   PetscGetFlops() stores 0.0 through its pointer argument. Both evaluate to PETSC_SUCCESS.
*/
578   #define PetscLogFlops(n) ((void)(n), PETSC_SUCCESS)
579   #define PetscGetFlops(a) (*(a) = 0.0, PETSC_SUCCESS)
580 
/*
   Stage API when logging is compiled out: every call evaluates to PETSC_SUCCESS and does no logging work.

   The Get routines still store a definite value through their output pointer (stage id 0, not active,
   not visible) so that a caller which reads the result afterwards never sees an uninitialized variable.
   This matches what PetscLogStageGetId(), PetscLogEventGetId() and PetscLogIsActive() already do.
   The remaining macros discard their arguments without evaluating them.
*/
#define PetscLogStageRegister(a, b)   PETSC_SUCCESS
#define PetscLogStagePush(a)          PETSC_SUCCESS
#define PetscLogStagePop()            PETSC_SUCCESS
#define PetscLogStageSetActive(a, b)  PETSC_SUCCESS
#define PetscLogStageGetActive(a, b)  (*(b) = PETSC_FALSE, PETSC_SUCCESS)
#define PetscLogStageGetVisible(a, b) (*(b) = PETSC_FALSE, PETSC_SUCCESS)
#define PetscLogStageSetVisible(a, b) PETSC_SUCCESS
#define PetscLogStageGetId(a, b)      (*(b) = 0, PETSC_SUCCESS)
589 
/*
   Event API when logging is compiled out. Each macro expands to PETSC_SUCCESS and drops its
   arguments without evaluating them. The one exception is PetscLogEventGetId(), which stores 0
   through its second argument. Note that PetscLogEventRegister() and PetscLogEventGetPerfInfo()
   do not write to their output arguments.
*/
590   #define PetscLogEventRegister(a, b, c)    PETSC_SUCCESS
591   #define PetscLogEventSetCollective(a, b)  PETSC_SUCCESS
592   #define PetscLogEventIncludeClass(a)      PETSC_SUCCESS
593   #define PetscLogEventExcludeClass(a)      PETSC_SUCCESS
594   #define PetscLogEventActivate(a)          PETSC_SUCCESS
595   #define PetscLogEventDeactivate(a)        PETSC_SUCCESS
596   #define PetscLogEventDeactivatePush(a)    PETSC_SUCCESS
597   #define PetscLogEventDeactivatePop(a)     PETSC_SUCCESS
598   #define PetscLogEventActivateClass(a)     PETSC_SUCCESS
599   #define PetscLogEventDeactivateClass(a)   PETSC_SUCCESS
600   #define PetscLogEventSetActiveAll(a, b)   PETSC_SUCCESS
601   #define PetscLogEventGetId(a, b)          (*(b) = 0, PETSC_SUCCESS)
602   #define PetscLogEventGetPerfInfo(a, b, c) PETSC_SUCCESS
603   #define PetscLogEventSetDof(a, b, c)      PETSC_SUCCESS
604   #define PetscLogEventSetError(a, b, c)    PETSC_SUCCESS
605 
/*
   With logging compiled out these four names are object-like macros that expand to the constant
   PETSC_SUCCESS (NOTE(review): presumably they are the logging hook pointers when PETSC_USE_LOG
   is defined -- confirm against the logging-enabled branch).
*/
606   #define PetscLogPLB PETSC_SUCCESS
607   #define PetscLogPLE PETSC_SUCCESS
608   #define PetscLogPHC PETSC_SUCCESS
609   #define PetscLogPHD PETSC_SUCCESS
610 
/* Object logging stubs: expand to PETSC_SUCCESS, arguments are not evaluated */
611   #define PetscLogObjectParents(p, n, c) PETSC_SUCCESS
612   #define PetscLogObjectCreate(h)        PETSC_SUCCESS
613   #define PetscLogObjectDestroy(h)       PETSC_SUCCESS
/* Unlike its neighbours this one is declared as a real (variadic, printf-style checked) function even in this branch */
614 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3);
615 
/*
   Start/configure stubs: all expand to PETSC_SUCCESS without evaluating their arguments, except
   PetscLogIsActive(), which stores PETSC_FALSE through its argument.
*/
616   #define PetscLogDefaultBegin()     PETSC_SUCCESS
617   #define PetscLogAllBegin()         PETSC_SUCCESS
618   #define PetscLogNestedBegin()      PETSC_SUCCESS
619   #define PetscLogTraceBegin(file)   PETSC_SUCCESS
620   #define PetscLogActions(a)         PETSC_SUCCESS
621   #define PetscLogObjects(a)         PETSC_SUCCESS
622   #define PetscLogSetThreshold(a, b) PETSC_SUCCESS
623   #define PetscLogSet(lb, le)        PETSC_SUCCESS
624   #define PetscLogIsActive(flag)     (*(flag) = PETSC_FALSE, PETSC_SUCCESS)
625 
/* Output stubs: nothing is viewed or dumped */
626   #define PetscLogView(viewer)      PETSC_SUCCESS
627   #define PetscLogViewFromOptions() PETSC_SUCCESS
628   #define PetscLogDump(c)           PETSC_SUCCESS
629 
/* Event timing stubs: the event and object arguments are discarded by the preprocessor */
630   #define PetscLogEventSync(e, comm)                            PETSC_SUCCESS
631   #define PetscLogEventBegin(e, o1, o2, o3, o4)                 PETSC_SUCCESS
632   #define PetscLogEventEnd(e, o1, o2, o3, o4)                   PETSC_SUCCESS
633 
/*
   If PETSC_USE_LOG is NOT defined, these still need to be!

   They are written exactly like the definitions used when logging is enabled but the MPI wrappers
   are not, with every forwarded argument parenthesized. count and datatype are unused. The Startall
   forms call MPI_Startall() only for a nonzero number and evaluate to 0 or 1 (the result of the &&);
   MPI_Start_isend() evaluates to whatever MPI_Start() returns.
*/
#define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests)))
#define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests)))
#define MPI_Start_isend(count, datatype, requests)            (MPI_Start((requests)))
638 
639 #endif /* PETSC_USE_LOG */
640 
/*
   PetscPreLoadBegin - opens a preloading region; must be paired with PetscPreLoadEnd().

   flag - initial value of the local PetscPreLoading; the -preload option, read with
          PetscOptionsGetBool(), may override it
   name - name of the first log stage of the region

   The macro deliberately leaves a `do {` and a `for (...) {` open; PetscPreLoadEnd() closes them.
   PetscPreLoadMax is (int)PetscPreLoading, so the loop body runs once when preloading is off
   (PetscPreLoadIt = 0) and twice when it is on (PetscPreLoadIt = 0, 1).

   On every pass the macro sets the global PetscPreLoadingOn, calls PetscBarrier(NULL), registers the
   stage on the first pass or looks up its id on later passes, marks the stage active only when
   !PetscPreLoadMax || PetscPreLoadIt is true (that is, when preloading is off or on the second pass)
   and pushes it.

   The locals PetscPreLoading, PetscPreLoadMax, PetscPreLoadIt and _stageNum stay in scope for the code
   between Begin and End and are used by PetscPreLoadStage() and PetscPreLoadEnd().
   NOTE(review): every call is wrapped in PetscCall(), so this presumably has to be used inside a
   function returning PetscErrorCode -- confirm against the definition of PetscCall().
*/
641 #define PetscPreLoadBegin(flag, name) \
642   do { \
643     PetscBool     PetscPreLoading = flag; \
644     int           PetscPreLoadMax, PetscPreLoadIt; \
645     PetscLogStage _stageNum; \
646     PetscCall(PetscOptionsGetBool(NULL, NULL, "-preload", &PetscPreLoading, NULL)); \
647     PetscPreLoadMax     = (int)(PetscPreLoading); \
648     PetscPreLoadingUsed = PetscPreLoading ? PETSC_TRUE : PetscPreLoadingUsed; \
649     for (PetscPreLoadIt = 0; PetscPreLoadIt <= PetscPreLoadMax; PetscPreLoadIt++) { \
650       PetscPreLoadingOn = PetscPreLoading; \
651       PetscCall(PetscBarrier(NULL)); \
652       if (PetscPreLoadIt > 0) PetscCall(PetscLogStageGetId(name, &_stageNum)); \
653       else PetscCall(PetscLogStageRegister(name, &_stageNum)); \
654       PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \
655       PetscCall(PetscLogStagePush(_stageNum));
656 
/*
   PetscPreLoadEnd - closes the region opened by PetscPreLoadBegin().

   Pops the current log stage and clears the local PetscPreLoading, so that on a second pass
   PetscPreLoadBegin() assigns PETSC_FALSE to PetscPreLoadingOn. It then supplies the two closing
   braces (for the `for` loop and the `do` block) and the `while (0)` that PetscPreLoadBegin()
   left open. The caller provides the final semicolon.
*/
657 #define PetscPreLoadEnd() \
658   PetscCall(PetscLogStagePop()); \
659   PetscPreLoading = PETSC_FALSE; \
660   } \
661   } \
662   while (0)
663 
/*
   PetscPreLoadStage - switches to another log stage inside a PetscPreLoadBegin()/PetscPreLoadEnd() region.

   name - name of the stage to switch to

   Pops the stage that is currently pushed, then registers `name` on the first pass or looks up its
   id on later passes, applies the same activity rule as PetscPreLoadBegin() and pushes the stage.
   It relies on PetscPreLoadIt, PetscPreLoadMax and _stageNum, the locals declared by
   PetscPreLoadBegin(), and therefore only compiles between Begin and End.
*/
664 #define PetscPreLoadStage(name) \
665   do { \
666     PetscCall(PetscLogStagePop()); \
667     if (PetscPreLoadIt > 0) PetscCall(PetscLogStageGetId(name, &_stageNum)); \
668     else PetscCall(PetscLogStageRegister(name, &_stageNum)); \
669     PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \
670     PetscCall(PetscLogStagePush(_stageNum)); \
671   } while (0)
672 
673 /* Global preloading state; both variables are assigned inside PetscPreLoadBegin() */
674 PETSC_EXTERN PetscBool PetscPreLoadingUsed; /* true if we are or have done preloading */
675 PETSC_EXTERN PetscBool PetscPreLoadingOn;   /* true if we are currently in a preloading calculation */
676 
677 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_DEVICE)
678 
679 /* Global GPU counters; each one is passed to PetscAddLogDouble() or PetscAddLogDoubleCnt() by the inline function named beside it */
680 PETSC_EXTERN PetscLogDouble petsc_ctog_ct;        /* PetscLogCpuToGpu(), first (count) slot */
681 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct;        /* PetscLogGpuToCpu(), first (count) slot */
682 PETSC_EXTERN PetscLogDouble petsc_ctog_sz;        /* PetscLogCpuToGpu(), second (size) slot */
683 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz;        /* PetscLogGpuToCpu(), second (size) slot */
684 PETSC_EXTERN PetscLogDouble petsc_ctog_ct_scalar; /* PetscLogCpuToGpuScalar(), count slot */
685 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct_scalar; /* PetscLogGpuToCpuScalar(), count slot */
686 PETSC_EXTERN PetscLogDouble petsc_ctog_sz_scalar; /* PetscLogCpuToGpuScalar(), size slot */
687 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz_scalar; /* PetscLogGpuToCpuScalar(), size slot */
688 PETSC_EXTERN PetscLogDouble petsc_gflops;         /* PetscLogGpuFlops() */
689 PETSC_EXTERN PetscLogDouble petsc_gtime;          /* PetscLogGpuTimeAdd() */
690 
691 /* Thread local storage: one _th variable per global counter above, passed alongside it in every update call */
692 PETSC_EXTERN_TLS PetscLogDouble petsc_ctog_ct_th;
693 PETSC_EXTERN_TLS PetscLogDouble petsc_gtoc_ct_th;
694 PETSC_EXTERN_TLS PetscLogDouble petsc_ctog_sz_th;
695 PETSC_EXTERN_TLS PetscLogDouble petsc_gtoc_sz_th;
696 PETSC_EXTERN_TLS PetscLogDouble petsc_ctog_ct_scalar_th;
697 PETSC_EXTERN_TLS PetscLogDouble petsc_gtoc_ct_scalar_th;
698 PETSC_EXTERN_TLS PetscLogDouble petsc_ctog_sz_scalar_th;
699 PETSC_EXTERN_TLS PetscLogDouble petsc_gtoc_sz_scalar_th;
700 PETSC_EXTERN_TLS PetscLogDouble petsc_gflops_th;
701 PETSC_EXTERN_TLS PetscLogDouble petsc_gtime_th;
702 
/* GPU timing entry points implemented outside this header; they take no arguments and return an error code */
703 PETSC_EXTERN PetscErrorCode PetscLogGpuTime(void);
704 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeBegin(void);
705 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeEnd(void);
706 
707 /*@C
708        PetscLogGpuFlops - Log how many flops are performed in a calculation on the device
709 
710    Input Parameter:
711 .   flops - the number of flops
712 
713    Level: intermediate
714 
715    Notes:
716      To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double,
717      not an integer. Use `PetscLogFlops`(4.0*n) not `PetscLogFlops`(4*n)
718 
719      The values are also added to the total flop count for the MPI rank that is set with `PetscLogFlops()`; hence the number of flops
720      just on the CPU would be the value from set from `PetscLogFlops()` minus the value set from `PetscLogGpuFlops()`
721 
722 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogFlops()`, `PetscLogGpuTimeBegin()`, `PetscLogGpuTimeEnd()`
723 @*/
724 static inline PetscErrorCode PetscLogGpuFlops(PetscLogDouble n)
725 {
726   PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops");
727   PetscCall(PetscAddLogDouble(&petsc_TotalFlops, &petsc_TotalFlops_th, PETSC_FLOPS_PER_OP * n));
728   PetscCall(PetscAddLogDouble(&petsc_gflops, &petsc_gflops_th, PETSC_FLOPS_PER_OP * n));
729   return PETSC_SUCCESS;
730 }
731 
/*
   PetscLogGpuTimeAdd - passes t to PetscAddLogDouble() for the petsc_gtime counter and its
   thread-local twin petsc_gtime_th, and returns that call's error code unchanged.
   NOTE(review): t is presumably a time in seconds -- confirm against the callers.
*/
732 static inline PetscErrorCode PetscLogGpuTimeAdd(PetscLogDouble t)
733 {
734   return PetscAddLogDouble(&petsc_gtime, &petsc_gtime_th, t);
735 }
736 
/*
   PetscLogCpuToGpu - records one CPU-to-GPU transfer: forwards size to PetscAddLogDoubleCnt() together
   with the petsc_ctog_ct / petsc_ctog_sz counters and their thread-local twins, and returns that
   call's error code. NOTE(review): size is presumably in bytes -- confirm against the callers.
*/
737 static inline PetscErrorCode PetscLogCpuToGpu(PetscLogDouble size)
738 {
739   return PetscAddLogDoubleCnt(&petsc_ctog_ct, &petsc_ctog_sz, &petsc_ctog_ct_th, &petsc_ctog_sz_th, size);
740 }
741 
/*
   PetscLogGpuToCpu - records one GPU-to-CPU transfer: forwards size to PetscAddLogDoubleCnt() together
   with the petsc_gtoc_ct / petsc_gtoc_sz counters and their thread-local twins, and returns that
   call's error code.
*/
742 static inline PetscErrorCode PetscLogGpuToCpu(PetscLogDouble size)
743 {
744   return PetscAddLogDoubleCnt(&petsc_gtoc_ct, &petsc_gtoc_sz, &petsc_gtoc_ct_th, &petsc_gtoc_sz_th, size);
745 }
746 
/*
   PetscLogCpuToGpuScalar - same call as PetscLogCpuToGpu() but against the separate *_scalar counters
   (petsc_ctog_ct_scalar / petsc_ctog_sz_scalar and their thread-local twins).
   NOTE(review): presumably meant for transfers of individual scalar values -- confirm against the callers.
*/
747 static inline PetscErrorCode PetscLogCpuToGpuScalar(PetscLogDouble size)
748 {
749   return PetscAddLogDoubleCnt(&petsc_ctog_ct_scalar, &petsc_ctog_sz_scalar, &petsc_ctog_ct_scalar_th, &petsc_ctog_sz_scalar_th, size);
750 }
751 
/*
   PetscLogGpuToCpuScalar - same call as PetscLogGpuToCpu() but against the separate *_scalar counters
   (petsc_gtoc_ct_scalar / petsc_gtoc_sz_scalar and their thread-local twins).
*/
752 static inline PetscErrorCode PetscLogGpuToCpuScalar(PetscLogDouble size)
753 {
754   return PetscAddLogDoubleCnt(&petsc_gtoc_ct_scalar, &petsc_gtoc_sz_scalar, &petsc_gtoc_ct_scalar_th, &petsc_gtoc_sz_scalar_th, size);
755 }
756 #else
757 
/*
   Device logging stubs for builds where PETSC_USE_LOG and PETSC_HAVE_DEVICE are not both defined.
   Every macro expands to the constant PETSC_SUCCESS. The argument is dropped by the preprocessor
   and is NOT evaluated (unlike the logging-off PetscLogFlops(), which evaluates and discards it),
   so expressions with side effects must not be passed here.
*/
758   #define PetscLogCpuToGpu(a)       PETSC_SUCCESS
759   #define PetscLogGpuToCpu(a)       PETSC_SUCCESS
760   #define PetscLogCpuToGpuScalar(a) PETSC_SUCCESS
761   #define PetscLogGpuToCpuScalar(a) PETSC_SUCCESS
762   #define PetscLogGpuFlops(a)       PETSC_SUCCESS
763   #define PetscLogGpuTime()         PETSC_SUCCESS
764   #define PetscLogGpuTimeAdd(a)     PETSC_SUCCESS
765   #define PetscLogGpuTimeBegin()    PETSC_SUCCESS
766   #define PetscLogGpuTimeEnd()      PETSC_SUCCESS
767 
768 #endif /* PETSC_USE_LOG && PETSC_HAVE_DEVICE */
769 
770 /* remove TLS defines */
771 #undef PETSC_EXTERN_TLS
772 #undef PETSC_TLS
773 
774 #endif
775