xref: /petsc/include/petsclog.h (revision d5b43468fb8780a8feea140ccd6fa3e6a50411cc)
1 /*
2     Defines profile/logging in PETSc.
3 */
4 #ifndef PETSCLOG_H
5 #define PETSCLOG_H
6 
7 #include <petscsys.h>
8 #include <petsctime.h>
9 
10 /* SUBMANSEC = Sys */
11 
12 /* General logging of information; different from event logging */
13 PETSC_EXTERN PetscErrorCode PetscInfo_Private(const char[], PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(3, 4);
14 #if defined(PETSC_USE_INFO)
15   #define PetscInfo(A, ...) PetscInfo_Private(PETSC_FUNCTION_NAME, ((PetscObject)A), __VA_ARGS__)
16 #else
17   #define PetscInfo(A, ...) 0
18 #endif
19 
/*
   Deprecated numbered variants of PetscInfo(). Each one expands to PETSC_DEPRECATED_MACRO(...)
   followed by PetscInfo() with the same arguments; all nine are variadic, so the digit in the
   name no longer constrains how many format arguments are passed.
*/
#define PetscInfo1(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
#define PetscInfo2(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
#define PetscInfo3(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
#define PetscInfo4(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
#define PetscInfo5(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
#define PetscInfo6(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
#define PetscInfo7(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
#define PetscInfo8(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
#define PetscInfo9(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
29 
30 /*E
31   PetscInfoCommFlag - Describes the method by which to filter `PetscInfo()` by communicator size
32 
33   Used as an input for `PetscInfoSetFilterCommSelf()`
34 
35 $ `PETSC_INFO_COMM_ALL` - Default uninitialized value. `PetscInfo()` will not filter based on
36 communicator size (i.e. will print for all communicators)
37 $ `PETSC_INFO_COMM_NO_SELF` - `PetscInfo()` will NOT print for communicators with size = 1 (i.e. *_COMM_SELF)
38 $ `PETSC_INFO_COMM_ONLY_SELF` - `PetscInfo()` will ONLY print for communicators with size = 1
39 
40   Level: intermediate
41 
42 .seealso: `PetscInfo()`, `PetscInfoSetFromOptions()`, `PetscInfoSetFilterCommSelf()`
43 E*/
typedef enum {
  PETSC_INFO_COMM_ALL       = -1, /* default: no filtering by communicator size */
  PETSC_INFO_COMM_NO_SELF   = 0,  /* do not print for communicators of size 1 */
  PETSC_INFO_COMM_ONLY_SELF = 1   /* print only for communicators of size 1 */
} PetscInfoCommFlag;
49 
/*
   Declarations for configuring and querying PetscInfo() output. Only the prototypes appear
   in this header; the implementations are defined elsewhere.
   NOTE(review): PetscInfoCommFlags is presumably the string table matching PetscInfoCommFlag -- confirm.
*/
PETSC_EXTERN const char *const PetscInfoCommFlags[];
PETSC_EXTERN PetscErrorCode    PetscInfoDeactivateClass(PetscClassId);
PETSC_EXTERN PetscErrorCode    PetscInfoActivateClass(PetscClassId);
PETSC_EXTERN PetscErrorCode    PetscInfoEnabled(PetscClassId, PetscBool *);
PETSC_EXTERN PetscErrorCode    PetscInfoAllow(PetscBool);
PETSC_EXTERN PetscErrorCode    PetscInfoSetFile(const char[], const char[]);
PETSC_EXTERN PetscErrorCode    PetscInfoGetFile(char **, FILE **);
PETSC_EXTERN PetscErrorCode    PetscInfoSetClasses(PetscBool, PetscInt, const char *const *);
PETSC_EXTERN PetscErrorCode    PetscInfoGetClass(const char *, PetscBool *);
PETSC_EXTERN PetscErrorCode    PetscInfoGetInfo(PetscBool *, PetscBool *, PetscBool *, PetscBool *, PetscInfoCommFlag *);
PETSC_EXTERN PetscErrorCode    PetscInfoProcessClass(const char[], PetscInt, const PetscClassId[]);
PETSC_EXTERN PetscErrorCode    PetscInfoSetFilterCommSelf(PetscInfoCommFlag);
PETSC_EXTERN PetscErrorCode    PetscInfoSetFromOptions(PetscOptions);
PETSC_EXTERN PetscErrorCode    PetscInfoDestroy(void);
PETSC_EXTERN PetscBool         PetscLogPrintInfo; /* if true, indicates PetscInfo() is turned on */
65 
/*MC
    PetscLogEvent - id used to identify PETSc or user events which time portions (blocks of executable
     code).

    Level: intermediate

.seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStage`
M*/
typedef int PetscLogEvent;

/*MC
    PetscLogStage - id used to identify user stages (phases, sections) of runs - for logging

    Level: intermediate

.seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEvent`
M*/
typedef int PetscLogStage;
84 
/* NOTE(review): the role of PETSC_EVENT and PETSC_LARGEST_EVENT is not shown in the visible part of this header -- confirm against their users */
#define PETSC_EVENT 1311311
PETSC_EXTERN PetscLogEvent PETSC_LARGEST_EVENT;

/* Global flop counter; when PETSC_USE_LOG is defined, PetscLogFlops() adds PETSC_FLOPS_PER_OP * n to petsc_TotalFlops */
PETSC_EXTERN PetscLogDouble petsc_TotalFlops;
/* NOTE(review): petsc_tmp_flops is not referenced in the visible part of this header; presumably scratch storage for the logging implementation -- confirm */
PETSC_EXTERN PetscLogDouble petsc_tmp_flops;
91 
92 /* We must make the following structures available to access the event
93      activation flags in the PetscLogEventBegin/End() macros. These are not part of the PETSc public
94      API and are not intended to be used by other parts of PETSc or by users.
95 
96      The code that manipulates these structures is in src/sys/logging/utils.
97 */
/* Opaque handle (only the struct tag is declared here); the stage log uses it for its stack of active stages */
typedef struct _n_PetscIntStack *PetscIntStack;
99 
100 /* -----------------------------------------------------------------------------------------------------*/
101 /*
102     PetscClassRegInfo, PetscClassPerfInfo - Each class has two data structures associated with it. The first has
103        static information about it, the second collects statistics on how many objects of the class are created,
104        how much memory they use, etc.
105 
106     PetscClassRegLog, PetscClassPerfLog - arrays of the PetscClassRegInfo and PetscClassPerfInfo for all classes.
107 */
/* Static (registration) information about one class */
typedef struct {
  char        *name;    /* The class name */
  PetscClassId classid; /* The integer identifying this class */
} PetscClassRegInfo;
112 
/* Per-class statistics: object creation/destruction counts and (unreliable) memory totals */
typedef struct {
  PetscClassId   id;           /* The integer identifying this class */
  int            creations;    /* The number of objects of this class created */
  int            destructions; /* The number of objects of this class destroyed */
  PetscLogDouble mem;          /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */
  PetscLogDouble descMem;      /* The total memory allocated by descendants of these objects; this is completely wrong and should possibly be removed */
} PetscClassPerfInfo;
120 
/* Array of PetscClassRegInfo, one entry per registered class */
typedef struct _n_PetscClassRegLog *PetscClassRegLog;
struct _n_PetscClassRegLog {
  int                numClasses; /* The number of classes registered */
  int                maxClasses; /* The maximum number of classes */
  PetscClassRegInfo *classInfo;  /* The structure for class information (classids are monotonically increasing) */
};
127 
/* Array of PetscClassPerfInfo, one entry per logged class */
typedef struct _n_PetscClassPerfLog *PetscClassPerfLog;
struct _n_PetscClassPerfLog {
  int                 numClasses; /* The number of logging classes */
  int                 maxClasses; /* The maximum number of classes */
  PetscClassPerfInfo *classInfo;  /* The structure for class information (classids are monotonically increasing) */
};
134 /* -----------------------------------------------------------------------------------------------------*/
135 /*
136     PetscEventRegInfo, PetscEventPerfInfo - Each event has two data structures associated with it. The first has
137        static information about it, the second collects statistics on how many times the event is used, how
138        much time it takes, etc.
139 
140     PetscEventRegLog, PetscEventPerfLog - an array of all PetscEventRegInfo and PetscEventPerfInfo for all events. There is one
141       of these for each stage.
142 
143 */
/* Static (registration) information about one event; the MPE ids exist only when PETSC_HAVE_MPE is defined */
typedef struct {
  char        *name;       /* The name of this event */
  PetscClassId classid;    /* The class the event is associated with */
  PetscBool    collective; /* Flag this event as collective */
#if defined(PETSC_HAVE_MPE)
  int mpe_id_begin; /* MPE IDs that define the event */
  int mpe_id_end;
#endif
} PetscEventRegInfo;
153 
/*
   Performance statistics collected for one event. The same structure is also embedded in
   PetscStageInfo to hold the totals of a whole stage. The Cpu/Gpu fields exist only when
   PETSC_HAVE_DEVICE is defined.
*/
typedef struct {
  int            id;                      /* The integer identifying this event */
  PetscBool      active;                  /* The flag to activate logging */
  PetscBool      visible;                 /* The flag to print info in summary */
  int            depth;                   /* The nesting depth of the event call */
  int            count;                   /* The number of times this event was executed */
  PetscLogDouble flops, flops2, flopsTmp; /* The flops and flops^2 used in this event */
  PetscLogDouble time, time2, timeTmp;    /* The time and time^2 taken for this event */
  PetscLogDouble syncTime;                /* The synchronization barrier time */
  PetscLogDouble dof[8];                  /* The number of degrees of freedom associated with this event */
  PetscLogDouble errors[8];               /* The errors (user-defined) associated with this event */
  PetscLogDouble numMessages;             /* The number of messages in this event */
  PetscLogDouble messageLength;           /* The total message lengths in this event */
  PetscLogDouble numReductions;           /* The number of reductions in this event */
  PetscLogDouble memIncrease;             /* How much the resident memory has increased in this event */
  PetscLogDouble mallocIncrease;          /* How much the maximum malloced space has increased in this event */
  PetscLogDouble mallocSpace;             /* How much space was malloced and kept during this event */
  PetscLogDouble mallocIncreaseEvent;     /* Maximum of the high water mark within the event minus memory available at the end of the event */
#if defined(PETSC_HAVE_DEVICE)
  PetscLogDouble CpuToGpuCount; /* The total number of CPU to GPU copies */
  PetscLogDouble GpuToCpuCount; /* The total number of GPU to CPU copies */
  PetscLogDouble CpuToGpuSize;  /* The total size of CPU to GPU copies */
  PetscLogDouble GpuToCpuSize;  /* The total size of GPU to CPU copies */
  PetscLogDouble GpuFlops;      /* The flops done on a GPU in this event */
  PetscLogDouble GpuTime;       /* The time spent on a GPU in this event */
#endif
} PetscEventPerfInfo;
181 
/* Array of PetscEventRegInfo, one entry per registered event */
typedef struct _n_PetscEventRegLog *PetscEventRegLog;
struct _n_PetscEventRegLog {
  int                numEvents; /* The number of registered events */
  int                maxEvents; /* The maximum number of events */
  PetscEventRegInfo *eventInfo; /* The registration information for each event */
};
188 
/* Array of PetscEventPerfInfo; each stage owns one of these (see PetscStageInfo.eventLog) */
typedef struct _n_PetscEventPerfLog *PetscEventPerfLog;
struct _n_PetscEventPerfLog {
  int                 numEvents; /* The number of logging events */
  int                 maxEvents; /* The maximum number of events */
  PetscEventPerfInfo *eventInfo; /* The performance information for each event */
};
195 /* ------------------------------------------------------------------------------------------------------------*/
196 /*
197    PetscStageInfo - Contains all the information about a particular stage.
198 
199    PetscStageLog - An array of PetscStageInfo for each registered stage. There is a single one of these in the code.
200 */
/* Everything recorded for one stage: its name, its own totals, and its per-event and per-class logs */
typedef struct _PetscStageInfo {
  char              *name;     /* The stage name */
  PetscBool          used;     /* The stage was pushed on this processor */
  PetscEventPerfInfo perfInfo; /* The stage performance information */
  PetscEventPerfLog  eventLog; /* The event information for this stage */
  PetscClassPerfLog  classLog; /* The class information for this stage */
} PetscStageInfo;
208 
/*
   The stage log: the array of all stages plus the registration logs for events and classes.
   curStage is read directly by the PetscLogEventSync/Begin/End macros to find the current stage.
*/
typedef struct _n_PetscStageLog *PetscStageLog;
struct _n_PetscStageLog {
  int              numStages; /* The number of registered stages */
  int              maxStages; /* The maximum number of stages */
  PetscIntStack    stack;     /* The stack for active stages */
  int              curStage;  /* The current stage (only used in macros so we don't call PetscIntStackTop) */
  PetscStageInfo  *stageInfo; /* The information for each stage */
  PetscEventRegLog eventLog;  /* The registered events */
  PetscClassRegLog classLog;  /* The registered classes */
};
219 /* -----------------------------------------------------------------------------------------------------*/
220 
221 PETSC_DEPRECATED_FUNCTION("PetscLogObjectParent() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectParent(PetscObject o, PetscObject p)
222 {
223   (void)o;
224   (void)p;
225   return 0;
226 }
227 
228 PETSC_DEPRECATED_FUNCTION("PetscLogObjectMemory() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectMemory(PetscObject o, PetscLogDouble m)
229 {
230   (void)o;
231   (void)m;
232   return 0;
233 }
234 
235 #if defined(PETSC_USE_LOG) /* --- Logging is turned on --------------------------------*/
/* The global stage log; the PetscLogEventSync/Begin/End macros in this header dereference it directly */
PETSC_EXTERN PetscStageLog  petsc_stageLog;
/* Accessors for the stage log (prototypes only; implementations are defined elsewhere) */
PETSC_EXTERN PetscErrorCode PetscLogGetStageLog(PetscStageLog *);
PETSC_EXTERN PetscErrorCode PetscStageLogGetCurrent(PetscStageLog, int *);
PETSC_EXTERN PetscErrorCode PetscStageLogGetEventPerfLog(PetscStageLog, int, PetscEventPerfLog *);
240 
241   /*
242    Flop counting:  We count each arithmetic operation (e.g., addition, multiplication) separately.
243 
244    For the complex numbers version, note that
245        1 complex addition = 2 flops
246        1 complex multiplication = 6 flops,
247    where we define 1 flop as that for a double precision scalar.  We roughly approximate
248    flop counting for complex numbers by multiplying the total flops by 4; this corresponds
249    to the assumption that we're counting mostly additions and multiplications -- and
250    roughly the same number of each.  More accurate counting could be done by distinguishing
251    among the various arithmetic operations.
252  */
253 
  /* Factor by which PetscLogFlops() scales every logged count: 4.0 for complex builds, 1.0 otherwise */
  #if defined(PETSC_USE_COMPLEX)
    #define PETSC_FLOPS_PER_OP 4.0
  #else
    #define PETSC_FLOPS_PER_OP 1.0
  #endif
259 
260 /*@C
261        PetscLogFlops - Log how many flops are performed in a calculation
262 
263    Input Parameter:
264 .   flops - the number of flops
265 
266    Notes:
267      To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double,
268      not an integer. Use PetscLogFlops(4.0*n) not PetscLogFlops(4*n)
269 
270    Level: intermediate
271 
272 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`
273 @*/
274 
275 static inline PetscErrorCode PetscLogFlops(PetscLogDouble n)
276 {
277   PetscFunctionBegin;
278   #if defined(PETSC_USE_DEBUG)
279   PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops");
280   #endif
281   petsc_TotalFlops += PETSC_FLOPS_PER_OP * n;
282   PetscFunctionReturn(0);
283 }
284 
/* NOTE(review): presumably returns the accumulated flop count through its argument -- confirm against the implementation */
PETSC_EXTERN PetscErrorCode PetscGetFlops(PetscLogDouble *);

  /* MPE logging entry points, declared only when PETSC_HAVE_MPE is defined */
  #if defined(PETSC_HAVE_MPE)
PETSC_EXTERN PetscErrorCode PetscLogMPEBegin(void);
PETSC_EXTERN PetscErrorCode PetscLogMPEDump(const char[]);
  #endif

/*
   Logging hooks (function pointers). PetscLogEventBegin() calls PetscLogPLB and PetscLogEventEnd()
   calls PetscLogPLE; PetscLogObjectCreate() calls PetscLogPHC and PetscLogObjectDestroy() calls
   PetscLogPHD. In every case the hook is invoked only when the pointer is non-NULL.
*/
PETSC_EXTERN PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
PETSC_EXTERN PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
PETSC_EXTERN PetscErrorCode (*PetscLogPHC)(PetscObject);
PETSC_EXTERN PetscErrorCode (*PetscLogPHD)(PetscObject);
296 
  /* Calls PetscLogObjectParent(p, d[i]) for i = 0 .. n-1, each call wrapped in PetscCall() */
  #define PetscLogObjectParents(p, n, d) PetscMacroReturnStandard(for (int _i = 0; _i < (n); ++_i) PetscCall(PetscLogObjectParent((PetscObject)(p), (PetscObject)(d)[_i]));)
  /* Invoke the object creation/destruction hook if one is installed; otherwise evaluate to 0 */
  #define PetscLogObjectCreate(h)        ((PetscLogPHC) ? (*PetscLogPHC)((PetscObject)(h)) : 0)
  #define PetscLogObjectDestroy(h)       ((PetscLogPHD) ? (*PetscLogPHD)((PetscObject)(h)) : 0)
/* printf-style: argument 2 is the format string, the variadic arguments start at position 3 */
PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3);
301 
/*
   Public logging API (available when PETSC_USE_LOG is defined). In the logging-off branch of
   this header the stage and event routines that have a same-named macro there are replaced
   by macros that expand to 0 (the GetId variants also zero their output argument).
*/

/* Initialization functions */
PETSC_EXTERN PetscErrorCode PetscLogDefaultBegin(void);
PETSC_EXTERN PetscErrorCode PetscLogAllBegin(void);
PETSC_EXTERN PetscErrorCode PetscLogNestedBegin(void);
PETSC_EXTERN PetscErrorCode PetscLogTraceBegin(FILE *);
PETSC_EXTERN PetscErrorCode PetscLogActions(PetscBool);
PETSC_EXTERN PetscErrorCode PetscLogObjects(PetscBool);
PETSC_EXTERN PetscErrorCode PetscLogSetThreshold(PetscLogDouble, PetscLogDouble *);
/* NOTE(review): the two callbacks have the same signature as PetscLogPLB/PetscLogPLE; presumably they are installed as the begin/end hooks -- confirm */
PETSC_EXTERN PetscErrorCode PetscLogSet(PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject));

/* Output functions */
PETSC_EXTERN PetscErrorCode PetscLogView(PetscViewer);
PETSC_EXTERN PetscErrorCode PetscLogViewFromOptions(void);
PETSC_EXTERN PetscErrorCode PetscLogDump(const char[]);

/* Status checking functions */
PETSC_EXTERN PetscErrorCode PetscLogIsActive(PetscBool *);

/* Stage functions */
PETSC_EXTERN PetscErrorCode PetscLogStageRegister(const char[], PetscLogStage *);
PETSC_EXTERN PetscErrorCode PetscLogStagePush(PetscLogStage);
PETSC_EXTERN PetscErrorCode PetscLogStagePop(void);
PETSC_EXTERN PetscErrorCode PetscLogStageSetActive(PetscLogStage, PetscBool);
PETSC_EXTERN PetscErrorCode PetscLogStageGetActive(PetscLogStage, PetscBool *);
PETSC_EXTERN PetscErrorCode PetscLogStageSetVisible(PetscLogStage, PetscBool);
PETSC_EXTERN PetscErrorCode PetscLogStageGetVisible(PetscLogStage, PetscBool *);
PETSC_EXTERN PetscErrorCode PetscLogStageGetId(const char[], PetscLogStage *);

/* Event functions */
PETSC_EXTERN PetscErrorCode PetscLogEventRegister(const char[], PetscClassId, PetscLogEvent *);
PETSC_EXTERN PetscErrorCode PetscLogEventSetCollective(PetscLogEvent, PetscBool);
PETSC_EXTERN PetscErrorCode PetscLogEventIncludeClass(PetscClassId);
PETSC_EXTERN PetscErrorCode PetscLogEventExcludeClass(PetscClassId);
PETSC_EXTERN PetscErrorCode PetscLogEventActivate(PetscLogEvent);
PETSC_EXTERN PetscErrorCode PetscLogEventDeactivate(PetscLogEvent);
PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent);
PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent);
PETSC_EXTERN PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent, PetscBool);
PETSC_EXTERN PetscErrorCode PetscLogEventActivateClass(PetscClassId);
PETSC_EXTERN PetscErrorCode PetscLogEventDeactivateClass(PetscClassId);
PETSC_EXTERN PetscErrorCode PetscLogEventGetId(const char[], PetscLogEvent *);
PETSC_EXTERN PetscErrorCode PetscLogEventGetPerfInfo(int, PetscLogEvent, PetscEventPerfInfo *);
PETSC_EXTERN PetscErrorCode PetscLogEventSetDof(PetscLogEvent, PetscInt, PetscLogDouble);
PETSC_EXTERN PetscErrorCode PetscLogEventSetError(PetscLogEvent, PetscInt, PetscLogDouble);
/* Called by the PetscLogEventBegin()/PetscLogEventEnd() macros after the begin/end hook returns 0 */
PETSC_EXTERN PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent);
PETSC_EXTERN PetscErrorCode PetscLogPopCurrentEvent_Internal(void);
348 
/*
   Global counters. The *_ct variables count calls and the *_len variables accumulate message
   sizes; they are updated by the MPI_Xxx() wrapper macros defined in this header.
   NOTE(review): petsc_allreduce_ct is also incremented by the MPI_Bcast, MPI_Reduce_scatter_block
   and MPI_(I)alltoall(v) wrappers, not only by MPI_Allreduce.
*/
PETSC_EXTERN PetscLogDouble petsc_irecv_ct;
PETSC_EXTERN PetscLogDouble petsc_isend_ct;
PETSC_EXTERN PetscLogDouble petsc_recv_ct;
PETSC_EXTERN PetscLogDouble petsc_send_ct;
PETSC_EXTERN PetscLogDouble petsc_irecv_len;
PETSC_EXTERN PetscLogDouble petsc_isend_len;
PETSC_EXTERN PetscLogDouble petsc_recv_len;
PETSC_EXTERN PetscLogDouble petsc_send_len;
PETSC_EXTERN PetscLogDouble petsc_allreduce_ct;
PETSC_EXTERN PetscLogDouble petsc_gather_ct;
PETSC_EXTERN PetscLogDouble petsc_scatter_ct;
PETSC_EXTERN PetscLogDouble petsc_wait_ct;
PETSC_EXTERN PetscLogDouble petsc_wait_any_ct;
PETSC_EXTERN PetscLogDouble petsc_wait_all_ct;
PETSC_EXTERN PetscLogDouble petsc_sum_of_waits_ct;

/* A variable when logging is on; the logging-off branch defines it as the constant PETSC_FALSE */
PETSC_EXTERN PetscBool PetscLogMemory;

PETSC_EXTERN PetscBool      PetscLogSyncOn; /* true if logging synchronization is enabled */
/* Called by the PetscLogEventSync() macro when the event is active in the current stage */
PETSC_EXTERN PetscErrorCode PetscLogEventSynchronize(PetscLogEvent, MPI_Comm);
370 
  /*
     Event macros. All three first test, in order: that the hook pointer is non-NULL, that the
     current stage (petsc_stageLog->curStage) is active, and that event e is active in that
     stage; if any test fails the macro evaluates to 0 and does nothing else.

     Notes for callers:
       - e is expanded several times, so it must not have side effects.
       - Begin/End chain the hook and the push/pop of the current event with ||, so the
         push/pop runs only when the hook returned 0, and the value of the whole expression
         is 0 or 1 rather than the hook's own return value.
  */
  #define PetscLogEventSync(e, comm) \
    (((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? PetscLogEventSynchronize((e), (comm)) : 0))

  #define PetscLogEventBegin(e, o1, o2, o3, o4) \
    ((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (((*PetscLogPLB)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPushCurrentEvent_Internal(e)) : 0)

  #define PetscLogEventEnd(e, o1, o2, o3, o4) \
    ((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (((*PetscLogPLE)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPopCurrentEvent_Internal()) : 0)

/* NOTE(review): presumably read / reset the flop count recorded for one event -- confirm against the implementation */
PETSC_EXTERN PetscErrorCode PetscLogEventGetFlops(PetscLogEvent, PetscLogDouble *);
PETSC_EXTERN PetscErrorCode PetscLogEventZeroFlops(PetscLogEvent);
382 
383   /*
384      These are used internally in the PETSc routines to keep a count of MPI messages and
385    their sizes.
386 
     This does not work for MPI-Uni because our include/petsc/mpiuni/mpi.h file
   uses macros to define the MPI operations.
389 
390      It does not work correctly from HP-UX because it processes the
391    macros in a way that sometimes it double counts, hence
392    PETSC_HAVE_BROKEN_RECURSIVE_MACRO
393 
394      It does not work with Windows because winmpich lacks MPI_Type_size()
395 */
396   #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO) && !defined(PETSC_HAVE_MPI_MISSING_TYPESIZE)
397 /*
398    Logging of MPI activities
399 */
400 static inline PetscErrorCode PetscMPITypeSize(PetscInt count, MPI_Datatype type, PetscLogDouble *length)
401 {
402   PetscMPIInt typesize;
403 
404   if (type == MPI_DATATYPE_NULL) return 0;
405   PetscCallMPI(MPI_Type_size(type, &typesize));
406   *length += (PetscLogDouble)(count * typesize);
407   return 0;
408 }
409 
410 static inline PetscErrorCode PetscMPITypeSizeComm(MPI_Comm comm, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length)
411 {
412   PetscMPIInt typesize, size, p;
413 
414   if (type == MPI_DATATYPE_NULL) return 0;
415   PetscCallMPI(MPI_Comm_size(comm, &size));
416   PetscCallMPI(MPI_Type_size(type, &typesize));
417   for (p = 0; p < size; ++p) *length += (PetscLogDouble)(counts[p] * typesize);
418   return 0;
419 }
420 
421 static inline PetscErrorCode PetscMPITypeSizeCount(PetscInt n, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length)
422 {
423   PetscMPIInt typesize, p;
424 
425   if (type == MPI_DATATYPE_NULL) return 0;
426   PetscCallMPI(MPI_Type_size(type, &typesize));
427   for (p = 0; p < n; ++p) *length += (PetscLogDouble)(counts[p] * typesize);
428   return 0;
429 }
430 
431 /*
432     Returns 1 if the communicator is parallel else zero
433 */
434 static inline int PetscMPIParallelComm(MPI_Comm comm)
435 {
436   PetscMPIInt size;
437   MPI_Comm_size(comm, &size);
438   return size > 1;
439 }
440 
    /*
       Point-to-point wrappers. Each macro increments the matching call counter, adds the
       message size to the matching length counter through PetscMPITypeSize(), and then calls
       the MPI routine of the same name (a macro's own name is not expanded again inside its
       replacement, so the inner call reaches the real MPI function).

       The three steps are chained with ||: the MPI call is skipped if PetscMPITypeSize()
       returns nonzero, and the value of the whole expression is 0 or 1.

       The MPI_Startall_irecv/isend forms add number to the call counter and call MPI_Startall()
       only when number is nonzero.
    */
    #define MPI_Irecv(buf, count, datatype, source, tag, comm, request) ((petsc_irecv_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || MPI_Irecv((buf), (count), (datatype), (source), (tag), (comm), (request)))

    #define MPI_Irecv_c(buf, count, datatype, source, tag, comm, request) ((petsc_irecv_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || MPI_Irecv_c((buf), (count), (datatype), (source), (tag), (comm), (request)))

    #define MPI_Isend(buf, count, datatype, dest, tag, comm, request) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || MPI_Isend((buf), (count), (datatype), (dest), (tag), (comm), (request)))

    #define MPI_Isend_c(buf, count, datatype, dest, tag, comm, request) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || MPI_Isend_c((buf), (count), (datatype), (dest), (tag), (comm), (request)))

    #define MPI_Startall_irecv(count, datatype, number, requests) ((petsc_irecv_ct += (PetscLogDouble)(number), 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || ((number) && MPI_Startall((number), (requests))))

    #define MPI_Startall_isend(count, datatype, number, requests) ((petsc_isend_ct += (PetscLogDouble)(number), 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || ((number) && MPI_Startall((number), (requests))))

    #define MPI_Start_isend(count, datatype, requests) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_isend_len)) || MPI_Start((requests)))

    #define MPI_Recv(buf, count, datatype, source, tag, comm, status) ((petsc_recv_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len)) || MPI_Recv((buf), (count), (datatype), (source), (tag), (comm), (status)))

    #define MPI_Recv_c(buf, count, datatype, source, tag, comm, status) ((petsc_recv_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len)) || MPI_Recv_c((buf), (count), (datatype), (source), (tag), (comm), (status)))

    #define MPI_Send(buf, count, datatype, dest, tag, comm) ((petsc_send_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_send_len)) || MPI_Send((buf), (count), (datatype), (dest), (tag), (comm)))

    #define MPI_Send_c(buf, count, datatype, dest, tag, comm) ((petsc_send_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_send_len)) || MPI_Send_c((buf), (count), (datatype), (dest), (tag), (comm)))
462 
    /*
       Wait wrappers. Each increments its own counter and petsc_sum_of_waits_ct before calling
       the real routine; MPI_Waitall adds count (not 1) to petsc_sum_of_waits_ct. Because of
       the || chaining the value of the expression is 0 or 1.
    */
    #define MPI_Wait(request, status) ((petsc_wait_ct++, petsc_sum_of_waits_ct++, 0) || MPI_Wait((request), (status)))

    #define MPI_Waitany(a, b, c, d) ((petsc_wait_any_ct++, petsc_sum_of_waits_ct++, 0) || MPI_Waitany((a), (b), (c), (d)))

    #define MPI_Waitall(count, array_of_requests, array_of_statuses) ((petsc_wait_all_ct++, petsc_sum_of_waits_ct += (PetscLogDouble)(count), 0) || MPI_Waitall((count), (array_of_requests), (array_of_statuses)))
468 
    /*
       Blocking collective wrappers.

       - Allreduce, Bcast, Reduce_scatter_block, Alltoall and Alltoallv add
         PetscMPIParallelComm(comm) to petsc_allreduce_ct, i.e. they are counted only on
         communicators with more than one rank. Allgather and Allgatherv do the same with
         petsc_gather_ct.
       - Gather(v) and Scatter(v) increment petsc_gather_ct / petsc_scatter_ct unconditionally.
       - Where a size is recorded, the send side goes to petsc_send_len (Alltoall(v), Gather(v))
         and the receive side to petsc_recv_len (Scatter(v)).
       - MPI_Allreduce uses the comma operator and therefore yields the MPI routine's own
         return value; the other wrappers are chained with || and yield 0 or 1.
    */
    #define MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm) (petsc_allreduce_ct += PetscMPIParallelComm((comm)), MPI_Allreduce((sendbuf), (recvbuf), (count), (datatype), (op), (comm)))

    #define MPI_Bcast(buffer, count, datatype, root, comm) ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || MPI_Bcast((buffer), (count), (datatype), (root), (comm)))

    #define MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm) ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || MPI_Reduce_scatter_block((sendbuf), (recvbuf), (recvcount), (datatype), (op), (comm)))

    #define MPI_Alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) \
      ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Alltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm)))

    #define MPI_Alltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm) \
      ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len)) || MPI_Alltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm)))

    #define MPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Allgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm)))

    #define MPI_Allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm) \
      ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Allgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm)))

    #define MPI_Gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \
      ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Gather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))

    #define MPI_Gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm) \
      ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Gatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm)))

    #define MPI_Scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \
      ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Scatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))

    #define MPI_Scatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm) \
      ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Scatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))
497 
    /*
       Nonblocking collective wrappers. They update the same counters, in the same way, as
       their blocking counterparts: Ialltoall(v) use petsc_allreduce_ct and petsc_send_len,
       Iallgather(v) and Igather(v) use petsc_gather_ct (Igather(v) also petsc_send_len), and
       Iscatter(v) use petsc_scatter_ct and petsc_recv_len. All are chained with || and
       therefore yield 0 or 1.
    */
    #define MPI_Ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \
      ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Ialltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request)))

    #define MPI_Ialltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm, request) \
      ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len)) || MPI_Ialltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm), (request)))

    #define MPI_Iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \
      ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Iallgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request)))

    #define MPI_Iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm, request) \
      ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Iallgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm), (request)))

    #define MPI_Igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
      ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Igather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)))

    #define MPI_Igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm, request) \
      ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Igatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm), (request)))

    #define MPI_Iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
      ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Iscatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)))

    #define MPI_Iscatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
      ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Iscatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)))
521 
522   #else
523 
/*
  Variants used when the MPI logging wrappers are not available (MPIUNI or a preprocessor with
  broken recursive macros): nothing is counted. The count and datatype arguments are accepted
  but never evaluated.

  The Startall forms call MPI_Startall() only when the number of requests is nonzero and
  evaluate to 0 or 1 (logical AND); the Start form yields MPI_Start()'s own result.
*/
#define MPI_Startall_irecv(cnt, dtype, nreq, reqs) ((nreq) && MPI_Startall((nreq), (reqs)))

#define MPI_Startall_isend(cnt, dtype, nreq, reqs) ((nreq) && MPI_Startall((nreq), (reqs)))

#define MPI_Start_isend(cnt, dtype, req) (MPI_Start((req)))
529 
530   #endif /* !MPIUNI_H && ! PETSC_HAVE_BROKEN_RECURSIVE_MACRO */
531 
532 #else /* ---Logging is turned off --------------------------------------------*/
533 
/* Logging compiled out: memory logging is reported as off */
#define PetscLogMemory PETSC_FALSE

/* Flop counting is a no-op; PetscGetFlops() still zeroes the caller's output. Both yield 0. */
#define PetscLogFlops(nflops) 0
#define PetscGetFlops(flops)  (*(flops) = 0.0, 0)
538 
/*
  Stage API with logging compiled out: every call collapses to the constant 0 and its
  arguments are not evaluated. The one exception is PetscLogStageGetId(), which also stores 0
  through its output pointer so callers always receive a defined stage id.
*/
#define PetscLogStageRegister(name, stage)         0
#define PetscLogStagePush(stage)                   0
#define PetscLogStagePop()                         0
#define PetscLogStageSetActive(stage, isActive)    0
#define PetscLogStageGetActive(stage, isActive)    0
#define PetscLogStageGetVisible(stage, isVisible)  0
#define PetscLogStageSetVisible(stage, isVisible)  0
#define PetscLogStageGetId(name, stage)            (*(stage) = 0, 0)
547 
/*
  Event API with logging compiled out: every call collapses to the constant 0 and its
  arguments are not evaluated. The one exception is PetscLogEventGetId(), which also stores 0
  through its output pointer.
*/
#define PetscLogEventRegister(name, classid, event)    0
#define PetscLogEventSetCollective(event, collective)  0
#define PetscLogEventIncludeClass(classid)             0
#define PetscLogEventExcludeClass(classid)             0
#define PetscLogEventActivate(event)                   0
#define PetscLogEventDeactivate(event)                 0
#define PetscLogEventDeactivatePush(event)             0
#define PetscLogEventDeactivatePop(event)              0
#define PetscLogEventActivateClass(classid)            0
#define PetscLogEventDeactivateClass(classid)          0
#define PetscLogEventSetActiveAll(event, isActive)     0
#define PetscLogEventGetId(name, event)                (*(event) = 0, 0)
#define PetscLogEventGetPerfInfo(stage, event, info)   0
#define PetscLogEventSetDof(event, n, dof)             0
#define PetscLogEventSetError(event, n, error)         0
563 
/* With logging compiled out the four logging hooks are the constant 0 */
#define PetscLogPLB 0
#define PetscLogPLE 0
#define PetscLogPHC 0
#define PetscLogPHD 0

/* Object bookkeeping is a no-op: arguments are not evaluated, the result is always 0 */
#define PetscLogObjectParents(parent, n, children) 0
#define PetscLogObjectCreate(obj)                  0
#define PetscLogObjectDestroy(obj)                 0
571   #define PetscLogObjectDestroy(h)       0
572 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3);
573 
/*
  Starting/configuring logging is a no-op when logging is compiled out; each call is the
  constant 0. PetscLogIsActive() additionally stores PETSC_FALSE through its output pointer.
*/
#define PetscLogDefaultBegin()                0
#define PetscLogAllBegin()                    0
#define PetscLogNestedBegin()                 0
#define PetscLogTraceBegin(fp)                0
#define PetscLogActions(enable)               0
#define PetscLogObjects(enable)               0
#define PetscLogSetThreshold(newval, oldval)  0
#define PetscLogSet(begin, end)               0
#define PetscLogIsActive(isActive)            (*(isActive) = PETSC_FALSE, 0)

/* Nothing was recorded, so the reporting calls do nothing and yield 0 */
#define PetscLogView(v)           0
#define PetscLogViewFromOptions() 0
#define PetscLogDump(fname)       0
587 
/* Event timing brackets vanish when logging is compiled out: no argument is evaluated, the result is 0 */
#define PetscLogEventSync(event, comm)                 0
#define PetscLogEventBegin(event, obj1, obj2, obj3, obj4) 0
#define PetscLogEventEnd(event, obj1, obj2, obj3, obj4)   0
591 
/*
  If PETSC_USE_LOG is NOT defined, these still need to be!

  The count and datatype arguments are accepted but never evaluated. The Startall forms call
  MPI_Startall() only when the number of requests is nonzero and evaluate to 0 or 1 (logical
  AND); the Start form yields MPI_Start()'s own result.

  Every forwarded argument and each whole expansion is parenthesized so these definitions
  expand exactly like their logging-enabled counterparts, whatever expression the caller
  passes and even if MPI_Startall()/MPI_Start() are themselves macros.
*/
#define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests)))
#define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests)))
#define MPI_Start_isend(count, datatype, requests)            (MPI_Start((requests)))
596 
597 #endif /* PETSC_USE_LOG */
598 
599 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_DEVICE)
600 
601 /* Global GPU counters */
602 PETSC_EXTERN PetscLogDouble petsc_ctog_ct;
603 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct;
604 PETSC_EXTERN PetscLogDouble petsc_ctog_sz;
605 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz;
606 PETSC_EXTERN PetscLogDouble petsc_ctog_ct_scalar;
607 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct_scalar;
608 PETSC_EXTERN PetscLogDouble petsc_ctog_sz_scalar;
609 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz_scalar;
610 PETSC_EXTERN PetscLogDouble petsc_gflops;
611 PETSC_EXTERN PetscLogDouble petsc_gtime;
612 
613 static inline PetscErrorCode PetscLogCpuToGpu(PetscLogDouble size)
614 {
615   PetscFunctionBegin;
616   petsc_ctog_ct += 1;
617   petsc_ctog_sz += size;
618   PetscFunctionReturn(0);
619 }
620 
621 static inline PetscErrorCode PetscLogGpuToCpu(PetscLogDouble size)
622 {
623   PetscFunctionBegin;
624   petsc_gtoc_ct += 1;
625   petsc_gtoc_sz += size;
626   PetscFunctionReturn(0);
627 }
628 
629 static inline PetscErrorCode PetscLogCpuToGpuScalar(PetscLogDouble size)
630 {
631   PetscFunctionBegin;
632   petsc_ctog_ct_scalar += 1;
633   petsc_ctog_sz_scalar += size;
634   PetscFunctionReturn(0);
635 }
636 
637 static inline PetscErrorCode PetscLogGpuToCpuScalar(PetscLogDouble size)
638 {
639   PetscFunctionBegin;
640   petsc_gtoc_ct_scalar += 1;
641   petsc_gtoc_sz_scalar += size;
642   PetscFunctionReturn(0);
643 }
644 
645 /*@C
646        PetscLogGpuFlops - Log how many flops are performed in a calculation on the device
647 
648    Input Parameter:
649 .   flops - the number of flops
650 
651    Notes:
652      To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double,
653      not an integer. Use PetscLogFlops(4.0*n) not PetscLogFlops(4*n)
654 
655      The values are also added to the total flop count for the MPI rank that is set with `PetscLogFlops()`; hence the number of flops
656      just on the CPU would be the value from set from `PetscLogFlops()` minus the value set from `PetscLogGpuFlops()`
657 
658    Level: intermediate
659 
660 .seealso: `PetscLogView()`, `PetscLogFlops()`, `PetscLogGpuTimeBegin()`, `PetscLogGpuTimeEnd()`
661 @*/
662 static inline PetscErrorCode PetscLogGpuFlops(PetscLogDouble n)
663 {
664   PetscFunctionBegin;
665   PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops");
666   petsc_TotalFlops += PETSC_FLOPS_PER_OP * n;
667   petsc_gflops += PETSC_FLOPS_PER_OP * n;
668   PetscFunctionReturn(0);
669 }
670 
671 static inline PetscErrorCode PetscLogGpuTimeAdd(PetscLogDouble t)
672 {
673   PetscFunctionBegin;
674   petsc_gtime += t;
675   PetscFunctionReturn(0);
676 }
677 
678 PETSC_EXTERN PetscErrorCode PetscLogGpuTime(void);
679 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeBegin(void);
680 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeEnd(void);
681 
682 #else
683 
/*
  Without both logging and device support the GPU logging calls are no-ops: arguments are not
  evaluated and every call is the constant 0.
*/
#define PetscLogCpuToGpu(size)       0
#define PetscLogGpuToCpu(size)       0
#define PetscLogCpuToGpuScalar(size) 0
#define PetscLogGpuToCpuScalar(size) 0
#define PetscLogGpuFlops(nflops)     0
#define PetscLogGpuTime()            0
#define PetscLogGpuTimeAdd(t)        0
#define PetscLogGpuTimeBegin()       0
#define PetscLogGpuTimeEnd()         0
693 
694 #endif /* PETSC_USE_LOG && PETSC_HAVE_DEVICE */
695 
/*
  PetscPreLoadBegin - opens a preloading region; must be paired with PetscPreLoadEnd().

  The macro deliberately leaves a `do {` and a `for (...) {` open: the caller's code becomes the
  loop body and PetscPreLoadEnd() supplies the closing braces.

  flag is the default for preloading and can be overridden with the -preload option. The body
  runs PetscPreLoadMax + 1 times: once when preloading is off, twice when it is on. At the top
  of every pass PetscPreLoadingOn is refreshed, a barrier is executed, and the stage called name
  is registered (first pass) or looked up (later passes), marked active unless this is the first
  of two passes, and pushed.

  The locals PetscPreLoading, PetscPreLoadMax, PetscPreLoadIt and _stageNum are shared by name
  with PetscPreLoadStage() and PetscPreLoadEnd().
*/
#define PetscPreLoadBegin(flag, name) \
  do { \
    PetscBool     PetscPreLoading = flag; \
    int           PetscPreLoadMax; \
    int           PetscPreLoadIt; \
    PetscLogStage _stageNum; \
    PetscCall(PetscOptionsGetBool(NULL, NULL, "-preload", &PetscPreLoading, NULL)); \
    PetscPreLoadMax = (int)(PetscPreLoading); \
    if (PetscPreLoading) PetscPreLoadingUsed = PETSC_TRUE; \
    for (PetscPreLoadIt = 0; PetscPreLoadIt <= PetscPreLoadMax; PetscPreLoadIt++) { \
      PetscPreLoadingOn = PetscPreLoading; \
      PetscCall(PetscBarrier(NULL)); \
      if (PetscPreLoadIt > 0) { \
        PetscCall(PetscLogStageGetId(name, &_stageNum)); \
      } else { \
        PetscCall(PetscLogStageRegister(name, &_stageNum)); \
      } \
      PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \
      PetscCall(PetscLogStagePush(_stageNum));
711 
/*
  PetscPreLoadEnd - closes the region opened by PetscPreLoadBegin(): pops the current stage,
  clears PetscPreLoading so that a following pass runs with preloading off, then supplies the
  closing brace of the `for` loop and the `} while (0)` of the enclosing block.
*/
#define PetscPreLoadEnd() \
      PetscCall(PetscLogStagePop()); \
      PetscPreLoading = PETSC_FALSE; \
    } /* closes the for loop opened by PetscPreLoadBegin() */ \
  } /* closes the do block opened by PetscPreLoadBegin() */ \
  while (0)
718 
/*
  PetscPreLoadStage - switches to another stage inside a PetscPreLoadBegin()/PetscPreLoadEnd()
  region: pops the current stage, registers name on the first pass or looks it up on later
  passes, marks it active unless this is the first of two passes, and pushes it.

  Relies on PetscPreLoadIt, PetscPreLoadMax and _stageNum declared by PetscPreLoadBegin().
*/
#define PetscPreLoadStage(name) \
  do { \
    PetscCall(PetscLogStagePop()); \
    if (PetscPreLoadIt > 0) { \
      PetscCall(PetscLogStageGetId(name, &_stageNum)); \
    } else { \
      PetscCall(PetscLogStageRegister(name, &_stageNum)); \
    } \
    PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \
    PetscCall(PetscLogStagePush(_stageNum)); \
  } while (0)
727 
728 /* some vars for logging */
729 PETSC_EXTERN PetscBool PetscPreLoadingUsed; /* true if we are or have done preloading */
730 PETSC_EXTERN PetscBool PetscPreLoadingOn;   /* true if we are currently in a preloading calculation */
731 
732 #endif
733