xref: /petsc/include/petsclog.h (revision f092f6cb0ea570df2a26d5140f7c9e4c399b4663)
1 /*
2     Defines profile/logging in PETSc.
3 */
4 #ifndef PETSCLOG_H
5 #define PETSCLOG_H
6 
7 #include <petscsys.h>
8 #include <petsctime.h>
9 
10 /* SUBMANSEC = Sys */
11 
12 /* General logging of information; different from event logging */
13 PETSC_EXTERN PetscErrorCode PetscInfo_Private(const char[], PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(3, 4);
14 #if defined(PETSC_USE_INFO)
15   #define PetscInfo(A, ...) PetscInfo_Private(PETSC_FUNCTION_NAME, ((PetscObject)A), __VA_ARGS__)
16 #else
17   #define PetscInfo(A, ...) 0
18 #endif
19 
/*
   Deprecated numbered aliases PetscInfo1() ... PetscInfo9(). Each one expands to
   PETSC_DEPRECATED_MACRO(...) carrying the "Use PetscInfo() (since version 3.17)" message,
   followed by a call to PetscInfo() with exactly the arguments it was given.
*/
20 #define PetscInfo1(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
21 #define PetscInfo2(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
22 #define PetscInfo3(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
23 #define PetscInfo4(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
24 #define PetscInfo5(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
25 #define PetscInfo6(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
26 #define PetscInfo7(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
27 #define PetscInfo8(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
28 #define PetscInfo9(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__)
29 
30 /*E
31   PetscInfoCommFlag - Describes the method by which to filter `PetscInfo()` by communicator size
32 
33   Used as an input for `PetscInfoSetFilterCommSelf()`
34 
35 $ `PETSC_INFO_COMM_ALL` - Default uninitialized value. `PetscInfo()` will not filter based on
36 communicator size (i.e. will print for all communicators)
37 $ `PETSC_INFO_COMM_NO_SELF` - `PetscInfo()` will NOT print for communicators with size = 1 (i.e. *_COMM_SELF)
38 $ `PETSC_INFO_COMM_ONLY_SELF` - `PetscInfo()` will ONLY print for communicators with size = 1
39 
40   Level: intermediate
41 
42 .seealso: `PetscInfo()`, `PetscInfoSetFromOptions()`, `PetscInfoSetFilterCommSelf()`
43 E*/
typedef enum {
  /* -1: default uninitialized value, no filtering by communicator size */
  PETSC_INFO_COMM_ALL = -1,
  /* 0: do not print for communicators of size 1 */
  PETSC_INFO_COMM_NO_SELF,
  /* 1: print only for communicators of size 1 */
  PETSC_INFO_COMM_ONLY_SELF
} PetscInfoCommFlag;
49 
50 PETSC_EXTERN const char *const PetscInfoCommFlags[];
51 PETSC_EXTERN PetscErrorCode    PetscInfoDeactivateClass(PetscClassId);
52 PETSC_EXTERN PetscErrorCode    PetscInfoActivateClass(PetscClassId);
53 PETSC_EXTERN PetscErrorCode    PetscInfoEnabled(PetscClassId, PetscBool *);
54 PETSC_EXTERN PetscErrorCode    PetscInfoAllow(PetscBool);
55 PETSC_EXTERN PetscErrorCode    PetscInfoSetFile(const char[], const char[]);
56 PETSC_EXTERN PetscErrorCode    PetscInfoGetFile(char **, FILE **);
57 PETSC_EXTERN PetscErrorCode    PetscInfoSetClasses(PetscBool, PetscInt, const char *const *);
58 PETSC_EXTERN PetscErrorCode    PetscInfoGetClass(const char *, PetscBool *);
59 PETSC_EXTERN PetscErrorCode    PetscInfoGetInfo(PetscBool *, PetscBool *, PetscBool *, PetscBool *, PetscInfoCommFlag *);
60 PETSC_EXTERN PetscErrorCode    PetscInfoProcessClass(const char[], PetscInt, const PetscClassId[]);
61 PETSC_EXTERN PetscErrorCode    PetscInfoSetFilterCommSelf(PetscInfoCommFlag);
62 PETSC_EXTERN PetscErrorCode    PetscInfoSetFromOptions(PetscOptions);
63 PETSC_EXTERN PetscErrorCode    PetscInfoDestroy(void);
64 PETSC_EXTERN PetscBool         PetscLogPrintInfo; /* if true, indicates PetscInfo() is turned on */
65 
66 /*MC
67     PetscLogEvent - id used to identify PETSc or user events that time portions (blocks) of executable
68      code.
69 
70     Level: intermediate
71 
72 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStage`
73 M*/
74 typedef int PetscLogEvent;
75 
76 /*MC
77     PetscLogStage - id used to identify user stages (phases, sections) of runs - for logging
78 
79     Level: intermediate
80 
81 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEvent`
82 M*/
83 typedef int PetscLogStage;
84 
85 #define PETSC_EVENT 1311311
86 PETSC_EXTERN PetscLogEvent PETSC_LARGEST_EVENT;
87 
88 /* Global flop counter */
89 PETSC_EXTERN PetscLogDouble petsc_TotalFlops;
90 PETSC_EXTERN PetscLogDouble petsc_tmp_flops;
91 
92 /* We must make the following structures available to access the event
93      activation flags in the PetscLogEventBegin/End() macros. These are not part of the PETSc public
94      API and are not intended to be used by other parts of PETSc or by users.
95 
96      The code that manipulates these structures is in src/sys/logging/utils.
97 */
98 typedef struct _n_PetscIntStack *PetscIntStack;
99 
100 /* -----------------------------------------------------------------------------------------------------*/
101 /*
102     PetscClassRegInfo, PetscClassPerfInfo - Each class has two data structures associated with it. The first has
103        static information about it, the second collects statistics on how many objects of the class are created,
104        how much memory they use, etc.
105 
106     PetscClassRegLog, PetscClassPerfLog - arrays of the PetscClassRegInfo and PetscClassPerfInfo for all classes.
107 */
108 typedef struct {
109   char        *name;    /* The class name */
110   PetscClassId classid; /* The integer identifying this class */
111 } PetscClassRegInfo;
112 
113 typedef struct {
114   PetscClassId   id;           /* The integer identifying this class */
115   int            creations;    /* The number of objects of this class created */
116   int            destructions; /* The number of objects of this class destroyed */
117   PetscLogDouble mem;          /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */
118   PetscLogDouble descMem;      /* The total memory allocated by descendents of these objects; this is completely wrong and should possibly be removed */
119 } PetscClassPerfInfo;
120 
121 typedef struct _n_PetscClassRegLog *PetscClassRegLog;
122 struct _n_PetscClassRegLog {
123   int                numClasses; /* The number of classes registered */
124   int                maxClasses; /* The maximum number of classes */
125   PetscClassRegInfo *classInfo;  /* The structure for class information (classids are monotonicly increasing) */
126 };
127 
128 typedef struct _n_PetscClassPerfLog *PetscClassPerfLog;
129 struct _n_PetscClassPerfLog {
130   int                 numClasses; /* The number of logging classes */
131   int                 maxClasses; /* The maximum number of classes */
132   PetscClassPerfInfo *classInfo;  /* The structure for class information (classids are monotonicly increasing) */
133 };
134 /* -----------------------------------------------------------------------------------------------------*/
135 /*
136     PetscEventRegInfo, PetscEventPerfInfo - Each event has two data structures associated with it. The first has
137        static information about it, the second collects statistics on how many times the event is used, how
138        much time it takes, etc.
139 
140     PetscEventRegLog, PetscEventPerfLog - an array of all PetscEventRegInfo and PetscEventPerfInfo for all events. There is one
141       of these for each stage.
142 
143 */
144 typedef struct {
145   char        *name;       /* The name of this event */
146   PetscClassId classid;    /* The class the event is associated with */
147   PetscBool    collective; /* Flag this event as collective */
148 #if defined(PETSC_HAVE_TAU_PERFSTUBS)
149   void *timer; /* Associated external tool timer for this event */
150 #endif
151 #if defined(PETSC_HAVE_MPE)
152   int mpe_id_begin; /* MPE IDs that define the event */
153   int mpe_id_end;
154 #endif
155 } PetscEventRegInfo;
156 
157 typedef struct {
158   int            id;                      /* The integer identifying this event */
159   PetscBool      active;                  /* The flag to activate logging */
160   PetscBool      visible;                 /* The flag to print info in summary */
161   int            depth;                   /* The nesting depth of the event call */
162   int            count;                   /* The number of times this event was executed */
163   PetscLogDouble flops, flops2, flopsTmp; /* The flops and flops^2 used in this event */
164   PetscLogDouble time, time2, timeTmp;    /* The time and time^2 taken for this event */
165   PetscLogDouble syncTime;                /* The synchronization barrier time */
166   PetscLogDouble dof[8];                  /* The number of degrees of freedom associated with this event */
167   PetscLogDouble errors[8];               /* The errors (user-defined) associated with this event */
168   PetscLogDouble numMessages;             /* The number of messages in this event */
169   PetscLogDouble messageLength;           /* The total message lengths in this event */
170   PetscLogDouble numReductions;           /* The number of reductions in this event */
171   PetscLogDouble memIncrease;             /* How much the resident memory has increased in this event */
172   PetscLogDouble mallocIncrease;          /* How much the maximum malloced space has increased in this event */
173   PetscLogDouble mallocSpace;             /* How much the space was malloced and kept during this event */
174   PetscLogDouble mallocIncreaseEvent;     /* Maximum of the high water mark with in event minus memory available at the end of the event */
175 #if defined(PETSC_HAVE_DEVICE)
176   PetscLogDouble CpuToGpuCount; /* The total number of CPU to GPU copies */
177   PetscLogDouble GpuToCpuCount; /* The total number of GPU to CPU copies */
178   PetscLogDouble CpuToGpuSize;  /* The total size of CPU to GPU copies */
179   PetscLogDouble GpuToCpuSize;  /* The total size of GPU to CPU copies */
180   PetscLogDouble GpuFlops;      /* The flops done on a GPU in this event */
181   PetscLogDouble GpuTime;       /* The time spent on a GPU in this event */
182 #endif
183 } PetscEventPerfInfo;
184 
185 typedef struct _n_PetscEventRegLog *PetscEventRegLog;
186 struct _n_PetscEventRegLog {
187   int                numEvents; /* The number of registered events */
188   int                maxEvents; /* The maximum number of events */
189   PetscEventRegInfo *eventInfo; /* The registration information for each event */
190 };
191 
192 typedef struct _n_PetscEventPerfLog *PetscEventPerfLog;
193 struct _n_PetscEventPerfLog {
194   int                 numEvents; /* The number of logging events */
195   int                 maxEvents; /* The maximum number of events */
196   PetscEventPerfInfo *eventInfo; /* The performance information for each event */
197 };
198 /* ------------------------------------------------------------------------------------------------------------*/
199 /*
200    PetscStageInfo - Contains all the information about a particular stage.
201 
202    PetscStageLog - An array of PetscStageInfo for each registered stage. There is a single one of these in the code.
203 */
204 typedef struct _PetscStageInfo {
205   char              *name;     /* The stage name */
206   PetscBool          used;     /* The stage was pushed on this processor */
207   PetscEventPerfInfo perfInfo; /* The stage performance information */
208   PetscEventPerfLog  eventLog; /* The event information for this stage */
209   PetscClassPerfLog  classLog; /* The class information for this stage */
210 #if defined(PETSC_HAVE_TAU_PERFSTUBS)
211   void *timer; /* Associated external tool timer for this stage */
212 #endif
213 } PetscStageInfo;
214 
215 typedef struct _n_PetscStageLog *PetscStageLog;
216 struct _n_PetscStageLog {
217   int              numStages; /* The number of registered stages */
218   int              maxStages; /* The maximum number of stages */
219   PetscIntStack    stack;     /* The stack for active stages */
220   int              curStage;  /* The current stage (only used in macros so we don't call PetscIntStackTop) */
221   PetscStageInfo  *stageInfo; /* The information for each stage */
222   PetscEventRegLog eventLog;  /* The registered events */
223   PetscClassRegLog classLog;  /* The registered classes */
224 };
225 /* -----------------------------------------------------------------------------------------------------*/
226 
227 PETSC_DEPRECATED_FUNCTION("PetscLogObjectParent() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectParent(PetscObject o, PetscObject p)
228 {
229   (void)o;
230   (void)p;
231   return 0;
232 }
233 
234 PETSC_DEPRECATED_FUNCTION("PetscLogObjectMemory() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectMemory(PetscObject o, PetscLogDouble m)
235 {
236   (void)o;
237   (void)m;
238   return 0;
239 }
240 
241 #if defined(PETSC_USE_LOG) /* --- Logging is turned on --------------------------------*/
242 PETSC_EXTERN PetscStageLog  petsc_stageLog;
243 PETSC_EXTERN PetscErrorCode PetscLogGetStageLog(PetscStageLog *);
244 PETSC_EXTERN PetscErrorCode PetscStageLogGetCurrent(PetscStageLog, int *);
245 PETSC_EXTERN PetscErrorCode PetscStageLogGetEventPerfLog(PetscStageLog, int, PetscEventPerfLog *);
246 
247   /*
248    Flop counting:  We count each arithmetic operation (e.g., addition, multiplication) separately.
249 
250    For the complex numbers version, note that
251        1 complex addition = 2 flops
252        1 complex multiplication = 6 flops,
253    where we define 1 flop as that for a double precision scalar.  We roughly approximate
254    flop counting for complex numbers by multiplying the total flops by 4; this corresponds
255    to the assumption that we're counting mostly additions and multiplications -- and
256    roughly the same number of each.  More accurate counting could be done by distinguishing
257    among the various arithmetic operations.
258  */
259 
260   #if defined(PETSC_USE_COMPLEX)
261     #define PETSC_FLOPS_PER_OP 4.0
262   #else
263     #define PETSC_FLOPS_PER_OP 1.0
264   #endif
265 
266 /*@C
267        PetscLogFlops - Log how many flops are performed in a calculation
268 
269    Input Parameter:
270 .   flops - the number of flops
271 
272    Notes:
273      To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double,
274      not an integer. Use PetscLogFlops(4.0*n) not PetscLogFlops(4*n)
275 
276    Level: intermediate
277 
278 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`
279 @*/
280 
281 static inline PetscErrorCode PetscLogFlops(PetscLogDouble n)
282 {
283   PetscFunctionBegin;
284   #if defined(PETSC_USE_DEBUG)
285   PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops");
286   #endif
287   petsc_TotalFlops += PETSC_FLOPS_PER_OP * n;
288   PetscFunctionReturn(0);
289 }
290 
291 PETSC_EXTERN PetscErrorCode PetscGetFlops(PetscLogDouble *);
292 
293   #if defined(PETSC_HAVE_MPE)
294 PETSC_EXTERN PetscErrorCode PetscLogMPEBegin(void);
295 PETSC_EXTERN PetscErrorCode PetscLogMPEDump(const char[]);
296   #endif
297 
298 PETSC_EXTERN PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
299 PETSC_EXTERN PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
300 PETSC_EXTERN PetscErrorCode (*PetscLogPHC)(PetscObject);
301 PETSC_EXTERN PetscErrorCode (*PetscLogPHD)(PetscObject);
302 
303   #define PetscLogObjectParents(p, n, d) PetscMacroReturnStandard(for (int _i = 0; _i < (n); ++_i) PetscCall(PetscLogObjectParent((PetscObject)(p), (PetscObject)(d)[_i]));)
304   #define PetscLogObjectCreate(h)        ((PetscLogPHC) ? (*PetscLogPHC)((PetscObject)(h)) : 0)
305   #define PetscLogObjectDestroy(h)       ((PetscLogPHD) ? (*PetscLogPHD)((PetscObject)(h)) : 0)
306 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3);
307 
308 /* Initialization functions */
309 PETSC_EXTERN PetscErrorCode PetscLogDefaultBegin(void);
310 PETSC_EXTERN PetscErrorCode PetscLogAllBegin(void);
311 PETSC_EXTERN PetscErrorCode PetscLogNestedBegin(void);
312 PETSC_EXTERN PetscErrorCode PetscLogTraceBegin(FILE *);
313 PETSC_EXTERN PetscErrorCode PetscLogActions(PetscBool);
314 PETSC_EXTERN PetscErrorCode PetscLogObjects(PetscBool);
315 PETSC_EXTERN PetscErrorCode PetscLogSetThreshold(PetscLogDouble, PetscLogDouble *);
316 PETSC_EXTERN PetscErrorCode PetscLogSet(PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject));
317 
318 /* Output functions */
319 PETSC_EXTERN PetscErrorCode PetscLogView(PetscViewer);
320 PETSC_EXTERN PetscErrorCode PetscLogViewFromOptions(void);
321 PETSC_EXTERN PetscErrorCode PetscLogDump(const char[]);
322 
323 /* Status checking functions */
324 PETSC_EXTERN PetscErrorCode PetscLogIsActive(PetscBool *);
325 
326 /* Stage functions */
327 PETSC_EXTERN PetscErrorCode PetscLogStageRegister(const char[], PetscLogStage *);
328 PETSC_EXTERN PetscErrorCode PetscLogStagePush(PetscLogStage);
329 PETSC_EXTERN PetscErrorCode PetscLogStagePop(void);
330 PETSC_EXTERN PetscErrorCode PetscLogStageSetActive(PetscLogStage, PetscBool);
331 PETSC_EXTERN PetscErrorCode PetscLogStageGetActive(PetscLogStage, PetscBool *);
332 PETSC_EXTERN PetscErrorCode PetscLogStageSetVisible(PetscLogStage, PetscBool);
333 PETSC_EXTERN PetscErrorCode PetscLogStageGetVisible(PetscLogStage, PetscBool *);
334 PETSC_EXTERN PetscErrorCode PetscLogStageGetId(const char[], PetscLogStage *);
335 
336 /* Event functions */
337 PETSC_EXTERN PetscErrorCode PetscLogEventRegister(const char[], PetscClassId, PetscLogEvent *);
338 PETSC_EXTERN PetscErrorCode PetscLogEventSetCollective(PetscLogEvent, PetscBool);
339 PETSC_EXTERN PetscErrorCode PetscLogEventIncludeClass(PetscClassId);
340 PETSC_EXTERN PetscErrorCode PetscLogEventExcludeClass(PetscClassId);
341 PETSC_EXTERN PetscErrorCode PetscLogEventActivate(PetscLogEvent);
342 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivate(PetscLogEvent);
343 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent);
344 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent);
345 PETSC_EXTERN PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent, PetscBool);
346 PETSC_EXTERN PetscErrorCode PetscLogEventActivateClass(PetscClassId);
347 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivateClass(PetscClassId);
348 PETSC_EXTERN PetscErrorCode PetscLogEventGetId(const char[], PetscLogEvent *);
349 PETSC_EXTERN PetscErrorCode PetscLogEventGetPerfInfo(int, PetscLogEvent, PetscEventPerfInfo *);
350 PETSC_EXTERN PetscErrorCode PetscLogEventSetDof(PetscLogEvent, PetscInt, PetscLogDouble);
351 PETSC_EXTERN PetscErrorCode PetscLogEventSetError(PetscLogEvent, PetscInt, PetscLogDouble);
352 PETSC_EXTERN PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent);
353 PETSC_EXTERN PetscErrorCode PetscLogPopCurrentEvent_Internal(void);
354 
355 /* Global counters */
356 PETSC_EXTERN PetscLogDouble petsc_irecv_ct;
357 PETSC_EXTERN PetscLogDouble petsc_isend_ct;
358 PETSC_EXTERN PetscLogDouble petsc_recv_ct;
359 PETSC_EXTERN PetscLogDouble petsc_send_ct;
360 PETSC_EXTERN PetscLogDouble petsc_irecv_len;
361 PETSC_EXTERN PetscLogDouble petsc_isend_len;
362 PETSC_EXTERN PetscLogDouble petsc_recv_len;
363 PETSC_EXTERN PetscLogDouble petsc_send_len;
364 PETSC_EXTERN PetscLogDouble petsc_allreduce_ct;
365 PETSC_EXTERN PetscLogDouble petsc_gather_ct;
366 PETSC_EXTERN PetscLogDouble petsc_scatter_ct;
367 PETSC_EXTERN PetscLogDouble petsc_wait_ct;
368 PETSC_EXTERN PetscLogDouble petsc_wait_any_ct;
369 PETSC_EXTERN PetscLogDouble petsc_wait_all_ct;
370 PETSC_EXTERN PetscLogDouble petsc_sum_of_waits_ct;
371 
372 PETSC_EXTERN PetscBool PetscLogMemory;
373 
374 PETSC_EXTERN PetscBool      PetscLogSyncOn; /* true if logging synchronization is enabled */
375 PETSC_EXTERN PetscErrorCode PetscLogEventSynchronize(PetscLogEvent, MPI_Comm);
376 
/*
   PetscLogEventSync(e, comm) - evaluates to PetscLogEventSynchronize((e), (comm)) when PetscLogPLB
   is non-null and both the current stage's perfInfo.active flag and that stage's
   eventInfo[e].active flag are set; otherwise evaluates to 0.
   The argument e is expanded more than once, so it should not have side effects.
*/
377   #define PetscLogEventSync(e, comm) \
378     (((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? PetscLogEventSynchronize((e), (comm)) : 0))
379 
/*
   PetscLogEventBegin(e, o1, o2, o3, o4) - when PetscLogPLB is non-null and both the current stage
   and event e within that stage are active, calls (*PetscLogPLB)(e, 0, o1, o2, o3, o4) with the
   objects cast to PetscObject and then, only if that call returned 0,
   PetscLogPushCurrentEvent_Internal(e). Otherwise evaluates to 0.
   Because the two calls are joined with ||, the macro yields 0 or 1 rather than the callee's
   error value. The argument e is expanded more than once, so it should not have side effects.
*/
379   #define PetscLogEventBegin(e, o1, o2, o3, o4) \
380     ((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (((*PetscLogPLB)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPushCurrentEvent_Internal(e)) : 0)
382 
/*
   PetscLogEventEnd(e, o1, o2, o3, o4) - when PetscLogPLE is non-null and both the current stage
   and event e within that stage are active, calls (*PetscLogPLE)(e, 0, o1, o2, o3, o4) with the
   objects cast to PetscObject and then, only if that call returned 0,
   PetscLogPopCurrentEvent_Internal(). Otherwise evaluates to 0.
   Because the two calls are joined with ||, the macro yields 0 or 1 rather than the callee's
   error value. The argument e is expanded more than once, so it should not have side effects.
*/
382   #define PetscLogEventEnd(e, o1, o2, o3, o4) \
383     ((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (((*PetscLogPLE)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPopCurrentEvent_Internal()) : 0)
385 
386 PETSC_EXTERN PetscErrorCode PetscLogEventGetFlops(PetscLogEvent, PetscLogDouble *);
387 PETSC_EXTERN PetscErrorCode PetscLogEventZeroFlops(PetscLogEvent);
388 
389   /*
390      These are used internally in the PETSc routines to keep a count of MPI messages and
391    their sizes.
392 
393      This does not work for MPI-Uni because our include/petsc/mpiuni/mpi.h file
394    uses macros to define the MPI operations.
395 
396      It does not work correctly on HP-UX because it processes the
397    macros in a way that sometimes double counts, hence
398    PETSC_HAVE_BROKEN_RECURSIVE_MACRO
399 
400      It does not work with Windows because winmpich lacks MPI_Type_size()
401 */
402   #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO) && !defined(PETSC_HAVE_MPI_MISSING_TYPESIZE)
403 /*
404    Logging of MPI activities
405 */
406 static inline PetscErrorCode PetscMPITypeSize(PetscInt count, MPI_Datatype type, PetscLogDouble *length)
407 {
408   PetscMPIInt typesize;
409 
410   if (type == MPI_DATATYPE_NULL) return 0;
411   PetscCallMPI(MPI_Type_size(type, &typesize));
412   *length += (PetscLogDouble)(count * typesize);
413   return 0;
414 }
415 
416 static inline PetscErrorCode PetscMPITypeSizeComm(MPI_Comm comm, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length)
417 {
418   PetscMPIInt typesize, size, p;
419 
420   if (type == MPI_DATATYPE_NULL) return 0;
421   PetscCallMPI(MPI_Comm_size(comm, &size));
422   PetscCallMPI(MPI_Type_size(type, &typesize));
423   for (p = 0; p < size; ++p) *length += (PetscLogDouble)(counts[p] * typesize);
424   return 0;
425 }
426 
427 static inline PetscErrorCode PetscMPITypeSizeCount(PetscInt n, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length)
428 {
429   PetscMPIInt typesize, p;
430 
431   if (type == MPI_DATATYPE_NULL) return 0;
432   PetscCallMPI(MPI_Type_size(type, &typesize));
433   for (p = 0; p < n; ++p) *length += (PetscLogDouble)(counts[p] * typesize);
434   return 0;
435 }
436 
437 /*
438     Returns 1 if the communicator is parallel else zero
439 */
440 static inline int PetscMPIParallelComm(MPI_Comm comm)
441 {
442   PetscMPIInt size;
443   MPI_Comm_size(comm, &size);
444   return size > 1;
445 }
446 
447     #define MPI_Irecv(buf, count, datatype, source, tag, comm, request) ((petsc_irecv_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || MPI_Irecv((buf), (count), (datatype), (source), (tag), (comm), (request)))
448 
449     #define MPI_Irecv_c(buf, count, datatype, source, tag, comm, request) ((petsc_irecv_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || MPI_Irecv_c((buf), (count), (datatype), (source), (tag), (comm), (request)))
450 
451     #define MPI_Isend(buf, count, datatype, dest, tag, comm, request) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || MPI_Isend((buf), (count), (datatype), (dest), (tag), (comm), (request)))
452 
453     #define MPI_Isend_c(buf, count, datatype, dest, tag, comm, request) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || MPI_Isend_c((buf), (count), (datatype), (dest), (tag), (comm), (request)))
454 
455     #define MPI_Startall_irecv(count, datatype, number, requests) ((petsc_irecv_ct += (PetscLogDouble)(number), 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || ((number) && MPI_Startall((number), (requests))))
456 
457     #define MPI_Startall_isend(count, datatype, number, requests) ((petsc_isend_ct += (PetscLogDouble)(number), 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || ((number) && MPI_Startall((number), (requests))))
458 
459     #define MPI_Start_isend(count, datatype, requests) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_isend_len)) || MPI_Start((requests)))
460 
461     #define MPI_Recv(buf, count, datatype, source, tag, comm, status) ((petsc_recv_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len)) || MPI_Recv((buf), (count), (datatype), (source), (tag), (comm), (status)))
462 
463     #define MPI_Recv_c(buf, count, datatype, source, tag, comm, status) ((petsc_recv_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len)) || MPI_Recv_c((buf), (count), (datatype), (source), (tag), (comm), (status)))
464 
465     #define MPI_Send(buf, count, datatype, dest, tag, comm) ((petsc_send_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_send_len)) || MPI_Send((buf), (count), (datatype), (dest), (tag), (comm)))
466 
467     #define MPI_Send_c(buf, count, datatype, dest, tag, comm) ((petsc_send_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_send_len)) || MPI_Send_c((buf), (count), (datatype), (dest), (tag), (comm)))
468 
469     #define MPI_Wait(request, status) ((petsc_wait_ct++, petsc_sum_of_waits_ct++, 0) || MPI_Wait((request), (status)))
470 
471     #define MPI_Waitany(a, b, c, d) ((petsc_wait_any_ct++, petsc_sum_of_waits_ct++, 0) || MPI_Waitany((a), (b), (c), (d)))
472 
473     #define MPI_Waitall(count, array_of_requests, array_of_statuses) ((petsc_wait_all_ct++, petsc_sum_of_waits_ct += (PetscLogDouble)(count), 0) || MPI_Waitall((count), (array_of_requests), (array_of_statuses)))
474 
475     #define MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm) (petsc_allreduce_ct += PetscMPIParallelComm((comm)), MPI_Allreduce((sendbuf), (recvbuf), (count), (datatype), (op), (comm)))
476 
477     #define MPI_Bcast(buffer, count, datatype, root, comm) ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || MPI_Bcast((buffer), (count), (datatype), (root), (comm)))
478 
479     #define MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm) ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || MPI_Reduce_scatter_block((sendbuf), (recvbuf), (recvcount), (datatype), (op), (comm)))
480 
481     #define MPI_Alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) \
482       ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Alltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm)))
483 
484     #define MPI_Alltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm) \
485       ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len)) || MPI_Alltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm)))
486 
487     #define MPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Allgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm)))
488 
489     #define MPI_Allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm) \
490       ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Allgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm)))
491 
492     #define MPI_Gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \
493       ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Gather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))
494 
495     #define MPI_Gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm) \
496       ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Gatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm)))
497 
498     #define MPI_Scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \
499       ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Scatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))
500 
501     #define MPI_Scatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm) \
502       ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Scatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm)))
503 
504     #define MPI_Ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \
505       ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Ialltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request)))
506 
507     #define MPI_Ialltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm, request) \
508       ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len)) || MPI_Ialltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm), (request)))
509 
510     #define MPI_Iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \
511       ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Iallgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request)))
512 
513     #define MPI_Iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm, request) \
514       ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Iallgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm), (request)))
515 
516     #define MPI_Igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
517       ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Igather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)))
518 
519     #define MPI_Igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm, request) \
520       ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Igatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm), (request)))
521 
522     #define MPI_Iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
523       ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Iscatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)))
524 
/*
   Logging wrapper for MPI_Iscatterv().

   Increments petsc_scatter_ct by one, passes the receive count and receive type to PetscMPITypeSize()
   together with &petsc_recv_len, then makes the real call.
   The || chain skips the MPI call if PetscMPITypeSize() returns nonzero; the result is 0 or 1.
*/
#define MPI_Iscatterv(sendbuf, sendcounts, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \
  ( \
    (petsc_scatter_ct++, 0) || \
    PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || \
    MPI_Iscatterv((sendbuf), (sendcounts), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request)) \
  )
527 
528   #else
529 
/*
   Fallback definitions used when the MPI logging wrappers are not available: nothing is counted and
   the first two arguments are ignored. MPI_Startall() is not called at all when number is 0.
*/
#define MPI_Startall_irecv(unused_count, unused_type, number, requests) ((number) && MPI_Startall((number), (requests)))
#define MPI_Startall_isend(unused_count, unused_type, number, requests) ((number) && MPI_Startall((number), (requests)))
#define MPI_Start_isend(unused_count, unused_type, requests)            (MPI_Start((requests)))
535 
536   #endif /* !MPIUNI_H && ! PETSC_HAVE_BROKEN_RECURSIVE_MACRO */
537 
538 #else /* ---Logging is turned off --------------------------------------------*/
539 
540   #define PetscLogMemory PETSC_FALSE
541 
542   #define PetscLogFlops(n) 0
543   #define PetscGetFlops(a) (*(a) = 0.0, 0)
544 
545   #define PetscLogStageRegister(a, b)   0
546   #define PetscLogStagePush(a)          0
547   #define PetscLogStagePop()            0
548   #define PetscLogStageSetActive(a, b)  0
549   #define PetscLogStageGetActive(a, b)  0
550   #define PetscLogStageGetVisible(a, b) 0
551   #define PetscLogStageSetVisible(a, b) 0
552   #define PetscLogStageGetId(a, b)      (*(b) = 0, 0)
553 
554   #define PetscLogEventRegister(a, b, c)    0
555   #define PetscLogEventSetCollective(a, b)  0
556   #define PetscLogEventIncludeClass(a)      0
557   #define PetscLogEventExcludeClass(a)      0
558   #define PetscLogEventActivate(a)          0
559   #define PetscLogEventDeactivate(a)        0
560   #define PetscLogEventDeactivatePush(a)    0
561   #define PetscLogEventDeactivatePop(a)     0
562   #define PetscLogEventActivateClass(a)     0
563   #define PetscLogEventDeactivateClass(a)   0
564   #define PetscLogEventSetActiveAll(a, b)   0
565   #define PetscLogEventGetId(a, b)          (*(b) = 0, 0)
566   #define PetscLogEventGetPerfInfo(a, b, c) 0
567   #define PetscLogEventSetDof(a, b, c)      0
568   #define PetscLogEventSetError(a, b, c)    0
569 
570   #define PetscLogPLB 0
571   #define PetscLogPLE 0
572   #define PetscLogPHC 0
573   #define PetscLogPHD 0
574 
575   #define PetscLogObjectParents(p, n, c) 0
576   #define PetscLogObjectCreate(h)        0
577   #define PetscLogObjectDestroy(h)       0
578 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3);
579 
580   #define PetscLogDefaultBegin()     0
581   #define PetscLogAllBegin()         0
582   #define PetscLogNestedBegin()      0
583   #define PetscLogTraceBegin(file)   0
584   #define PetscLogActions(a)         0
585   #define PetscLogObjects(a)         0
586   #define PetscLogSetThreshold(a, b) 0
587   #define PetscLogSet(lb, le)        0
588   #define PetscLogIsActive(flag)     (*(flag) = PETSC_FALSE, 0)
589 
590   #define PetscLogView(viewer)      0
591   #define PetscLogViewFromOptions() 0
592   #define PetscLogDump(c)           0
593 
594   #define PetscLogEventSync(e, comm)                            0
595   #define PetscLogEventBegin(e, o1, o2, o3, o4)                 0
596   #define PetscLogEventEnd(e, o1, o2, o3, o4)                   0
597 
/*
   If PETSC_USE_LOG is NOT defined, these still need to be!

   Nothing is logged and the count/datatype arguments are ignored. MPI_Startall() is not called at all
   when number is 0. Every macro argument and every expansion is parenthesised, matching the
   definitions used when logging is on but the MPI wrappers are unavailable.
*/
#define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests)))
#define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests)))
#define MPI_Start_isend(count, datatype, requests)            (MPI_Start((requests)))
602 
603 #endif /* PETSC_USE_LOG */
604 
605 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_DEVICE)
606 
607 /* Global GPU counters */
608 PETSC_EXTERN PetscLogDouble petsc_ctog_ct;
609 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct;
610 PETSC_EXTERN PetscLogDouble petsc_ctog_sz;
611 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz;
612 PETSC_EXTERN PetscLogDouble petsc_ctog_ct_scalar;
613 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct_scalar;
614 PETSC_EXTERN PetscLogDouble petsc_ctog_sz_scalar;
615 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz_scalar;
616 PETSC_EXTERN PetscLogDouble petsc_gflops;
617 PETSC_EXTERN PetscLogDouble petsc_gtime;
618 
619 static inline PetscErrorCode PetscLogCpuToGpu(PetscLogDouble size)
620 {
621   PetscFunctionBegin;
622   petsc_ctog_ct += 1;
623   petsc_ctog_sz += size;
624   PetscFunctionReturn(0);
625 }
626 
627 static inline PetscErrorCode PetscLogGpuToCpu(PetscLogDouble size)
628 {
629   PetscFunctionBegin;
630   petsc_gtoc_ct += 1;
631   petsc_gtoc_sz += size;
632   PetscFunctionReturn(0);
633 }
634 
635 static inline PetscErrorCode PetscLogCpuToGpuScalar(PetscLogDouble size)
636 {
637   PetscFunctionBegin;
638   petsc_ctog_ct_scalar += 1;
639   petsc_ctog_sz_scalar += size;
640   PetscFunctionReturn(0);
641 }
642 
643 static inline PetscErrorCode PetscLogGpuToCpuScalar(PetscLogDouble size)
644 {
645   PetscFunctionBegin;
646   petsc_gtoc_ct_scalar += 1;
647   petsc_gtoc_sz_scalar += size;
648   PetscFunctionReturn(0);
649 }
650 
651 /*@C
652        PetscLogGpuFlops - Log how many flops are performed in a calculation on the device
653 
654    Input Parameter:
655 .   flops - the number of flops
656 
657    Notes:
658      To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double,
659      not an integer. Use PetscLogFlops(4.0*n) not PetscLogFlops(4*n)
660 
661      The values are also added to the total flop count for the MPI rank that is set with `PetscLogFlops()`; hence the number of flops
662      just on the CPU would be the value from set from `PetscLogFlops()` minus the value set from `PetscLogGpuFlops()`
663 
664    Level: intermediate
665 
666 .seealso: `PetscLogView()`, `PetscLogFlops()`, `PetscLogGpuTimeBegin()`, `PetscLogGpuTimeEnd()`
667 @*/
668 static inline PetscErrorCode PetscLogGpuFlops(PetscLogDouble n)
669 {
670   PetscFunctionBegin;
671   PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops");
672   petsc_TotalFlops += PETSC_FLOPS_PER_OP * n;
673   petsc_gflops += PETSC_FLOPS_PER_OP * n;
674   PetscFunctionReturn(0);
675 }
676 
677 static inline PetscErrorCode PetscLogGpuTimeAdd(PetscLogDouble t)
678 {
679   PetscFunctionBegin;
680   petsc_gtime += t;
681   PetscFunctionReturn(0);
682 }
683 
684 PETSC_EXTERN PetscErrorCode PetscLogGpuTime(void);
685 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeBegin(void);
686 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeEnd(void);
687 
688 #else
689 
/*
   Without PETSC_USE_LOG and PETSC_HAVE_DEVICE the GPU logging calls collapse to the constant 0;
   their arguments are not evaluated.
*/
/* transfer counters */
#define PetscLogCpuToGpu(size)       0
#define PetscLogGpuToCpu(size)       0
#define PetscLogCpuToGpuScalar(size) 0
#define PetscLogGpuToCpuScalar(size) 0
/* flops */
#define PetscLogGpuFlops(n) 0
/* timing */
#define PetscLogGpuTime()      0
#define PetscLogGpuTimeAdd(t)  0
#define PetscLogGpuTimeBegin() 0
#define PetscLogGpuTimeEnd()   0
699 
700 #endif /* PETSC_USE_LOG && PETSC_HAVE_DEVICE */
701 
/*
   PetscPreLoadBegin - opens a preloading region; must be closed with PetscPreLoadEnd()

   flag - default for preloading; may be overridden by the -preload option
   name - name of the log stage used for the region

   Declares the locals PetscPreLoading, PetscPreLoadMax, PetscPreLoadIt and _stageNum, which
   PetscPreLoadStage() and PetscPreLoadEnd() refer to by name. The region body is executed
   PetscPreLoadMax + 1 times, where PetscPreLoadMax is (int)PetscPreLoading. The stage is registered
   on the first pass and looked up by name on later passes; it is set active when preloading is off
   or on any pass after the first. This macro intentionally leaves the "do {" and "for (...) {"
   blocks open.
*/
#define PetscPreLoadBegin(flag, name) \
  do { \
    PetscBool     PetscPreLoading = flag; \
    int           PetscPreLoadMax, PetscPreLoadIt; \
    PetscLogStage _stageNum; \
    PetscCall(PetscOptionsGetBool(NULL, NULL, "-preload", &PetscPreLoading, NULL)); \
    PetscPreLoadMax = (int)(PetscPreLoading); \
    if (PetscPreLoading) PetscPreLoadingUsed = PETSC_TRUE; \
    for (PetscPreLoadIt = 0; PetscPreLoadIt <= PetscPreLoadMax; PetscPreLoadIt++) { \
      PetscPreLoadingOn = PetscPreLoading; \
      PetscCall(PetscBarrier(NULL)); \
      if (PetscPreLoadIt > 0) { \
        PetscCall(PetscLogStageGetId(name, &_stageNum)); \
      } else { \
        PetscCall(PetscLogStageRegister(name, &_stageNum)); \
      } \
      PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \
      PetscCall(PetscLogStagePush(_stageNum));
717 
/*
   PetscPreLoadEnd - closes the region opened by PetscPreLoadBegin()

   Pops the current log stage and clears PetscPreLoading, then supplies the closing braces of the
   "for" loop and the "do" block that PetscPreLoadBegin() left open. The caller adds the final
   semicolon.
*/
#define PetscPreLoadEnd() \
    PetscCall(PetscLogStagePop()); \
    PetscPreLoading = PETSC_FALSE; \
  } /* end of for (PetscPreLoadIt ...) */ \
  } /* end of do */ \
  while (0)
724 
/*
   PetscPreLoadStage - switches to another named log stage inside a preloading region

   name - name of the stage to switch to

   Only usable between PetscPreLoadBegin() and PetscPreLoadEnd(), because it uses their locals
   PetscPreLoadIt, PetscPreLoadMax and _stageNum. Pops the current stage, registers the new stage
   on the first pass (or looks it up by name on later passes), applies the same activity rule as
   PetscPreLoadBegin() and pushes it.
*/
#define PetscPreLoadStage(name) \
  do { \
    PetscCall(PetscLogStagePop()); \
    if (PetscPreLoadIt <= 0) { \
      PetscCall(PetscLogStageRegister(name, &_stageNum)); \
    } else { \
      PetscCall(PetscLogStageGetId(name, &_stageNum)); \
    } \
    PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \
    PetscCall(PetscLogStagePush(_stageNum)); \
  } while (0)
733 
734 /* some vars for logging */
735 PETSC_EXTERN PetscBool PetscPreLoadingUsed; /* true if we are or have done preloading */
736 PETSC_EXTERN PetscBool PetscPreLoadingOn;   /* true if we are currently in a preloading calculation */
737 
738 #endif
739