xref: /petsc/include/petsclog.h (revision cdb0f33d09c128f365fdb48a6f07c56e211b6a43)
1 /*
2     Defines profile/logging in PETSc.
3 */
4 
5 #if !defined(PETSCLOG_H)
6 #define PETSCLOG_H
7 #include <petscsys.h>
8 #include <petsctime.h>
9 
10 /* General logging of information; different from event logging */
11 PETSC_EXTERN PetscErrorCode PetscInfo_Private(const char[],PetscObject,const char[],...);
12 #if defined(PETSC_USE_INFO)
13 #define PetscInfo(A,S)                       PetscInfo_Private(PETSC_FUNCTION_NAME,((PetscObject)A),S)
14 #define PetscInfo1(A,S,a1)                   PetscInfo_Private(PETSC_FUNCTION_NAME,((PetscObject)A),S,a1)
15 #define PetscInfo2(A,S,a1,a2)                PetscInfo_Private(PETSC_FUNCTION_NAME,((PetscObject)A),S,a1,a2)
16 #define PetscInfo3(A,S,a1,a2,a3)             PetscInfo_Private(PETSC_FUNCTION_NAME,((PetscObject)A),S,a1,a2,a3)
17 #define PetscInfo4(A,S,a1,a2,a3,a4)          PetscInfo_Private(PETSC_FUNCTION_NAME,((PetscObject)A),S,a1,a2,a3,a4)
18 #define PetscInfo5(A,S,a1,a2,a3,a4,a5)       PetscInfo_Private(PETSC_FUNCTION_NAME,((PetscObject)A),S,a1,a2,a3,a4,a5)
19 #define PetscInfo6(A,S,a1,a2,a3,a4,a5,a6)    PetscInfo_Private(PETSC_FUNCTION_NAME,((PetscObject)A),S,a1,a2,a3,a4,a5,a6)
20 #define PetscInfo7(A,S,a1,a2,a3,a4,a5,a6,a7) PetscInfo_Private(PETSC_FUNCTION_NAME,((PetscObject)A),S,a1,a2,a3,a4,a5,a6,a7)
21 #else
22 #define PetscInfo(A,S)                       0
23 #define PetscInfo1(A,S,a1)                   0
24 #define PetscInfo2(A,S,a1,a2)                0
25 #define PetscInfo3(A,S,a1,a2,a3)             0
26 #define PetscInfo4(A,S,a1,a2,a3,a4)          0
27 #define PetscInfo5(A,S,a1,a2,a3,a4,a5)       0
28 #define PetscInfo6(A,S,a1,a2,a3,a4,a5,a6)    0
29 #define PetscInfo7(A,S,a1,a2,a3,a4,a5,a6,a7) 0
30 #endif
31 
32 /*E
33     PetscInfoCommFlag - Describes the method by which to filter PetscInfo() by communicator size
34 
35     Used as an input for PetscInfoSetFilterCommSelf()
36 
37 $   PETSC_INFO_COMM_ALL - Default uninitialized value. PetscInfo() will not filter based on communicator size (i.e. will
38 print for all communicators)
39 $   PETSC_INFO_COMM_NO_SELF - PetscInfo() will NOT print for communicators with size = 1 (i.e. *_COMM_SELF)
40 $   PETSC_INFO_COMM_ONLY_SELF - PetscInfo will ONLY print for communicators with size = 1
41 
42     Level: intermediate
43 
44 .seealso: PetscInfo(), PetscInfoSetFromOptions(), PetscInfoSetFilterCommSelf()
45 E*/
typedef enum {
  PETSC_INFO_COMM_ALL       = -1, /* default: no filtering by communicator size */
  PETSC_INFO_COMM_NO_SELF   =  0, /* do not print for communicators of size 1 */
  PETSC_INFO_COMM_ONLY_SELF =  1  /* print only for communicators of size 1 */
} PetscInfoCommFlag;
51 
52 PETSC_EXTERN const char * const PetscInfoCommFlags[];
53 PETSC_EXTERN PetscErrorCode PetscInfoDeactivateClass(PetscClassId);
54 PETSC_EXTERN PetscErrorCode PetscInfoActivateClass(PetscClassId);
55 PETSC_EXTERN PetscErrorCode PetscInfoEnabled(PetscClassId, PetscBool *);
56 PETSC_EXTERN PetscErrorCode PetscInfoAllow(PetscBool);
57 PETSC_EXTERN PetscErrorCode PetscInfoSetFile(const char[],const char[]);
58 PETSC_EXTERN PetscErrorCode PetscInfoGetFile(char **,FILE **);
59 PETSC_EXTERN PetscErrorCode PetscInfoSetClasses(PetscBool,PetscInt,const char *const *);
60 PETSC_EXTERN PetscErrorCode PetscInfoGetClass(const char *, PetscBool *);
61 PETSC_EXTERN PetscErrorCode PetscInfoGetInfo(PetscBool *,PetscBool *,PetscBool *,PetscBool *,PetscInfoCommFlag *);
62 PETSC_EXTERN PetscErrorCode PetscInfoProcessClass(const char[],PetscInt,PetscClassId[]);
63 PETSC_EXTERN PetscErrorCode PetscInfoSetFilterCommSelf(PetscInfoCommFlag);
64 PETSC_EXTERN PetscErrorCode PetscInfoSetFromOptions(PetscOptions);
65 PETSC_EXTERN PetscErrorCode PetscInfoDestroy(void);
66 PETSC_EXTERN PetscBool      PetscLogPrintInfo;  /* if true, indicates PetscInfo() is turned on */
67 
68 /*MC
    PetscLogEvent - id used to identify PETSc or user events, which time portions (blocks) of executable
     code.
71 
72     Level: intermediate
73 
74 .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscLogStage
75 M*/
76 typedef int PetscLogEvent;
77 
78 /*MC
79     PetscLogStage - id used to identify user stages (phases, sections) of runs - for logging
80 
81     Level: intermediate
82 
83 .seealso: PetscLogStageRegister(), PetscLogStagePush(), PetscLogStagePop(), PetscLogEvent
84 M*/
85 typedef int PetscLogStage;
86 
87 #define PETSC_EVENT  1311311
88 PETSC_EXTERN PetscLogEvent PETSC_LARGEST_EVENT;
89 
90 /* Global flop counter */
91 PETSC_EXTERN PetscLogDouble petsc_TotalFlops;
92 PETSC_EXTERN PetscLogDouble petsc_tmp_flops;
93 
94 /* Global GPU counters */
95 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
96 PETSC_EXTERN PetscLogDouble petsc_ctog_ct;
97 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct;
98 PETSC_EXTERN PetscLogDouble petsc_ctog_sz;
99 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz;
100 PETSC_EXTERN PetscLogDouble petsc_gflops;
101 PETSC_EXTERN PetscLogDouble petsc_gtime;
102 #endif
103 
104 /* We must make the following structures available to access the event
105      activation flags in the PetscLogEventBegin/End() macros. These are not part of the PETSc public
106      API and are not intended to be used by other parts of PETSc or by users.
107 
108      The code that manipulates these structures is in src/sys/logging/utils.
109 */
110 typedef struct _n_PetscIntStack *PetscIntStack;
111 
112 /* -----------------------------------------------------------------------------------------------------*/
113 /*
114     PetscClassRegInfo, PetscClassPerfInfo - Each class has two data structures associated with it. The first has
115        static information about it, the second collects statistics on how many objects of the class are created,
116        how much memory they use, etc.
117 
118     PetscClassRegLog, PetscClassPerfLog - arrays of the PetscClassRegInfo and PetscClassPerfInfo for all classes.
119 */
120 typedef struct  {
121   char           *name;   /* The class name */
122   PetscClassId   classid; /* The integer identifying this class */
123 } PetscClassRegInfo;
124 
125 typedef struct {
126   PetscClassId   id;           /* The integer identifying this class */
127   int            creations;    /* The number of objects of this class created */
128   int            destructions; /* The number of objects of this class destroyed */
129   PetscLogDouble mem;          /* The total memory allocated by objects of this class */
130   PetscLogDouble descMem;      /* The total memory allocated by descendents of these objects */
131 } PetscClassPerfInfo;
132 
133 typedef struct _n_PetscClassRegLog *PetscClassRegLog;
134 struct _n_PetscClassRegLog {
135   int               numClasses; /* The number of classes registered */
136   int               maxClasses; /* The maximum number of classes */
137   PetscClassRegInfo *classInfo; /* The structure for class information (classids are monotonicly increasing) */
138 };
139 
140 typedef struct _n_PetscClassPerfLog *PetscClassPerfLog;
141 struct _n_PetscClassPerfLog {
142   int                numClasses; /* The number of logging classes */
143   int                maxClasses; /* The maximum number of classes */
144   PetscClassPerfInfo *classInfo; /* The structure for class information (classids are monotonicly increasing) */
145 };
146 /* -----------------------------------------------------------------------------------------------------*/
147 /*
148     PetscEventRegInfo, PetscEventPerfInfo - Each event has two data structures associated with it. The first has
149        static information about it, the second collects statistics on how many times the event is used, how
150        much time it takes, etc.
151 
152     PetscEventRegLog, PetscEventPerfLog - an array of all PetscEventRegInfo and PetscEventPerfInfo for all events. There is one
153       of these for each stage.
154 
155 */
156 typedef struct {
157   char         *name;         /* The name of this event */
158   PetscClassId classid;       /* The class the event is associated with */
159   PetscBool    collective;    /* Flag this event as collective */
160 #if defined (PETSC_HAVE_MPE)
161   int          mpe_id_begin;  /* MPE IDs that define the event */
162   int          mpe_id_end;
163 #endif
164 } PetscEventRegInfo;
165 
166 typedef struct {
167   int            id;            /* The integer identifying this event */
168   PetscBool      active;        /* The flag to activate logging */
169   PetscBool      visible;       /* The flag to print info in summary */
170   int            depth;         /* The nesting depth of the event call */
171   int            count;         /* The number of times this event was executed */
172   PetscLogDouble flops, flops2, flopsTmp; /* The flops and flops^2 used in this event */
173   PetscLogDouble time, time2, timeTmp;    /* The time and time^2 taken for this event */
174   PetscLogDouble syncTime;                /* The synchronization barrier time */
175   PetscLogDouble dof[8];        /* The number of degrees of freedom associated with this event */
176   PetscLogDouble errors[8];     /* The errors (user-defined) associated with this event */
177   PetscLogDouble numMessages;   /* The number of messages in this event */
178   PetscLogDouble messageLength; /* The total message lengths in this event */
179   PetscLogDouble numReductions; /* The number of reductions in this event */
180   PetscLogDouble memIncrease;   /* How much the resident memory has increased in this event */
181   PetscLogDouble mallocIncrease;/* How much the maximum malloced space has increased in this event */
182   PetscLogDouble mallocSpace;   /* How much the space was malloced and kept during this event */
183   PetscLogDouble mallocIncreaseEvent;  /* Maximum of the high water mark with in event minus memory available at the end of the event */
184   #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
185   PetscLogDouble CpuToGpuCount; /* The total number of CPU to GPU copies */
186   PetscLogDouble GpuToCpuCount; /* The total number of GPU to CPU copies */
187   PetscLogDouble CpuToGpuSize;  /* The total size of CPU to GPU copies */
188   PetscLogDouble GpuToCpuSize;  /* The total size of GPU to CPU copies */
189   PetscLogDouble GpuFlops;      /* The flops done on a GPU in this event */
190   PetscLogDouble GpuTime;       /* The time spent on a GPU in this event */
191   #endif
192 } PetscEventPerfInfo;
193 
194 typedef struct _n_PetscEventRegLog *PetscEventRegLog;
195 struct _n_PetscEventRegLog {
196   int               numEvents;  /* The number of registered events */
197   int               maxEvents;  /* The maximum number of events */
198   PetscEventRegInfo *eventInfo; /* The registration information for each event */
199 };
200 
201 typedef struct _n_PetscEventPerfLog *PetscEventPerfLog;
202 struct _n_PetscEventPerfLog {
203   int                numEvents;  /* The number of logging events */
204   int                maxEvents;  /* The maximum number of events */
205   PetscEventPerfInfo *eventInfo; /* The performance information for each event */
206 };
207 /* ------------------------------------------------------------------------------------------------------------*/
208 /*
209    PetscStageInfo - Contains all the information about a particular stage.
210 
211    PetscStageLog - An array of PetscStageInfo for each registered stage. There is a single one of these in the code.
212 */
213 typedef struct _PetscStageInfo {
214   char               *name;     /* The stage name */
215   PetscBool          used;      /* The stage was pushed on this processor */
216   PetscEventPerfInfo perfInfo;  /* The stage performance information */
217   PetscEventPerfLog  eventLog;  /* The event information for this stage */
218   PetscClassPerfLog  classLog;  /* The class information for this stage */
219 } PetscStageInfo;
220 
221 typedef struct _n_PetscStageLog *PetscStageLog;
222 struct _n_PetscStageLog {
223   int              numStages;   /* The number of registered stages */
224   int              maxStages;   /* The maximum number of stages */
225   PetscIntStack    stack;       /* The stack for active stages */
226   int              curStage;    /* The current stage (only used in macros so we don't call PetscIntStackTop) */
227   PetscStageInfo   *stageInfo;  /* The information for each stage */
228   PetscEventRegLog eventLog;    /* The registered events */
229   PetscClassRegLog classLog;    /* The registered classes */
230 };
231 /* -----------------------------------------------------------------------------------------------------*/
232 
233 PETSC_EXTERN PetscErrorCode PetscLogObjectParent(PetscObject,PetscObject);
234 PETSC_EXTERN PetscErrorCode PetscLogObjectMemory(PetscObject,PetscLogDouble);
235 
236 #if defined(PETSC_USE_LOG)  /* --- Logging is turned on --------------------------------*/
237 PETSC_EXTERN PetscStageLog petsc_stageLog;
238 PETSC_EXTERN PetscErrorCode PetscLogGetStageLog(PetscStageLog*);
239 PETSC_EXTERN PetscErrorCode PetscStageLogGetCurrent(PetscStageLog,int*);
240 PETSC_EXTERN PetscErrorCode PetscStageLogGetEventPerfLog(PetscStageLog,int,PetscEventPerfLog*);
241 
242 /*
243    Flop counting:  We count each arithmetic operation (e.g., addition, multiplication) separately.
244 
245    For the complex numbers version, note that
246        1 complex addition = 2 flops
247        1 complex multiplication = 6 flops,
248    where we define 1 flop as that for a double precision scalar.  We roughly approximate
249    flop counting for complex numbers by multiplying the total flops by 4; this corresponds
250    to the assumption that we're counting mostly additions and multiplications -- and
251    roughly the same number of each.  More accurate counting could be done by distinguishing
252    among the various arithmetic operations.
253  */
254 
/* Weight applied to every logged operation: 1.0 for real builds, 4.0 when PETSC_USE_COMPLEX is defined */
#if !defined(PETSC_USE_COMPLEX)
#define PETSC_FLOPS_PER_OP 1.0
#else
#define PETSC_FLOPS_PER_OP 4.0
#endif
260 
261 PETSC_STATIC_INLINE PetscErrorCode PetscLogFlops(PetscLogDouble n)
262 {
263   PetscFunctionBegin;
264 #if defined(PETSC_USE_DEBUG)
265   if (n < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Cannot log negative flops");
266 #endif
267   petsc_TotalFlops += PETSC_FLOPS_PER_OP*n;
268   PetscFunctionReturn(0);
269 }
270 
271 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
272 PETSC_STATIC_INLINE PetscErrorCode PetscLogCpuToGpu(PetscLogDouble size)
273 {
274   PetscFunctionBegin;
275   petsc_ctog_ct += 1;
276   petsc_ctog_sz += size;
277   PetscFunctionReturn(0);
278 }
279 
280 PETSC_STATIC_INLINE PetscErrorCode PetscLogGpuToCpu(PetscLogDouble size)
281 {
282   PetscFunctionBegin;
283   petsc_gtoc_ct += 1;
284   petsc_gtoc_sz += size;
285   PetscFunctionReturn(0);
286 }
287 
288 PETSC_STATIC_INLINE PetscErrorCode PetscLogGpuFlops(PetscLogDouble n)
289 {
290   PetscFunctionBegin;
291 #if defined(PETSC_USE_DEBUG)
292   if (n < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Cannot log negative flops");
293 #endif
294   petsc_TotalFlops += PETSC_FLOPS_PER_OP*n;
295   petsc_gflops += PETSC_FLOPS_PER_OP*n;
296   PetscFunctionReturn(0);
297 }
298 
299 PETSC_STATIC_INLINE PetscErrorCode PetscLogGpuTimeBegin()
300 {
301   PetscErrorCode ierr;
302   PetscFunctionBegin;
303   ierr = PetscTimeSubtract(&petsc_gtime);CHKERRQ(ierr);
304   PetscFunctionReturn(0);
305 }
306 
307 PETSC_STATIC_INLINE PetscErrorCode PetscLogGpuTimeEnd()
308 {
309   PetscErrorCode ierr;
310   PetscFunctionBegin;
311   ierr = PetscTimeAdd(&petsc_gtime);CHKERRQ(ierr);
312   PetscFunctionReturn(0);
313 }
314 
315 PETSC_STATIC_INLINE PetscErrorCode PetscLogGpuTimeAdd(PetscLogDouble t)
316 {
317   PetscFunctionBegin;
318   petsc_gtime += t;
319   PetscFunctionReturn(0);
320 }
321 #endif
322 
323 PETSC_EXTERN PetscErrorCode PetscGetFlops(PetscLogDouble *);
324 
325 #if defined (PETSC_HAVE_MPE)
326 PETSC_EXTERN PetscErrorCode PetscLogMPEBegin(void);
327 PETSC_EXTERN PetscErrorCode PetscLogMPEDump(const char[]);
328 #endif
329 
330 PETSC_EXTERN PetscErrorCode (*PetscLogPLB)(PetscLogEvent,int,PetscObject,PetscObject,PetscObject,PetscObject);
331 PETSC_EXTERN PetscErrorCode (*PetscLogPLE)(PetscLogEvent,int,PetscObject,PetscObject,PetscObject,PetscObject);
332 PETSC_EXTERN PetscErrorCode (*PetscLogPHC)(PetscObject);
333 PETSC_EXTERN PetscErrorCode (*PetscLogPHD)(PetscObject);
334 
/*
   PetscLogObjectParents(p,n,d) - calls PetscLogObjectParent(p,d[i]) for i = 0 .. n-1.
   The expansion is "0;" followed by a do/while statement, and it assigns to and checks
   a variable named ierr that must already be in scope where the macro is used.
*/
#define PetscLogObjectParents(p,n,d) \
  0; \
  do { \
    int _i; \
    for (_i=0; _i<(n); _i++) { \
      ierr = PetscLogObjectParent((PetscObject)(p),(PetscObject)(d)[_i]);CHKERRQ(ierr); \
    } \
  } while(0)

/* Invoke the object-creation hook PetscLogPHC on h when one is installed; otherwise evaluate to 0 */
#define PetscLogObjectCreate(h) \
  ((PetscLogPHC) ? (*PetscLogPHC)((PetscObject)(h)) : 0)

/* Invoke the object-destruction hook PetscLogPHD on h when one is installed; otherwise evaluate to 0 */
#define PetscLogObjectDestroy(h) \
  ((PetscLogPHD) ? (*PetscLogPHD)((PetscObject)(h)) : 0)
338 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...);
339 
340 /* Initialization functions */
341 PETSC_EXTERN PetscErrorCode PetscLogDefaultBegin(void);
342 PETSC_EXTERN PetscErrorCode PetscLogAllBegin(void);
343 PETSC_EXTERN PetscErrorCode PetscLogNestedBegin(void);
344 PETSC_EXTERN PetscErrorCode PetscLogTraceBegin(FILE *);
345 PETSC_EXTERN PetscErrorCode PetscLogActions(PetscBool);
346 PETSC_EXTERN PetscErrorCode PetscLogObjects(PetscBool);
347 PETSC_EXTERN PetscErrorCode PetscLogSetThreshold(PetscLogDouble,PetscLogDouble*);
348 PETSC_EXTERN PetscErrorCode PetscLogSet(PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject),
349                                         PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject));
350 
351 /* Output functions */
352 PETSC_EXTERN PetscErrorCode PetscLogView(PetscViewer);
353 PETSC_EXTERN PetscErrorCode PetscLogViewFromOptions(void);
354 PETSC_EXTERN PetscErrorCode PetscLogDump(const char[]);
355 
356 /* Stage functions */
357 PETSC_EXTERN PetscErrorCode PetscLogStageRegister(const char[],PetscLogStage*);
358 PETSC_EXTERN PetscErrorCode PetscLogStagePush(PetscLogStage);
359 PETSC_EXTERN PetscErrorCode PetscLogStagePop(void);
360 PETSC_EXTERN PetscErrorCode PetscLogStageSetActive(PetscLogStage,PetscBool);
361 PETSC_EXTERN PetscErrorCode PetscLogStageGetActive(PetscLogStage,PetscBool*);
362 PETSC_EXTERN PetscErrorCode PetscLogStageSetVisible(PetscLogStage,PetscBool);
363 PETSC_EXTERN PetscErrorCode PetscLogStageGetVisible(PetscLogStage,PetscBool*);
364 PETSC_EXTERN PetscErrorCode PetscLogStageGetId(const char[],PetscLogStage*);
365 
366 /* Event functions */
367 PETSC_EXTERN PetscErrorCode PetscLogEventRegister(const char[],PetscClassId,PetscLogEvent*);
368 PETSC_EXTERN PetscErrorCode PetscLogEventSetCollective(PetscLogEvent,PetscBool);
369 PETSC_EXTERN PetscErrorCode PetscLogEventIncludeClass(PetscClassId);
370 PETSC_EXTERN PetscErrorCode PetscLogEventExcludeClass(PetscClassId);
371 PETSC_EXTERN PetscErrorCode PetscLogEventActivate(PetscLogEvent);
372 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivate(PetscLogEvent);
373 PETSC_EXTERN PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent,PetscBool);
374 PETSC_EXTERN PetscErrorCode PetscLogEventActivateClass(PetscClassId);
375 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivateClass(PetscClassId);
376 PETSC_EXTERN PetscErrorCode PetscLogEventGetId(const char[],PetscLogEvent*);
377 PETSC_EXTERN PetscErrorCode PetscLogEventGetPerfInfo(int,PetscLogEvent,PetscEventPerfInfo*);
378 PETSC_EXTERN PetscErrorCode PetscLogEventSetDof(PetscLogEvent, PetscInt, PetscLogDouble);
379 PETSC_EXTERN PetscErrorCode PetscLogEventSetError(PetscLogEvent, PetscInt, PetscLogDouble);
380 
381 /* Global counters */
382 PETSC_EXTERN PetscLogDouble petsc_irecv_ct;
383 PETSC_EXTERN PetscLogDouble petsc_isend_ct;
384 PETSC_EXTERN PetscLogDouble petsc_recv_ct;
385 PETSC_EXTERN PetscLogDouble petsc_send_ct;
386 PETSC_EXTERN PetscLogDouble petsc_irecv_len;
387 PETSC_EXTERN PetscLogDouble petsc_isend_len;
388 PETSC_EXTERN PetscLogDouble petsc_recv_len;
389 PETSC_EXTERN PetscLogDouble petsc_send_len;
390 PETSC_EXTERN PetscLogDouble petsc_allreduce_ct;
391 PETSC_EXTERN PetscLogDouble petsc_gather_ct;
392 PETSC_EXTERN PetscLogDouble petsc_scatter_ct;
393 PETSC_EXTERN PetscLogDouble petsc_wait_ct;
394 PETSC_EXTERN PetscLogDouble petsc_wait_any_ct;
395 PETSC_EXTERN PetscLogDouble petsc_wait_all_ct;
396 PETSC_EXTERN PetscLogDouble petsc_sum_of_waits_ct;
397 
398 PETSC_EXTERN PetscBool      PetscLogMemory;
399 
400 PETSC_EXTERN PetscBool PetscLogSyncOn;  /* true if logging synchronization is enabled */
401 PETSC_EXTERN PetscErrorCode PetscLogEventSynchronize(PetscLogEvent, MPI_Comm);
402 
/*
   Event macros.  Each one evaluates to 0 unless (a) the relevant hook pointer is set,
   (b) the current stage's perfInfo.active flag is set and (c) event e is active in the
   current stage; only then is the underlying routine called and its result returned.
   The event argument e appears more than once in each expansion, so it should not
   have side effects.
*/

/* Calls PetscLogEventSynchronize(e,comm); gated on the begin hook PetscLogPLB */
#define PetscLogEventSync(e,comm) \
  (((PetscLogPLB && \
     petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && \
     petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? \
    PetscLogEventSynchronize((e),(comm)) : 0 ))

/* Calls the begin hook PetscLogPLB with the event and up to four associated objects */
#define PetscLogEventBegin(e,o1,o2,o3,o4) \
  (((PetscLogPLB && \
     petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && \
     petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? \
    (*PetscLogPLB)((e),0,(PetscObject)(o1),(PetscObject)(o2),(PetscObject)(o3),(PetscObject)(o4)) : 0 ))

/* Calls the end hook PetscLogPLE with the event and up to four associated objects */
#define PetscLogEventEnd(e,o1,o2,o3,o4) \
  (((PetscLogPLE && \
     petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && \
     petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? \
    (*PetscLogPLE)((e),0,(PetscObject)(o1),(PetscObject)(o2),(PetscObject)(o3),(PetscObject)(o4)) : 0 ))
414 
415 PETSC_EXTERN PetscErrorCode PetscLogEventGetFlops(PetscLogEvent,PetscLogDouble*);
416 PETSC_EXTERN PetscErrorCode PetscLogEventZeroFlops(PetscLogEvent);
417 
418 /*
419      These are used internally in the PETSc routines to keep a count of MPI messages and
420    their sizes.
421 
     This does not work for MPI-Uni because our include/petsc/mpiuni/mpi.h file
   uses macros to define the MPI operations.

     It does not work correctly on HP-UX because it processes the
   macros in a way that sometimes double counts, hence
   PETSC_HAVE_BROKEN_RECURSIVE_MACRO
428 
429      It does not work with Windows because winmpich lacks MPI_Type_size()
430 */
431 #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO) && !defined (PETSC_HAVE_MPI_MISSING_TYPESIZE)
432 /*
433    Logging of MPI activities
434 */
435 PETSC_STATIC_INLINE PetscErrorCode PetscMPITypeSize(PetscInt count,MPI_Datatype type,PetscLogDouble *length)
436 {
437   PetscMPIInt    typesize;
438   PetscErrorCode ierr;
439   if (type == MPI_DATATYPE_NULL) return 0;
440   ierr     = MPI_Type_size(type,&typesize);CHKERRQ(ierr);
441   *length += (PetscLogDouble) (count*typesize);
442   return 0;
443 }
444 
445 PETSC_STATIC_INLINE PetscErrorCode PetscMPITypeSizeComm(MPI_Comm comm,const PetscMPIInt *counts,MPI_Datatype type,PetscLogDouble *length)
446 {
447   PetscMPIInt    typesize,size,p;
448   PetscErrorCode ierr;
449 
450   if (type == MPI_DATATYPE_NULL) return 0;
451   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
452   ierr = MPI_Type_size(type,&typesize);CHKERRQ(ierr);
453   for (p=0; p<size; ++p) {
454     *length += (PetscLogDouble) (counts[p]*typesize);
455   }
456   return 0;
457 }
458 
459 PETSC_STATIC_INLINE PetscErrorCode PetscMPITypeSizeCount(PetscInt n,const PetscMPIInt *counts,MPI_Datatype type,PetscLogDouble *length)
460 {
461   PetscMPIInt    typesize,p;
462   PetscErrorCode ierr;
463 
464   if (type == MPI_DATATYPE_NULL) return 0;
465   ierr = MPI_Type_size(type,&typesize);CHKERRQ(ierr);
466   for (p=0; p<n; ++p) {
467     *length += (PetscLogDouble) (counts[p]*typesize);
468   }
469   return 0;
470 }
471 
472 /*
473     Returns 1 if the communicator is parallel else zero
474 */
475 PETSC_STATIC_INLINE int PetscMPIParallelComm(MPI_Comm comm)
476 {
477   PetscMPIInt size; MPI_Comm_size(comm,&size); return size > 1;
478 }
479 
/*
   Point-to-point wrappers.  Each macro bumps a message counter, adds the message size to
   the matching length counter through PetscMPITypeSize() and then makes the MPI call.
   The operands are chained with ||, so the MPI call is made only when the size lookup
   returned 0, and the value of the whole expression is 0 or 1 rather than the MPI error
   code.  A macro name is not expanded again inside its own expansion, so the inner call
   is left for the MPI implementation to resolve.
*/
#define MPI_Irecv(buf,count,datatype,source,tag,comm,request) \
  ((petsc_irecv_ct++,0) || \
   PetscMPITypeSize((count),(datatype),&(petsc_irecv_len)) || \
   MPI_Irecv((buf),(count),(datatype),(source),(tag),(comm),(request)))

#define MPI_Isend(buf,count,datatype,dest,tag,comm,request) \
  ((petsc_isend_ct++,0) || \
   PetscMPITypeSize((count),(datatype),&(petsc_isend_len)) || \
   MPI_Isend((buf),(count),(datatype),(dest),(tag),(comm),(request)))

/* Counts number receives; MPI_Startall() is skipped when number is 0 */
#define MPI_Startall_irecv(count,datatype,number,requests) \
  ((petsc_irecv_ct += (PetscLogDouble)(number),0) || \
   PetscMPITypeSize((count),(datatype),&(petsc_irecv_len)) || \
   ((number) && MPI_Startall((number),(requests))))

/* Counts number sends; MPI_Startall() is skipped when number is 0 */
#define MPI_Startall_isend(count,datatype,number,requests) \
  ((petsc_isend_ct += (PetscLogDouble)(number),0) || \
   PetscMPITypeSize((count),(datatype),&(petsc_isend_len)) || \
   ((number) && MPI_Startall((number),(requests))))

#define MPI_Start_isend(count,datatype,requests) \
  ((petsc_isend_ct++,0) || \
   PetscMPITypeSize((count),(datatype),(&petsc_isend_len)) || \
   MPI_Start((requests)))

#define MPI_Recv(buf,count,datatype,source,tag,comm,status) \
  ((petsc_recv_ct++,0) || \
   PetscMPITypeSize((count),(datatype),(&petsc_recv_len)) || \
   MPI_Recv((buf),(count),(datatype),(source),(tag),(comm),(status)))

#define MPI_Send(buf,count,datatype,dest,tag,comm) \
  ((petsc_send_ct++,0) || \
   PetscMPITypeSize((count),(datatype),(&petsc_send_len)) || \
   MPI_Send((buf),(count),(datatype),(dest),(tag),(comm)))
500 
/*
   Wait wrappers.  Each macro increments its own counter and petsc_sum_of_waits_ct
   (by one, or by count for MPI_Waitall) before making the MPI call.
*/
#define MPI_Wait(request,status) \
  ((petsc_wait_ct++,petsc_sum_of_waits_ct++,0) || \
   MPI_Wait((request),(status)))

#define MPI_Waitany(a,b,c,d) \
  ((petsc_wait_any_ct++,petsc_sum_of_waits_ct++,0) || \
   MPI_Waitany((a),(b),(c),(d)))

#define MPI_Waitall(count,array_of_requests,array_of_statuses) \
  ((petsc_wait_all_ct++,petsc_sum_of_waits_ct += (PetscLogDouble) (count),0) || \
   MPI_Waitall((count),(array_of_requests),(array_of_statuses)))
509 
/*
   Blocking collective wrappers.
     - Allreduce, Bcast, Reduce_scatter_block, Alltoall and Alltoallv add
       PetscMPIParallelComm(comm) (0 or 1) to petsc_allreduce_ct.
     - Allgather and Allgatherv add PetscMPIParallelComm(comm) to petsc_gather_ct.
     - Gather/Gatherv and Scatter/Scatterv always increment petsc_gather_ct and
       petsc_scatter_ct respectively.
   Where a size is logged it goes to petsc_send_len (send side) or petsc_recv_len
   (receive side of the scatters).  The value of each expression is 0 or 1.
*/
#define MPI_Allreduce(sendbuf,recvbuf,count,datatype,op,comm) \
  ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || \
   MPI_Allreduce((sendbuf),(recvbuf),(count),(datatype),(op),(comm)))

#define MPI_Bcast(buffer,count,datatype,root,comm) \
  ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || \
   MPI_Bcast((buffer),(count),(datatype),(root),(comm)))

#define MPI_Reduce_scatter_block(sendbuf,recvbuf,recvcount,datatype,op,comm) \
  ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || \
   MPI_Reduce_scatter_block((sendbuf),(recvbuf),(recvcount),(datatype),(op),(comm)))

#define MPI_Alltoall(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm) \
  ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || \
   PetscMPITypeSize((sendcount),(sendtype),(&petsc_send_len)) || \
   MPI_Alltoall((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(comm)))

#define MPI_Alltoallv(sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm) \
  ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || \
   PetscMPITypeSizeComm((comm),(sendcnts),(sendtype),(&petsc_send_len)) || \
   MPI_Alltoallv((sendbuf),(sendcnts),(sdispls),(sendtype),(recvbuf),(recvcnts),(rdispls),(recvtype),(comm)))

#define MPI_Allgather(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm) \
  ((petsc_gather_ct += PetscMPIParallelComm((comm)),0) || \
   MPI_Allgather((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(comm)))

#define MPI_Allgatherv(sendbuf,sendcount,sendtype,recvbuf,recvcount,displs,recvtype,comm) \
  ((petsc_gather_ct += PetscMPIParallelComm((comm)),0) || \
   MPI_Allgatherv((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(displs),(recvtype),(comm)))

#define MPI_Gather(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,root,comm) \
  ((petsc_gather_ct++,0) || \
   PetscMPITypeSize((sendcount),(sendtype),(&petsc_send_len)) || \
   MPI_Gather((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(root),(comm)))

#define MPI_Gatherv(sendbuf,sendcount,sendtype,recvbuf,recvcount,displs,recvtype,root,comm) \
  ((petsc_gather_ct++,0) || \
   PetscMPITypeSize((sendcount),(sendtype),(&petsc_send_len)) || \
   MPI_Gatherv((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(displs),(recvtype),(root),(comm)))

#define MPI_Scatter(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,root,comm) \
  ((petsc_scatter_ct++,0) || \
   PetscMPITypeSize((recvcount),(recvtype),(&petsc_recv_len)) || \
   MPI_Scatter((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(root),(comm)))

#define MPI_Scatterv(sendbuf,sendcount,displs,sendtype,recvbuf,recvcount,recvtype,root,comm) \
  ((petsc_scatter_ct++,0) || \
   PetscMPITypeSize((recvcount),(recvtype),(&petsc_recv_len)) || \
   MPI_Scatterv((sendbuf),(sendcount),(displs),(sendtype),(recvbuf),(recvcount),(recvtype),(root),(comm)))
542 
/*
   Non-blocking collective wrappers.  They update the same counters as their blocking
   counterparts (petsc_allreduce_ct, petsc_gather_ct, petsc_scatter_ct, petsc_send_len,
   petsc_recv_len) and pass the extra request argument through to the MPI call.
   The value of each expression is 0 or 1.
*/
#define MPI_Ialltoall(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm,request) \
  ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || \
   PetscMPITypeSize((sendcount),(sendtype),(&petsc_send_len)) || \
   MPI_Ialltoall((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(comm),(request)))

#define MPI_Ialltoallv(sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm,request) \
  ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || \
   PetscMPITypeSizeComm((comm),(sendcnts),(sendtype),(&petsc_send_len)) || \
   MPI_Ialltoallv((sendbuf),(sendcnts),(sdispls),(sendtype),(recvbuf),(recvcnts),(rdispls),(recvtype),(comm),(request)))

#define MPI_Iallgather(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm,request) \
  ((petsc_gather_ct += PetscMPIParallelComm((comm)),0) || \
   MPI_Iallgather((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(comm),(request)))

#define MPI_Iallgatherv(sendbuf,sendcount,sendtype,recvbuf,recvcount,displs,recvtype,comm,request) \
  ((petsc_gather_ct += PetscMPIParallelComm((comm)),0) || \
   MPI_Iallgatherv((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(displs),(recvtype),(comm),(request)))

#define MPI_Igather(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,root,comm,request) \
  ((petsc_gather_ct++,0) || \
   PetscMPITypeSize((sendcount),(sendtype),(&petsc_send_len)) || \
   MPI_Igather((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(root),(comm),(request)))

#define MPI_Igatherv(sendbuf,sendcount,sendtype,recvbuf,recvcount,displs,recvtype,root,comm,request) \
  ((petsc_gather_ct++,0) || \
   PetscMPITypeSize((sendcount),(sendtype),(&petsc_send_len)) || \
   MPI_Igatherv((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(displs),(recvtype),(root),(comm),(request)))

#define MPI_Iscatter(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,root,comm,request) \
  ((petsc_scatter_ct++,0) || \
   PetscMPITypeSize((recvcount),(recvtype),(&petsc_recv_len)) || \
   MPI_Iscatter((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(root),(comm),(request)))

#define MPI_Iscatterv(sendbuf,sendcount,displs,sendtype,recvbuf,recvcount,recvtype,root,comm,request) \
  ((petsc_scatter_ct++,0) || \
   PetscMPITypeSize((recvcount),(recvtype),(&petsc_recv_len)) || \
   MPI_Iscatterv((sendbuf),(sendcount),(displs),(sendtype),(recvbuf),(recvcount),(recvtype),(root),(comm),(request)))
566 
567 /* We treat MPI_Ineighbor_alltoallv as a set of isend/irecv instead of a traditional MPI collective.
568    OpenMPI-3.0 ran into error with outdegree = indegree = 0, so we use ((outdegree) || (indegree)) as a workaround.
569  */
570 #define MPI_Start_ineighbor_alltoallv(outdegree,indegree,sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm,request) \
571   ((petsc_isend_ct += (PetscLogDouble)(outdegree),0) || (petsc_irecv_ct += (PetscLogDouble)(indegree),0) || PetscMPITypeSizeCount((outdegree),(sendcnts),(sendtype),(&petsc_isend_len)) || PetscMPITypeSizeCount((indegree),(recvcnts),(recvtype),(&petsc_irecv_len)) || (((outdegree) || (indegree)) && MPI_Ineighbor_alltoallv((sendbuf),(sendcnts),(sdispls),(sendtype),(recvbuf),(recvcnts),(rdispls),(recvtype),(comm),(request))))
572 
/*
   Logged launcher for the blocking MPI_Neighbor_alltoallv(). In order it
     1) adds outdegree to petsc_isend_ct and indegree to petsc_irecv_ct,
     2) calls PetscMPITypeSizeCount() for the send side (&petsc_isend_len) and the receive side (&petsc_irecv_len),
     3) calls MPI_Neighbor_alltoallv() only if outdegree or indegree is nonzero.
   The result is 0 when every step returns 0, otherwise 1.
   outdegree and indegree are evaluated several times.
*/
#define MPI_Start_neighbor_alltoallv(outdegree,indegree,sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm) \
  ((petsc_isend_ct += (PetscLogDouble)(outdegree),0) \
   || (petsc_irecv_ct += (PetscLogDouble)(indegree),0) \
   || PetscMPITypeSizeCount((outdegree),(sendcnts),(sendtype),(&petsc_isend_len)) \
   || PetscMPITypeSizeCount((indegree),(recvcnts),(recvtype),(&petsc_irecv_len)) \
   || (((outdegree) || (indegree)) \
       && MPI_Neighbor_alltoallv((sendbuf),(sendcnts),(sdispls),(sendtype),(recvbuf),(recvcnts),(rdispls),(recvtype),(comm))))
575 
576 #else
577 
/*
   Non-logging form of MPI_Startall_irecv(): calls MPI_Startall() only when number is nonzero.
   Yields 0 if number is zero or MPI_Startall() returns 0, otherwise 1.
   count and datatype are ignored; number is evaluated twice.
*/
#define MPI_Startall_irecv(count,datatype,number,requests) \
  ( (number) && MPI_Startall( (number), (requests) ) )
580 
/*
   Non-logging form of MPI_Startall_isend(): calls MPI_Startall() only when number is nonzero.
   Yields 0 if number is zero or MPI_Startall() returns 0, otherwise 1.
   count and datatype are ignored; number is evaluated twice.
*/
#define MPI_Startall_isend(count,datatype,number,requests) \
  ( (number) && MPI_Startall( (number), (requests) ) )
583 
/*
   Non-logging form of MPI_Start_isend(): forwards requests to MPI_Start() and yields its return value.
   count and datatype are ignored.
*/
#define MPI_Start_isend(count,datatype,requests) \
  ( MPI_Start( (requests) ) )
586 
/*
   Non-logging launcher for MPI_Ineighbor_alltoallv(): the MPI call is made only when outdegree or
   indegree is nonzero. Yields 0 if the call is skipped or returns 0, otherwise 1.
   When the call is skipped this macro does not write to request.
*/
#define MPI_Start_ineighbor_alltoallv(outdegree,indegree,sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm,request) \
  (((outdegree) || (indegree)) \
   && MPI_Ineighbor_alltoallv((sendbuf),(sendcnts),(sdispls),(sendtype), \
                              (recvbuf),(recvcnts),(rdispls),(recvtype), \
                              (comm),(request)))
589 
/*
   Non-logging launcher for MPI_Neighbor_alltoallv(): the MPI call is made only when outdegree or
   indegree is nonzero. Yields 0 if the call is skipped or returns 0, otherwise 1.
*/
#define MPI_Start_neighbor_alltoallv(outdegree,indegree,sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm) \
  (((outdegree) || (indegree)) \
   && MPI_Neighbor_alltoallv((sendbuf),(sendcnts),(sdispls),(sendtype), \
                             (recvbuf),(recvcnts),(rdispls),(recvtype), \
                             (comm)))
592 #endif /* !MPIUNI_H && ! PETSC_HAVE_BROKEN_RECURSIVE_MACRO */
593 
594 #else  /* ---Logging is turned off --------------------------------------------*/
595 
/* Logging is compiled out: memory logging reads as disabled */
#define PetscLogMemory   PETSC_FALSE

/* Flop counting stubs: PetscLogFlops() discards its argument; PetscGetFlops() stores 0.0 in *a. Both yield 0. */
#define PetscLogFlops(n) 0
#define PetscGetFlops(a) (*(a) = 0.0, 0)
600 
/*
   Stage stubs used when logging is compiled out. Each expands to 0 and its arguments are
   discarded without being evaluated. The one exception is PetscLogStageGetId(), which also stores 0 in *b.
*/
#define PetscLogStageRegister(a,b)   0
#define PetscLogStagePush(a)         0
#define PetscLogStagePop()           0
#define PetscLogStageSetActive(a,b)  0
#define PetscLogStageGetActive(a,b)  0
#define PetscLogStageGetVisible(a,b) 0
#define PetscLogStageSetVisible(a,b) 0
#define PetscLogStageGetId(a,b)      (*(b) = 0, 0)
609 
/*
   Event stubs used when logging is compiled out. Each expands to 0 and its arguments are
   discarded without being evaluated. The one exception is PetscLogEventGetId(), which also stores 0 in *b.
*/
#define PetscLogEventRegister(a,b,c)    0
#define PetscLogEventSetCollective(a,b) 0
#define PetscLogEventIncludeClass(a)    0
#define PetscLogEventExcludeClass(a)    0
#define PetscLogEventActivate(a)        0
#define PetscLogEventDeactivate(a)      0
#define PetscLogEventActivateClass(a)   0
#define PetscLogEventDeactivateClass(a) 0
#define PetscLogEventSetActiveAll(a,b)  0
#define PetscLogEventGetId(a,b)         (*(b) = 0, 0)
#define PetscLogEventGetPerfInfo(a,b,c) 0
#define PetscLogEventSetDof(a,b,c)      0
#define PetscLogEventSetError(a,b,c)    0
623 
/* With logging compiled out, these four names are plain constants equal to 0 */
#define PetscLogPLB 0
#define PetscLogPLE 0
#define PetscLogPHC 0
#define PetscLogPHD 0
628 
/* Object logging stubs: each expands to 0 and its arguments are discarded without being evaluated */
#define PetscLogObjectParents(p,n,c) 0
#define PetscLogObjectCreate(h)      0
#define PetscLogObjectDestroy(h)     0
632 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject,const char[],...);
633 
/* Logging start-up/configuration stubs: each expands to 0 and its arguments are discarded without being evaluated */
#define PetscLogDefaultBegin()    0
#define PetscLogAllBegin()        0
#define PetscLogNestedBegin()     0
#define PetscLogTraceBegin(file)  0
#define PetscLogActions(a)        0
#define PetscLogObjects(a)        0
#define PetscLogSetThreshold(a,b) 0
#define PetscLogSet(lb,le)        0
642 
/* Log output stubs: each expands to 0 and its argument is discarded without being evaluated */
#define PetscLogView(viewer)      0
#define PetscLogViewFromOptions() 0
#define PetscLogDump(c)           0
646 
/* Event timing stubs: each expands to 0 and its arguments are discarded without being evaluated */
#define PetscLogEventSync(e,comm)         0
#define PetscLogEventBegin(e,o1,o2,o3,o4) 0
#define PetscLogEventEnd(e,o1,o2,o3,o4)   0
650 
/* GPU logging stubs: each expands to 0 and its argument is discarded without being evaluated */
#define PetscLogCpuToGpu(a)    0
#define PetscLogGpuToCpu(a)    0
#define PetscLogGpuFlops(a)    0
#define PetscLogGpuTimeBegin() 0
#define PetscLogGpuTimeEnd()   0
#define PetscLogGpuTimeAdd(a)  0
657 
/*
   These MPI start wrappers must exist even when PETSC_USE_LOG is NOT defined.
   The MPI_Startall_*() forms call MPI_Startall() only when number is nonzero and yield 0 or 1;
   MPI_Start_isend() yields the return value of MPI_Start(). count and datatype are ignored.
*/
#define MPI_Startall_irecv(count,datatype,number,requests) \
  ((number) && MPI_Startall((number),(requests)))
#define MPI_Startall_isend(count,datatype,number,requests) \
  ((number) && MPI_Startall((number),(requests)))
#define MPI_Start_isend(count,datatype,requests) \
  (MPI_Start((requests)))
/*
   Launcher for MPI_Ineighbor_alltoallv() when logging is compiled out: the MPI call is made only when
   outdegree or indegree is nonzero. Yields 0 if the call is skipped or returns 0, otherwise 1.
   When the call is skipped this macro does not write to request.
*/
#define MPI_Start_ineighbor_alltoallv(outdegree,indegree,sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm,request) \
  (((outdegree) || (indegree)) \
   && MPI_Ineighbor_alltoallv((sendbuf),(sendcnts),(sdispls),(sendtype), \
                              (recvbuf),(recvcnts),(rdispls),(recvtype), \
                              (comm),(request)))
/*
   Launcher for MPI_Neighbor_alltoallv() when logging is compiled out: the MPI call is made only when
   outdegree or indegree is nonzero. Yields 0 if the call is skipped or returns 0, otherwise 1.
*/
#define MPI_Start_neighbor_alltoallv(outdegree,indegree,sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm) \
  (((outdegree) || (indegree)) \
   && MPI_Neighbor_alltoallv((sendbuf),(sendcnts),(sdispls),(sendtype), \
                             (recvbuf),(recvcnts),(rdispls),(recvtype), \
                             (comm)))
666 
667 #endif   /* PETSC_USE_LOG */
668 
/*
   PetscPreLoadBegin - Opens a preloading region that must be closed with PetscPreLoadEnd().

   Input Parameters:
+  flag - initial value of PetscPreLoading; the -preload option, if given, overrides it
-  name - name of the log stage that is pushed at the start of the region

   Notes:
   The macro opens a do { and a for loop whose closing braces are supplied by PetscPreLoadEnd(), and it
   declares the locals PetscPreLoading, PetscPreLoadMax, PetscPreLoadIt, _stageNum and _3_ierr that
   PetscPreLoadStage() and PetscPreLoadEnd() rely on.

   The loop runs from PetscPreLoadIt = 0 to PetscPreLoadMax = (int)PetscPreLoading. The stage is
   registered on pass 0 and looked up by name on later passes. It is set active only when preloading is
   off or on a pass after the first.

   Every call, including PetscLogStageSetActive(), is checked with CHKERRQ().
*/
#define PetscPreLoadBegin(flag,name) \
do {\
  PetscBool      PetscPreLoading = (flag);\
  int            PetscPreLoadMax,PetscPreLoadIt;\
  PetscLogStage  _stageNum;\
  PetscErrorCode _3_ierr; \
  _3_ierr = PetscOptionsGetBool(NULL,NULL,"-preload",&PetscPreLoading,NULL);CHKERRQ(_3_ierr); \
  PetscPreLoadMax = (int)(PetscPreLoading);\
  PetscPreLoadingUsed = PetscPreLoading ? PETSC_TRUE : PetscPreLoadingUsed;\
  for (PetscPreLoadIt=0; PetscPreLoadIt<=PetscPreLoadMax; PetscPreLoadIt++) {\
    PetscPreLoadingOn = PetscPreLoading;\
    _3_ierr = PetscBarrier(NULL);CHKERRQ(_3_ierr);\
    if (PetscPreLoadIt>0) {\
      _3_ierr = PetscLogStageGetId(name,&_stageNum);CHKERRQ(_3_ierr);\
    } else {\
      _3_ierr = PetscLogStageRegister(name,&_stageNum);CHKERRQ(_3_ierr); \
    }\
    _3_ierr = PetscLogStageSetActive(_stageNum,(PetscBool)(!PetscPreLoadMax || PetscPreLoadIt));CHKERRQ(_3_ierr);\
    _3_ierr = PetscLogStagePush(_stageNum);CHKERRQ(_3_ierr);
688 
/*
   PetscPreLoadEnd - Closes a region opened with PetscPreLoadBegin().

   Pops the current log stage, clears PetscPreLoading so that a following pass of the loop is not
   treated as a preload pass, and supplies the closing braces of the for loop and of the
   do { ... } while (0) opened by PetscPreLoadBegin().
*/
#define PetscPreLoadEnd() \
    _3_ierr = PetscLogStagePop(); \
    CHKERRQ(_3_ierr); \
    PetscPreLoading = PETSC_FALSE; \
  } /* end of the for loop over PetscPreLoadIt */ \
} while (0)
694 
/*
   PetscPreLoadStage - Switches to the log stage called name inside a PetscPreLoadBegin()/PetscPreLoadEnd() region.

   Input Parameter:
.  name - name of the log stage to switch to

   Notes:
   Pops the current stage, then registers the new stage on pass 0 (PetscPreLoadIt == 0) or looks it up
   by name on later passes. The stage is set active only when preloading is off or on a pass after the
   first, and is then pushed.

   Uses the locals _3_ierr, _stageNum, PetscPreLoadIt and PetscPreLoadMax declared by PetscPreLoadBegin().
   Every call, including PetscLogStageSetActive(), is checked with CHKERRQ().
*/
#define PetscPreLoadStage(name) do {                                         \
    _3_ierr = PetscLogStagePop();CHKERRQ(_3_ierr);                      \
    if (PetscPreLoadIt>0) {                                                  \
      _3_ierr = PetscLogStageGetId(name,&_stageNum);CHKERRQ(_3_ierr);   \
    } else {                                                            \
      _3_ierr = PetscLogStageRegister(name,&_stageNum);CHKERRQ(_3_ierr); \
    }                                                                   \
    _3_ierr = PetscLogStageSetActive(_stageNum,(PetscBool)(!PetscPreLoadMax || PetscPreLoadIt));CHKERRQ(_3_ierr); \
    _3_ierr = PetscLogStagePush(_stageNum);CHKERRQ(_3_ierr);            \
  } while (0)
705 
706 /* some vars for logging */
707 PETSC_EXTERN PetscBool PetscPreLoadingUsed;       /* true if we are or have done preloading */
708 PETSC_EXTERN PetscBool PetscPreLoadingOn;         /* true if we are currently in a preloading calculation */
709 
710 #endif
711