xref: /petsc/include/petsclog.h (revision ef0bb6c736604ce380bf8bea4ebd4a7bda431d97)
1 /*
2     Defines profile/logging in PETSc.
3 */
4 
5 #if !defined(PETSCLOG_H)
6 #define PETSCLOG_H
7 #include <petscsys.h>
8 #include <petsctime.h>
9 
/*
   PetscInfo(), PetscInfo1() ... PetscInfo7() - informational logging macros; the numeric suffix is the number of
   extra arguments (a1..a7) that follow the message string S.

   With PETSC_USE_INFO defined, each macro forwards to PetscInfo_Private(), passing PETSC_FUNCTION_NAME as the first
   argument followed by A, S and the extra arguments. Without PETSC_USE_INFO every macro expands to the constant 0,
   so its arguments are not evaluated at all.
*/
10 /* General logging of information; different from event logging */
11 PETSC_EXTERN PetscErrorCode PetscInfo_Private(const char[],void*,const char[],...);
12 #if defined(PETSC_USE_INFO)
13 #define PetscInfo(A,S)                       PetscInfo_Private(PETSC_FUNCTION_NAME,A,S)
14 #define PetscInfo1(A,S,a1)                   PetscInfo_Private(PETSC_FUNCTION_NAME,A,S,a1)
15 #define PetscInfo2(A,S,a1,a2)                PetscInfo_Private(PETSC_FUNCTION_NAME,A,S,a1,a2)
16 #define PetscInfo3(A,S,a1,a2,a3)             PetscInfo_Private(PETSC_FUNCTION_NAME,A,S,a1,a2,a3)
17 #define PetscInfo4(A,S,a1,a2,a3,a4)          PetscInfo_Private(PETSC_FUNCTION_NAME,A,S,a1,a2,a3,a4)
18 #define PetscInfo5(A,S,a1,a2,a3,a4,a5)       PetscInfo_Private(PETSC_FUNCTION_NAME,A,S,a1,a2,a3,a4,a5)
19 #define PetscInfo6(A,S,a1,a2,a3,a4,a5,a6)    PetscInfo_Private(PETSC_FUNCTION_NAME,A,S,a1,a2,a3,a4,a5,a6)
20 #define PetscInfo7(A,S,a1,a2,a3,a4,a5,a6,a7) PetscInfo_Private(PETSC_FUNCTION_NAME,A,S,a1,a2,a3,a4,a5,a6,a7)
21 #else
/* Info logging compiled out: every variant is the "no error" value 0 */
22 #define PetscInfo(A,S)                       0
23 #define PetscInfo1(A,S,a1)                   0
24 #define PetscInfo2(A,S,a1,a2)                0
25 #define PetscInfo3(A,S,a1,a2,a3)             0
26 #define PetscInfo4(A,S,a1,a2,a3,a4)          0
27 #define PetscInfo5(A,S,a1,a2,a3,a4,a5)       0
28 #define PetscInfo6(A,S,a1,a2,a3,a4,a5,a6)    0
29 #define PetscInfo7(A,S,a1,a2,a3,a4,a5,a6,a7) 0
30 #endif
/* The following are declared regardless of PETSC_USE_INFO */
31 PETSC_EXTERN PetscErrorCode PetscInfoDeactivateClass(PetscClassId);
32 PETSC_EXTERN PetscErrorCode PetscInfoActivateClass(PetscClassId);
33 PETSC_EXTERN PetscBool PetscLogPrintInfo;  /* if true, indicates PetscInfo() is turned on */
34 
35 /*MC
36     PetscLogEvent - id used to identify PETSc or user events that time portions (blocks) of executable
37      code.
38 
39     Level: intermediate
40 
41 .seealso: PetscLogEventRegister(), PetscLogEventBegin(), PetscLogEventEnd(), PetscLogStage
42 M*/
43 typedef int PetscLogEvent; /* a plain int; used as the index into eventInfo[] by the PetscLogEventBegin/End() macros */
44 
45 /*MC
46     PetscLogStage - id used to identify user stages (phases, sections) of runs - for logging
47 
48     Level: intermediate
49 
50 .seealso: PetscLogStageRegister(), PetscLogStagePush(), PetscLogStagePop(), PetscLogEvent
51 M*/
52 typedef int PetscLogStage; /* a plain int, like PetscLogEvent */
53 
/* NOTE(review): the meaning of the constant 1311311 is not visible in this header - confirm against its users */
54 #define PETSC_EVENT  1311311
55 PETSC_EXTERN PetscLogEvent PETSC_LARGEST_EVENT;
56 
57 /* Global flop counter */
58 PETSC_EXTERN PetscLogDouble petsc_TotalFlops; /* incremented by PetscLogFlops() and PetscLogGpuFlops() */
59 PETSC_EXTERN PetscLogDouble petsc_tmp_flops;  /* not referenced in the visible part of this header */
60 
61 /* Global GPU counters */
62 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
63 PETSC_EXTERN PetscLogDouble petsc_ctog_ct; /* incremented by 1 per PetscLogCpuToGpu() call */
64 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct; /* incremented by 1 per PetscLogGpuToCpu() call */
65 PETSC_EXTERN PetscLogDouble petsc_ctog_sz; /* sum of the sizes passed to PetscLogCpuToGpu() */
66 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz; /* sum of the sizes passed to PetscLogGpuToCpu() */
67 PETSC_EXTERN PetscLogDouble petsc_gflops;  /* flops accumulated by PetscLogGpuFlops() */
68 PETSC_EXTERN PetscLogDouble petsc_gtime;   /* updated by PetscLogGpuTimeBegin()/End()/Add() */
69 #endif
70 
71 /* We must make the following structures available to access the event
72      activation flags in the PetscLogEventBegin/End() macros. These are not part of the PETSc public
73      API and are not intended to be used by other parts of PETSc or by users.
74 
75      The code that manipulates these structures is in src/sys/logging/utils.
76 */
/* Opaque handle: struct _n_PetscIntStack is only forward-declared here; it is used for the stage stack in _n_PetscStageLog */
77 typedef struct _n_PetscIntStack *PetscIntStack;
78 
79 /* -----------------------------------------------------------------------------------------------------*/
80 /*
81     PetscClassRegInfo, PetscClassPerfInfo - Each class has two data structures associated with it. The first has
82        static information about it, the second collects statistics on how many objects of the class are created,
83        how much memory they use, etc.
84 
85     PetscClassRegLog, PetscClassPerfLog - arrays of the PetscClassRegInfo and PetscClassPerfInfo for all classes.
      Both are pointer typedefs to a struct holding the number of entries, a maximum, and the classInfo array.
86 */
87 typedef struct  {
88   char           *name;   /* The class name */
89   PetscClassId   classid; /* The integer identifying this class */
90 } PetscClassRegInfo;
91 
92 typedef struct {
93   PetscClassId   id;           /* The integer identifying this class */
94   int            creations;    /* The number of objects of this class created */
95   int            destructions; /* The number of objects of this class destroyed */
96   PetscLogDouble mem;          /* The total memory allocated by objects of this class */
97   PetscLogDouble descMem;      /* The total memory allocated by descendants of these objects */
98 } PetscClassPerfInfo;
99 
100 typedef struct _n_PetscClassRegLog *PetscClassRegLog;
101 struct _n_PetscClassRegLog {
102   int               numClasses; /* The number of classes registered */
103   int               maxClasses; /* The maximum number of classes */
104   PetscClassRegInfo *classInfo; /* The structure for class information (classids are monotonically increasing) */
105 };
106 
107 typedef struct _n_PetscClassPerfLog *PetscClassPerfLog;
108 struct _n_PetscClassPerfLog {
109   int                numClasses; /* The number of logging classes */
110   int                maxClasses; /* The maximum number of classes */
111   PetscClassPerfInfo *classInfo; /* The structure for class information (classids are monotonically increasing) */
112 };
113 /* -----------------------------------------------------------------------------------------------------*/
114 /*
115     PetscEventRegInfo, PetscEventPerfInfo - Each event has two data structures associated with it. The first has
116        static information about it, the second collects statistics on how many times the event is used, how
117        much time it takes, etc.
118 
119     PetscEventRegLog, PetscEventPerfLog - an array of all PetscEventRegInfo and PetscEventPerfInfo for all events. There is one
120       of these for each stage.
121 
122 */
123 typedef struct {
124   char         *name;         /* The name of this event */
125   PetscClassId classid;       /* The class the event is associated with */
126   PetscBool    collective;    /* Flag this event as collective */
127 #if defined (PETSC_HAVE_MPE)
128   int          mpe_id_begin;  /* MPE IDs that define the event */
129   int          mpe_id_end;
130 #endif
131 } PetscEventRegInfo;
132 
/* Note: the layout of this struct depends on PETSC_HAVE_VIENNACL/PETSC_HAVE_CUDA (trailing GPU fields) */
133 typedef struct {
134   int            id;            /* The integer identifying this event */
135   PetscBool      active;        /* The flag to activate logging; tested by the PetscLogEventBegin/End/Sync() macros */
136   PetscBool      visible;       /* The flag to print info in summary */
137   int            depth;         /* The nesting depth of the event call */
138   int            count;         /* The number of times this event was executed */
139   PetscLogDouble flops, flops2, flopsTmp; /* The flops and flops^2 used in this event; flopsTmp is presumably scratch storage - TODO confirm */
140   PetscLogDouble time, time2, timeTmp;    /* The time and time^2 taken for this event; timeTmp is presumably scratch storage - TODO confirm */
141   PetscLogDouble syncTime;                /* The synchronization barrier time */
142   PetscLogDouble dof[8];        /* The number of degrees of freedom associated with this event */
143   PetscLogDouble errors[8];     /* The errors (user-defined) associated with this event */
144   PetscLogDouble numMessages;   /* The number of messages in this event */
145   PetscLogDouble messageLength; /* The total message lengths in this event */
146   PetscLogDouble numReductions; /* The number of reductions in this event */
147   PetscLogDouble memIncrease;   /* How much the resident memory has increased in this event */
148   PetscLogDouble mallocIncrease;/* How much the maximum malloced space has increased in this event */
149   PetscLogDouble mallocSpace;   /* How much space was malloced and kept during this event */
150   PetscLogDouble mallocIncreaseEvent;  /* Maximum of the high water mark within the event minus memory available at the end of the event */
151   #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
152   PetscLogDouble CpuToGpuCount; /* The total number of CPU to GPU copies */
153   PetscLogDouble GpuToCpuCount; /* The total number of GPU to CPU copies */
154   PetscLogDouble CpuToGpuSize;  /* The total size of CPU to GPU copies */
155   PetscLogDouble GpuToCpuSize;  /* The total size of GPU to CPU copies */
156   PetscLogDouble GpuFlops;      /* The flops done on a GPU in this event */
157   PetscLogDouble GpuTime;       /* The time spent on a GPU in this event */
158   #endif
159 } PetscEventPerfInfo;
160 
161 typedef struct _n_PetscEventRegLog *PetscEventRegLog;
162 struct _n_PetscEventRegLog {
163   int               numEvents;  /* The number of registered events */
164   int               maxEvents;  /* The maximum number of events */
165   PetscEventRegInfo *eventInfo; /* The registration information for each event */
166 };
167 
168 typedef struct _n_PetscEventPerfLog *PetscEventPerfLog;
169 struct _n_PetscEventPerfLog {
170   int                numEvents;  /* The number of logging events */
171   int                maxEvents;  /* The maximum number of events */
172   PetscEventPerfInfo *eventInfo; /* The performance information for each event; indexed by PetscLogEvent in the event macros */
173 };
174 /* ------------------------------------------------------------------------------------------------------------*/
175 /*
176    PetscStageInfo - Contains all the information about a particular stage.
177 
178    PetscStageLog - An array of PetscStageInfo for each registered stage. There is a single one of these in the code.
179 */
180 typedef struct _PetscStageInfo {
181   char               *name;     /* The stage name */
182   PetscBool          used;      /* The stage was pushed on this processor */
183   PetscEventPerfInfo perfInfo;  /* The stage performance information; perfInfo.active gates the event macros for this stage */
184   PetscEventPerfLog  eventLog;  /* The event information for this stage */
185   PetscClassPerfLog  classLog;  /* The class information for this stage */
186 } PetscStageInfo;
187 
188 typedef struct _n_PetscStageLog *PetscStageLog;
189 struct _n_PetscStageLog {
190   int              numStages;   /* The number of registered stages */
191   int              maxStages;   /* The maximum number of stages */
192   PetscIntStack    stack;       /* The stack for active stages */
193   int              curStage;    /* The current stage (only used in macros so we don't call PetscIntStackTop) */
194   PetscStageInfo   *stageInfo;  /* The information for each stage; indexed by curStage in the event macros */
195   PetscEventRegLog eventLog;    /* The registered events */
196   PetscClassRegLog classLog;    /* The registered classes */
197 };
198 /* -----------------------------------------------------------------------------------------------------*/
199 
/* Declared ahead of the PETSC_USE_LOG conditional, so these prototypes are visible whether or not logging is enabled */
200 PETSC_EXTERN PetscErrorCode PetscLogObjectParent(PetscObject,PetscObject);
201 PETSC_EXTERN PetscErrorCode PetscLogObjectMemory(PetscObject,PetscLogDouble);
202 
203 #if defined(PETSC_USE_LOG)  /* --- Logging is turned on --------------------------------*/
/* Global stage log; the PetscLogEventSync/Begin/End() macros dereference it directly (curStage, stageInfo[]) */
204 PETSC_EXTERN PetscStageLog petsc_stageLog;
205 PETSC_EXTERN PetscErrorCode PetscLogGetStageLog(PetscStageLog*);
206 PETSC_EXTERN PetscErrorCode PetscStageLogGetCurrent(PetscStageLog,int*);
207 PETSC_EXTERN PetscErrorCode PetscStageLogGetEventPerfLog(PetscStageLog,int,PetscEventPerfLog*);
208 
209 /*
210    Flop counting:  We count each arithmetic operation (e.g., addition, multiplication) separately.
211 
212    For the complex numbers version, note that
213        1 complex addition = 2 flops
214        1 complex multiplication = 6 flops,
215    where we define 1 flop as that for a double precision scalar.  We roughly approximate
216    flop counting for complex numbers by multiplying the total flops by 4; this corresponds
217    to the assumption that we're counting mostly additions and multiplications -- and
218    roughly the same number of each.  More accurate counting could be done by distinguishing
219    among the various arithmetic operations.
220  */
221 
/* PETSC_FLOPS_PER_OP is the factor PetscLogFlops() and PetscLogGpuFlops() multiply their argument by */
222 #if defined(PETSC_USE_COMPLEX)
223 #define PETSC_FLOPS_PER_OP 4.0
224 #else
225 #define PETSC_FLOPS_PER_OP 1.0
226 #endif
227 
228 PETSC_STATIC_INLINE PetscErrorCode PetscLogFlops(PetscLogDouble n)
229 {
230   PetscFunctionBegin;
231 #if defined(PETSC_USE_DEBUG)
232   if (n < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Cannot log negative flops");
233 #endif
234   petsc_TotalFlops += PETSC_FLOPS_PER_OP*n;
235   PetscFunctionReturn(0);
236 }
237 
238 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
239 PETSC_STATIC_INLINE PetscErrorCode PetscLogCpuToGpu(PetscLogDouble size)
240 {
241   PetscFunctionBegin;
242   petsc_ctog_ct += 1;
243   petsc_ctog_sz += size;
244   PetscFunctionReturn(0);
245 }
246 
247 PETSC_STATIC_INLINE PetscErrorCode PetscLogGpuToCpu(PetscLogDouble size)
248 {
249   PetscFunctionBegin;
250   petsc_gtoc_ct += 1;
251   petsc_gtoc_sz += size;
252   PetscFunctionReturn(0);
253 }
254 
255 PETSC_STATIC_INLINE PetscErrorCode PetscLogGpuFlops(PetscLogDouble n)
256 {
257   PetscFunctionBegin;
258 #if defined(PETSC_USE_DEBUG)
259   if (n < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Cannot log negative flops");
260 #endif
261   petsc_TotalFlops += PETSC_FLOPS_PER_OP*n;
262   petsc_gflops += PETSC_FLOPS_PER_OP*n;
263   PetscFunctionReturn(0);
264 }
265 
266 PETSC_STATIC_INLINE PetscErrorCode PetscLogGpuTimeBegin()
267 {
268   PetscErrorCode ierr;
269   PetscFunctionBegin;
270   ierr = PetscTimeSubtract(&petsc_gtime);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 PETSC_STATIC_INLINE PetscErrorCode PetscLogGpuTimeEnd()
275 {
276   PetscErrorCode ierr;
277   PetscFunctionBegin;
278   ierr = PetscTimeAdd(&petsc_gtime);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 PETSC_STATIC_INLINE PetscErrorCode PetscLogGpuTimeAdd(PetscLogDouble t)
283 {
284   PetscFunctionBegin;
285   petsc_gtime += t;
286   PetscFunctionReturn(0);
287 }
288 #endif
289 
290 PETSC_EXTERN PetscErrorCode PetscGetFlops(PetscLogDouble *);
291 
292 #if defined (PETSC_HAVE_MPE)
293 PETSC_EXTERN PetscErrorCode PetscLogMPEBegin(void);
294 PETSC_EXTERN PetscErrorCode PetscLogMPEDump(const char[]);
295 #endif
296 
/*
   Logging hooks (function pointers). PetscLogPLB/PetscLogPLE are invoked by PetscLogEventBegin()/PetscLogEventEnd(),
   PetscLogPHC/PetscLogPHD by PetscLogObjectCreate()/PetscLogObjectDestroy(). A NULL hook disables the matching macro.
*/
297 PETSC_EXTERN PetscErrorCode (*PetscLogPLB)(PetscLogEvent,int,PetscObject,PetscObject,PetscObject,PetscObject);
298 PETSC_EXTERN PetscErrorCode (*PetscLogPLE)(PetscLogEvent,int,PetscObject,PetscObject,PetscObject,PetscObject);
299 PETSC_EXTERN PetscErrorCode (*PetscLogPHC)(PetscObject);
300 PETSC_EXTERN PetscErrorCode (*PetscLogPHD)(PetscObject);
301 
/*
   PetscLogObjectParents(p,n,d) - calls PetscLogObjectParent(p,d[i]) for i = 0..n-1.

   This is NOT an expression: it expands to "0;" followed by a do{}while(0) statement, so it only works in the form
       ierr = PetscLogObjectParents(p,n,d);CHKERRQ(ierr);
   inside a function that has a variable named ierr in scope and returns PetscErrorCode (the loop body assigns ierr
   and uses CHKERRQ itself). n is evaluated on every loop iteration.
*/
302 #define PetscLogObjectParents(p,n,d)  0;do{int _i; for (_i=0; _i<(n); _i++) {ierr = PetscLogObjectParent((PetscObject)(p),(PetscObject)(d)[_i]);CHKERRQ(ierr);}}while(0)
/* Call the creation/destruction hook when one is installed; otherwise evaluate to 0 without touching h */
303 #define PetscLogObjectCreate(h)      ((PetscLogPHC) ? (*PetscLogPHC)((PetscObject)(h)) : 0)
304 #define PetscLogObjectDestroy(h)     ((PetscLogPHD) ? (*PetscLogPHD)((PetscObject)(h)) : 0)
305 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...);
306 
/*
   Public logging API (logging enabled). In the branch of this header where logging is turned off, the routines
   below that have a stub there are replaced by macros that evaluate to 0.
*/
307 /* Initialization functions */
308 PETSC_EXTERN PetscErrorCode PetscLogDefaultBegin(void);
309 PETSC_EXTERN PetscErrorCode PetscLogAllBegin(void);
310 PETSC_EXTERN PetscErrorCode PetscLogNestedBegin(void);
311 PETSC_EXTERN PetscErrorCode PetscLogTraceBegin(FILE *);
312 PETSC_EXTERN PetscErrorCode PetscLogActions(PetscBool);
313 PETSC_EXTERN PetscErrorCode PetscLogObjects(PetscBool);
314 PETSC_EXTERN PetscErrorCode PetscLogSetThreshold(PetscLogDouble,PetscLogDouble*);
/* The two callbacks have the same signature as the PetscLogPLB/PetscLogPLE hooks (PetscLogEvent is a typedef for int) */
315 PETSC_EXTERN PetscErrorCode PetscLogSet(PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject),
316                                         PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject));
317 
318 /* Output functions */
319 PETSC_EXTERN PetscErrorCode PetscLogView(PetscViewer);
320 PETSC_EXTERN PetscErrorCode PetscLogViewFromOptions(void);
321 PETSC_EXTERN PetscErrorCode PetscLogDump(const char[]);
322 
323 /* Stage functions */
324 PETSC_EXTERN PetscErrorCode PetscLogStageRegister(const char[],PetscLogStage*);
325 PETSC_EXTERN PetscErrorCode PetscLogStagePush(PetscLogStage);
326 PETSC_EXTERN PetscErrorCode PetscLogStagePop(void);
327 PETSC_EXTERN PetscErrorCode PetscLogStageSetActive(PetscLogStage,PetscBool);
328 PETSC_EXTERN PetscErrorCode PetscLogStageGetActive(PetscLogStage,PetscBool*);
329 PETSC_EXTERN PetscErrorCode PetscLogStageSetVisible(PetscLogStage,PetscBool);
330 PETSC_EXTERN PetscErrorCode PetscLogStageGetVisible(PetscLogStage,PetscBool*);
331 PETSC_EXTERN PetscErrorCode PetscLogStageGetId(const char[],PetscLogStage*);
332 
333 /* Event functions */
334 PETSC_EXTERN PetscErrorCode PetscLogEventRegister(const char[],PetscClassId,PetscLogEvent*);
335 PETSC_EXTERN PetscErrorCode PetscLogEventSetCollective(PetscLogEvent,PetscBool);
336 PETSC_EXTERN PetscErrorCode PetscLogEventIncludeClass(PetscClassId);
337 PETSC_EXTERN PetscErrorCode PetscLogEventExcludeClass(PetscClassId);
338 PETSC_EXTERN PetscErrorCode PetscLogEventActivate(PetscLogEvent);
339 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivate(PetscLogEvent);
340 PETSC_EXTERN PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent,PetscBool);
341 PETSC_EXTERN PetscErrorCode PetscLogEventActivateClass(PetscClassId);
342 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivateClass(PetscClassId);
343 PETSC_EXTERN PetscErrorCode PetscLogEventGetId(const char[],PetscLogEvent*);
344 PETSC_EXTERN PetscErrorCode PetscLogEventGetPerfInfo(int,PetscLogEvent,PetscEventPerfInfo*);
345 PETSC_EXTERN PetscErrorCode PetscLogEventSetDof(PetscLogEvent, PetscInt, PetscLogDouble);
346 PETSC_EXTERN PetscErrorCode PetscLogEventSetError(PetscLogEvent, PetscInt, PetscLogDouble);
347 
/*
   MPI traffic counters, updated by the MPI_Xxx() logging wrappers defined later in this header.
   The *_ct variables count calls; the *_len variables accumulate message sizes computed with MPI_Type_size().
*/
348 /* Global counters */
349 PETSC_EXTERN PetscLogDouble petsc_irecv_ct;
350 PETSC_EXTERN PetscLogDouble petsc_isend_ct;
351 PETSC_EXTERN PetscLogDouble petsc_recv_ct;
352 PETSC_EXTERN PetscLogDouble petsc_send_ct;
353 PETSC_EXTERN PetscLogDouble petsc_irecv_len;
354 PETSC_EXTERN PetscLogDouble petsc_isend_len;
355 PETSC_EXTERN PetscLogDouble petsc_recv_len;
356 PETSC_EXTERN PetscLogDouble petsc_send_len;
357 PETSC_EXTERN PetscLogDouble petsc_allreduce_ct; /* also bumped by the Bcast, Reduce_scatter_block and Alltoall(v) wrappers */
358 PETSC_EXTERN PetscLogDouble petsc_gather_ct;
359 PETSC_EXTERN PetscLogDouble petsc_scatter_ct;
360 PETSC_EXTERN PetscLogDouble petsc_wait_ct;
361 PETSC_EXTERN PetscLogDouble petsc_wait_any_ct;
362 PETSC_EXTERN PetscLogDouble petsc_wait_all_ct;
363 PETSC_EXTERN PetscLogDouble petsc_sum_of_waits_ct; /* +1 per Wait/Waitany, +count per Waitall */
364 
365 PETSC_EXTERN PetscBool      PetscLogMemory;
366 
367 PETSC_EXTERN PetscBool PetscLogSyncOn;  /* true if logging synchronization is enabled */
368 PETSC_EXTERN PetscErrorCode PetscLogEventSynchronize(PetscLogEvent, MPI_Comm);
369 
/*
   PetscLogEventSync(), PetscLogEventBegin(), PetscLogEventEnd() - event logging entry points.

   Each macro is an expression yielding a PetscErrorCode-style value. The action is taken only when all three hold:
     - the relevant hook is installed (PetscLogPLB for Sync and Begin, PetscLogPLE for End),
     - the current stage (petsc_stageLog->curStage) has perfInfo.active set,
     - the event e is active in the current stage's event log.
   Otherwise the macro evaluates to 0. Begin/End pass the literal 0 as the hook's second argument and cast
   o1..o4 to PetscObject.

   Caveats: e is evaluated more than once (array index and call argument), so it must not have side effects;
   petsc_stageLog is dereferenced without a NULL check, and e is used as an array index without a range check.
*/
370 #define PetscLogEventSync(e,comm) \
371   (((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? \
372     PetscLogEventSynchronize((e),(comm)) : 0 ))
373 
374 #define PetscLogEventBegin(e,o1,o2,o3,o4) \
375   (((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? \
376     (*PetscLogPLB)((e),0,(PetscObject)(o1),(PetscObject)(o2),(PetscObject)(o3),(PetscObject)(o4)) : 0 ))
377 
378 #define PetscLogEventEnd(e,o1,o2,o3,o4) \
379   (((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? \
380     (*PetscLogPLE)((e),0,(PetscObject)(o1),(PetscObject)(o2),(PetscObject)(o3),(PetscObject)(o4)) : 0 ))
381 
382 PETSC_EXTERN PetscErrorCode PetscLogEventGetFlops(PetscLogEvent,PetscLogDouble*);
383 PETSC_EXTERN PetscErrorCode PetscLogEventZeroFlops(PetscLogEvent);
384 
385 /*
386      These are used internally in the PETSc routines to keep a count of MPI messages and
387    their sizes.
388 
389      This does not work for MPI-Uni because our include/petsc/mpiuni/mpi.h file
390    uses macros to define the MPI operations.
391 
392      It does not work correctly on HP-UX because it processes the
393    macros in a way that sometimes double counts, hence
394    PETSC_HAVE_BROKEN_RECURSIVE_MACRO
395 
396      It does not work with Windows because winmpich lacks MPI_Type_size()
397 */
398 #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO) && !defined (PETSC_HAVE_MPI_MISSING_TYPESIZE)
399 /*
400    Logging of MPI activities
401 */
402 PETSC_STATIC_INLINE PetscErrorCode PetscMPITypeSize(PetscInt count,MPI_Datatype type,PetscLogDouble *length)
403 {
404   PetscMPIInt    typesize;
405   PetscErrorCode ierr;
406   if (type == MPI_DATATYPE_NULL) return 0;
407   ierr     = MPI_Type_size(type,&typesize);CHKERRQ(ierr);
408   *length += (PetscLogDouble) (count*typesize);
409   return 0;
410 }
411 
412 PETSC_STATIC_INLINE PetscErrorCode PetscMPITypeSizeComm(MPI_Comm comm,const PetscMPIInt *counts,MPI_Datatype type,PetscLogDouble *length)
413 {
414   PetscMPIInt    typesize,size,p;
415   PetscErrorCode ierr;
416 
417   if (type == MPI_DATATYPE_NULL) return 0;
418   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
419   ierr = MPI_Type_size(type,&typesize);CHKERRQ(ierr);
420   for (p=0; p<size; ++p) {
421     *length += (PetscLogDouble) (counts[p]*typesize);
422   }
423   return 0;
424 }
425 
426 PETSC_STATIC_INLINE PetscErrorCode PetscMPITypeSizeCount(PetscInt n,const PetscMPIInt *counts,MPI_Datatype type,PetscLogDouble *length)
427 {
428   PetscMPIInt    typesize,p;
429   PetscErrorCode ierr;
430 
431   if (type == MPI_DATATYPE_NULL) return 0;
432   ierr = MPI_Type_size(type,&typesize);CHKERRQ(ierr);
433   for (p=0; p<n; ++p) {
434     *length += (PetscLogDouble) (counts[p]*typesize);
435   }
436   return 0;
437 }
438 
439 /*
440     Returns 1 if the communicator is parallel else zero
441 */
442 PETSC_STATIC_INLINE int PetscMPIParallelComm(MPI_Comm comm)
443 {
444   PetscMPIInt size; MPI_Comm_size(comm,&size); return size > 1;
445 }
446 
/*
   Logging wrappers for point-to-point, persistent-request start, and wait operations.

   Every wrapper is a short-circuit || chain:
     - the counter update is written as (update,0), which is always false, so evaluation continues;
     - PetscMPITypeSize() returns 0 on success, so evaluation continues to the real MPI call;
     - the MPI call comes last.
   The value of the whole expression is therefore 0 on success and 1 if any step returned nonzero; the original
   error code is not preserved. Inside its own replacement list a macro name is not expanded again, so the
   inner MPI_Xxx(...) is the real MPI routine.

   MPI_Startall_irecv/isend, MPI_Start_isend take the count/datatype only for accounting; the Startall forms skip
   MPI_Startall() when number is zero.
*/
447 #define MPI_Irecv(buf,count,datatype,source,tag,comm,request) \
448   ((petsc_irecv_ct++,0) || PetscMPITypeSize((count),(datatype),&(petsc_irecv_len)) || MPI_Irecv((buf),(count),(datatype),(source),(tag),(comm),(request)))
449 
450 #define MPI_Isend(buf,count,datatype,dest,tag,comm,request) \
451   ((petsc_isend_ct++,0) || PetscMPITypeSize((count),(datatype),&(petsc_isend_len)) || MPI_Isend((buf),(count),(datatype),(dest),(tag),(comm),(request)))
452 
453 #define MPI_Startall_irecv(count,datatype,number,requests) \
454   ((petsc_irecv_ct += (PetscLogDouble)(number),0) || PetscMPITypeSize((count),(datatype),&(petsc_irecv_len)) || ((number) && MPI_Startall((number),(requests))))
455 
456 #define MPI_Startall_isend(count,datatype,number,requests) \
457   ((petsc_isend_ct += (PetscLogDouble)(number),0) || PetscMPITypeSize((count),(datatype),&(petsc_isend_len)) || ((number) && MPI_Startall((number),(requests))))
458 
459 #define MPI_Start_isend(count,datatype,requests) \
460   ((petsc_isend_ct++,0) || PetscMPITypeSize((count),(datatype),(&petsc_isend_len)) || MPI_Start((requests)))
461 
462 #define MPI_Recv(buf,count,datatype,source,tag,comm,status) \
463   ((petsc_recv_ct++,0) || PetscMPITypeSize((count),(datatype),(&petsc_recv_len)) || MPI_Recv((buf),(count),(datatype),(source),(tag),(comm),(status)))
464 
465 #define MPI_Send(buf,count,datatype,dest,tag,comm) \
466   ((petsc_send_ct++,0) || PetscMPITypeSize((count),(datatype),(&petsc_send_len)) || MPI_Send((buf),(count),(datatype),(dest),(tag),(comm)))
467 
/* Waits: each bumps its own counter plus petsc_sum_of_waits_ct (by 1, or by count for MPI_Waitall) */
468 #define MPI_Wait(request,status) \
469   ((petsc_wait_ct++,petsc_sum_of_waits_ct++,0) || MPI_Wait((request),(status)))
470 
471 #define MPI_Waitany(a,b,c,d) \
472   ((petsc_wait_any_ct++,petsc_sum_of_waits_ct++,0) || MPI_Waitany((a),(b),(c),(d)))
473 
474 #define MPI_Waitall(count,array_of_requests,array_of_statuses) \
475   ((petsc_wait_all_ct++,petsc_sum_of_waits_ct += (PetscLogDouble) (count),0) || MPI_Waitall((count),(array_of_requests),(array_of_statuses)))
476 
/*
   Logging wrappers for blocking collectives. Each is a short-circuit || chain whose counter update is written
   as (update,0) so the real MPI call is always reached; the result is 0 on success and 1 otherwise.

   Accounting performed:
     - Allreduce, Bcast, Reduce_scatter_block, Alltoall, Alltoallv: petsc_allreduce_ct += PetscMPIParallelComm(comm),
       i.e. counted only on communicators with more than one rank; Alltoall(v) also add the send size to petsc_send_len.
     - Allgather, Allgatherv: petsc_gather_ct += PetscMPIParallelComm(comm).
     - Gather, Gatherv: petsc_gather_ct++ unconditionally, send size added to petsc_send_len.
     - Scatter, Scatterv: petsc_scatter_ct++ unconditionally, receive size added to petsc_recv_len.
   The comm argument is evaluated twice in the wrappers that call PetscMPIParallelComm().
*/
477 #define MPI_Allreduce(sendbuf,recvbuf,count,datatype,op,comm) \
478   ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || MPI_Allreduce((sendbuf),(recvbuf),(count),(datatype),(op),(comm)))
479 
480 #define MPI_Bcast(buffer,count,datatype,root,comm) \
481   ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || MPI_Bcast((buffer),(count),(datatype),(root),(comm)))
482 
483 #define MPI_Reduce_scatter_block(sendbuf,recvbuf,recvcount,datatype,op,comm) \
484   ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || MPI_Reduce_scatter_block((sendbuf),(recvbuf),(recvcount),(datatype),(op),(comm)))
485 
486 #define MPI_Alltoall(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm) \
487   ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || PetscMPITypeSize((sendcount),(sendtype),(&petsc_send_len)) || MPI_Alltoall((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(comm)))
488 
489 #define MPI_Alltoallv(sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm) \
490   ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || PetscMPITypeSizeComm((comm),(sendcnts),(sendtype),(&petsc_send_len)) || MPI_Alltoallv((sendbuf),(sendcnts),(sdispls),(sendtype),(recvbuf),(recvcnts),(rdispls),(recvtype),(comm)))
491 
492 #define MPI_Allgather(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm) \
493   ((petsc_gather_ct += PetscMPIParallelComm((comm)),0) || MPI_Allgather((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(comm)))
494 
495 #define MPI_Allgatherv(sendbuf,sendcount,sendtype,recvbuf,recvcount,displs,recvtype,comm) \
496   ((petsc_gather_ct += PetscMPIParallelComm((comm)),0) || MPI_Allgatherv((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(displs),(recvtype),(comm)))
497 
498 #define MPI_Gather(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,root,comm) \
499   ((petsc_gather_ct++,0) || PetscMPITypeSize((sendcount),(sendtype),(&petsc_send_len)) || MPI_Gather((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(root),(comm)))
500 
501 #define MPI_Gatherv(sendbuf,sendcount,sendtype,recvbuf,recvcount,displs,recvtype,root,comm) \
502   ((petsc_gather_ct++,0) || PetscMPITypeSize((sendcount),(sendtype),(&petsc_send_len)) || MPI_Gatherv((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(displs),(recvtype),(root),(comm)))
503 
504 #define MPI_Scatter(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,root,comm) \
505   ((petsc_scatter_ct++,0) || PetscMPITypeSize((recvcount),(recvtype),(&petsc_recv_len)) || MPI_Scatter((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(root),(comm)))
506 
507 #define MPI_Scatterv(sendbuf,sendcount,displs,sendtype,recvbuf,recvcount,recvtype,root,comm) \
508   ((petsc_scatter_ct++,0) || PetscMPITypeSize((recvcount),(recvtype),(&petsc_recv_len)) || MPI_Scatterv((sendbuf),(sendcount),(displs),(sendtype),(recvbuf),(recvcount),(recvtype),(root),(comm)))
509 
/*
   Logging wrappers for nonblocking collectives and neighborhood all-to-all. They do the same accounting as their
   blocking counterparts (allreduce/gather/scatter counters, send or receive lengths) and use the same
   short-circuit || chain, so the result is 0 on success and 1 otherwise.
*/
510 #define MPI_Ialltoall(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm,request) \
511   ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || PetscMPITypeSize((sendcount),(sendtype),(&petsc_send_len)) || MPI_Ialltoall((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(comm),(request)))
512 
513 #define MPI_Ialltoallv(sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm,request) \
514   ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || PetscMPITypeSizeComm((comm),(sendcnts),(sendtype),(&petsc_send_len)) || MPI_Ialltoallv((sendbuf),(sendcnts),(sdispls),(sendtype),(recvbuf),(recvcnts),(rdispls),(recvtype),(comm),(request)))
515 
516 #define MPI_Iallgather(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,comm,request) \
517   ((petsc_gather_ct += PetscMPIParallelComm((comm)),0) || MPI_Iallgather((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(comm),(request)))
518 
519 #define MPI_Iallgatherv(sendbuf,sendcount,sendtype,recvbuf,recvcount,displs,recvtype,comm,request) \
520   ((petsc_gather_ct += PetscMPIParallelComm((comm)),0) || MPI_Iallgatherv((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(displs),(recvtype),(comm),(request)))
521 
522 #define MPI_Igather(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,root,comm,request) \
523   ((petsc_gather_ct++,0) || PetscMPITypeSize((sendcount),(sendtype),(&petsc_send_len)) || MPI_Igather((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(root),(comm),(request)))
524 
525 #define MPI_Igatherv(sendbuf,sendcount,sendtype,recvbuf,recvcount,displs,recvtype,root,comm,request) \
526   ((petsc_gather_ct++,0) || PetscMPITypeSize((sendcount),(sendtype),(&petsc_send_len)) || MPI_Igatherv((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(displs),(recvtype),(root),(comm),(request)))
527 
528 #define MPI_Iscatter(sendbuf,sendcount,sendtype,recvbuf,recvcount,recvtype,root,comm,request) \
529   ((petsc_scatter_ct++,0) || PetscMPITypeSize((recvcount),(recvtype),(&petsc_recv_len)) || MPI_Iscatter((sendbuf),(sendcount),(sendtype),(recvbuf),(recvcount),(recvtype),(root),(comm),(request)))
530 
531 #define MPI_Iscatterv(sendbuf,sendcount,displs,sendtype,recvbuf,recvcount,recvtype,root,comm,request) \
532   ((petsc_scatter_ct++,0) || PetscMPITypeSize((recvcount),(recvtype),(&petsc_recv_len)) || MPI_Iscatterv((sendbuf),(sendcount),(displs),(sendtype),(recvbuf),(recvcount),(recvtype),(root),(comm),(request)))
533 
534 /* We treat MPI_Ineighbor_alltoallv as a set of isend/irecv instead of a traditional MPI collective.
535    OpenMPI-3.0 ran into error with outdegree = indegree = 0, so we use ((outdegree) || (indegree)) as a workaround.
   Accounting: petsc_isend_ct += outdegree and petsc_irecv_ct += indegree; the first outdegree entries of sendcnts and
   the first indegree entries of recvcnts are added (in bytes) to petsc_isend_len and petsc_irecv_len.
536  */
537 #define MPI_Start_ineighbor_alltoallv(outdegree,indegree,sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm,request) \
538   ((petsc_isend_ct += (PetscLogDouble)(outdegree),0) || (petsc_irecv_ct += (PetscLogDouble)(indegree),0) || PetscMPITypeSizeCount((outdegree),(sendcnts),(sendtype),(&petsc_isend_len)) || PetscMPITypeSizeCount((indegree),(recvcnts),(recvtype),(&petsc_irecv_len)) || (((outdegree) || (indegree)) && MPI_Ineighbor_alltoallv((sendbuf),(sendcnts),(sdispls),(sendtype),(recvbuf),(recvcnts),(rdispls),(recvtype),(comm),(request))))
539 
540 #define MPI_Start_neighbor_alltoallv(outdegree,indegree,sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm) \
541   ((petsc_isend_ct += (PetscLogDouble)(outdegree),0) || (petsc_irecv_ct += (PetscLogDouble)(indegree),0) || PetscMPITypeSizeCount((outdegree),(sendcnts),(sendtype),(&petsc_isend_len)) || PetscMPITypeSizeCount((indegree),(recvcnts),(recvtype),(&petsc_irecv_len)) || (((outdegree) || (indegree)) && MPI_Neighbor_alltoallv((sendbuf),(sendcnts),(sdispls),(sendtype),(recvbuf),(recvcnts),(rdispls),(recvtype),(comm))))
542 
543 #else
544 
/*
   Fallbacks used when MPI call counting is unavailable: the PETSc-specific MPI_Start*() helper names are still
   defined, but they only forward to the underlying MPI routine. The count/datatype (and degree) arguments that
   exist purely for accounting are ignored, while the "skip the call when there is nothing to do" guards
   ((number), (outdegree) || (indegree)) are kept.
*/
545 #define MPI_Startall_irecv(count,datatype,number,requests) \
546   ((number) && MPI_Startall((number),(requests)))
547 
548 #define MPI_Startall_isend(count,datatype,number,requests) \
549   ((number) && MPI_Startall((number),(requests)))
550 
551 #define MPI_Start_isend(count,datatype,requests) \
552   (MPI_Start((requests)))
553 
554 #define MPI_Start_ineighbor_alltoallv(outdegree,indegree,sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm,request) \
555   (((outdegree) || (indegree)) && MPI_Ineighbor_alltoallv((sendbuf),(sendcnts),(sdispls),(sendtype),(recvbuf),(recvcnts),(rdispls),(recvtype),(comm),(request)))
556 
557 #define MPI_Start_neighbor_alltoallv(outdegree,indegree,sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm) \
558   (((outdegree) || (indegree)) && MPI_Neighbor_alltoallv((sendbuf),(sendcnts),(sdispls),(sendtype),(recvbuf),(recvcnts),(rdispls),(recvtype),(comm)))
559 #endif /* !MPIUNI_H && !PETSC_HAVE_BROKEN_RECURSIVE_MACRO && !PETSC_HAVE_MPI_MISSING_TYPESIZE */
560 
561 #else  /* ---Logging is turned off --------------------------------------------*/
562 
/* With logging compiled out PetscLogMemory is the constant PETSC_FALSE. */
#define PetscLogMemory PETSC_FALSE

/*
   Flop counting stubs.  PetscLogFlops() discards its argument without evaluating it and
   yields 0; PetscGetFlops() stores 0.0 through its pointer argument and yields 0.
*/
#define PetscLogFlops(n) 0
#define PetscGetFlops(a) ((*(a) = 0.0),0)
567 
/*
   Log stage stubs.  Each macro expands to 0 and does not evaluate its arguments;
   the exception is PetscLogStageGetId(), which also stores 0 through its second argument.

   NOTE(review): PetscLogStageRegister() does not write its output argument here, so a
   stage variable passed to it keeps whatever value it had before.
*/
#define PetscLogStageRegister(a,b) 0
#define PetscLogStageGetId(a,b) ((*(b) = 0),0)
#define PetscLogStagePush(a) 0
#define PetscLogStagePop() 0
#define PetscLogStageSetActive(a,b) 0
#define PetscLogStageGetActive(a,b) 0
#define PetscLogStageSetVisible(a,b) 0
#define PetscLogStageGetVisible(a,b) 0
576 
/*
   Log event stubs.  Each macro expands to 0 and does not evaluate its arguments;
   the exception is PetscLogEventGetId(), which also stores 0 through its second argument.

   NOTE(review): PetscLogEventRegister() and PetscLogEventGetPerfInfo() do not write
   their output arguments here.
*/
#define PetscLogEventRegister(a,b,c) 0
#define PetscLogEventGetId(a,b) ((*(b) = 0),0)
#define PetscLogEventSetCollective(a,b) 0
#define PetscLogEventIncludeClass(a) 0
#define PetscLogEventExcludeClass(a) 0
#define PetscLogEventActivate(a) 0
#define PetscLogEventDeactivate(a) 0
#define PetscLogEventActivateClass(a) 0
#define PetscLogEventDeactivateClass(a) 0
#define PetscLogEventSetActiveAll(a,b) 0
#define PetscLogEventGetPerfInfo(a,b,c) 0
#define PetscLogEventSetDof(a,b,c) 0
#define PetscLogEventSetError(a,b,c) 0
590 
/* With logging compiled out the four PetscLogP* names are each the constant 0. */
#define PetscLogPLB 0
#define PetscLogPLE 0
#define PetscLogPHC 0
#define PetscLogPHD 0
595 
596 #define PetscLogObjectParents(p,n,c)       0
597 #define PetscLogObjectCreate(h)            0
598 #define PetscLogObjectDestroy(h)           0
599 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject,const char[],...);
600 
/*
   Stubs for switching logging on and configuring it: each expands to 0 and does not
   evaluate its arguments.
*/
#define PetscLogDefaultBegin() 0
#define PetscLogAllBegin() 0
#define PetscLogNestedBegin() 0
#define PetscLogTraceBegin(file) 0
#define PetscLogActions(a) 0
#define PetscLogObjects(a) 0
#define PetscLogSetThreshold(a,b) 0
#define PetscLogSet(lb,le) 0
609 
/* Log output stubs: each expands to 0 and does not evaluate its arguments. */
#define PetscLogView(viewer) 0
#define PetscLogViewFromOptions() 0
#define PetscLogDump(c) 0
613 
/*
   Event timing stubs: each expands to 0, so neither the event nor the objects passed
   along with it are evaluated.
*/
#define PetscLogEventSync(e,comm) 0
#define PetscLogEventBegin(e,o1,o2,o3,o4) 0
#define PetscLogEventEnd(e,o1,o2,o3,o4) 0
617 
/*
   The MPI start wrappers must exist even when PETSC_USE_LOG is not defined.
   These definitions do no counting; count and datatype are ignored.

   The Startall and neighbor variants skip the MPI call when there is nothing to start
   (number is 0, or both degrees are 0).  They evaluate to 0 in that case or when the
   MPI call returns 0, and to 1 otherwise.  MPI_Start_isend yields the return value of
   MPI_Start() directly.
*/
#define MPI_Startall_irecv(count,datatype,number,requests) \
  ((number) && MPI_Startall((number),(requests)))
#define MPI_Startall_isend(count,datatype,number,requests) \
  ((number) && MPI_Startall((number),(requests)))
#define MPI_Start_isend(count,datatype,requests) \
  (MPI_Start((requests)))
#define MPI_Start_ineighbor_alltoallv(outdegree,indegree,sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm,request) \
  (((outdegree) || (indegree)) && \
   MPI_Ineighbor_alltoallv((sendbuf),(sendcnts),(sdispls),(sendtype),(recvbuf),(recvcnts),(rdispls),(recvtype),(comm),(request)))
#define MPI_Start_neighbor_alltoallv(outdegree,indegree,sendbuf,sendcnts,sdispls,sendtype,recvbuf,recvcnts,rdispls,recvtype,comm) \
  (((outdegree) || (indegree)) && \
   MPI_Neighbor_alltoallv((sendbuf),(sendcnts),(sdispls),(sendtype),(recvbuf),(recvcnts),(rdispls),(recvtype),(comm)))
626 
627 #endif   /* PETSC_USE_LOG */
628 
/*
   PetscPreLoadBegin - opens a preloading block; must be paired with PetscPreLoadEnd().

   Input Parameters:
+  flag - initial value of the preloading switch; the -preload option, if given, overrides it
-  name - name of the log stage that covers the code following this macro

   Notes:
   The macro opens a do { and a for (...) { scope and leaves both open; PetscPreLoadEnd()
   closes them.  The code between the two macros is therefore the loop body.  It runs
   once when preloading is off (PetscPreLoadMax == 0) and twice when it is on.

   On the first pass the stage is registered, on later passes it is looked up by name.
   The stage is set active when preloading is off, or on any pass after the first.

   The locals PetscPreLoading, PetscPreLoadMax, PetscPreLoadIt, _stageNum and _3_ierr are
   declared here and used by PetscPreLoadStage() and PetscPreLoadEnd().  Every return
   code is passed to CHKERRQ(), including the one from PetscLogStageSetActive().
*/
#define PetscPreLoadBegin(flag,name) \
do {\
  PetscBool      PetscPreLoading = (flag);\
  int            PetscPreLoadMax,PetscPreLoadIt;\
  PetscLogStage  _stageNum;\
  PetscErrorCode _3_ierr; \
  _3_ierr = PetscOptionsGetBool(NULL,NULL,"-preload",&PetscPreLoading,NULL);CHKERRQ(_3_ierr); \
  PetscPreLoadMax = (int)(PetscPreLoading); /* 0: single pass, 1: preload pass + real pass */ \
  PetscPreLoadingUsed = PetscPreLoading ? PETSC_TRUE : PetscPreLoadingUsed;\
  for (PetscPreLoadIt=0; PetscPreLoadIt<=PetscPreLoadMax; PetscPreLoadIt++) {\
    PetscPreLoadingOn = PetscPreLoading;\
    _3_ierr = PetscBarrier(NULL);CHKERRQ(_3_ierr);\
    if (PetscPreLoadIt>0) {\
      _3_ierr = PetscLogStageGetId(name,&_stageNum);CHKERRQ(_3_ierr);\
    } else {\
      _3_ierr = PetscLogStageRegister(name,&_stageNum);CHKERRQ(_3_ierr); \
    }\
    _3_ierr = PetscLogStageSetActive(_stageNum,(PetscBool)(!PetscPreLoadMax || PetscPreLoadIt));CHKERRQ(_3_ierr);\
    _3_ierr = PetscLogStagePush(_stageNum);CHKERRQ(_3_ierr);
648 
/*
   PetscPreLoadEnd - closes the block opened by PetscPreLoadBegin().

   Pops the current log stage, clears the local PetscPreLoading switch so that a
   following pass of the loop is no longer flagged as preloading, and then closes the
   for loop and the do { ... } while (0) scope that PetscPreLoadBegin() left open.
*/
#define PetscPreLoadEnd() \
    _3_ierr = PetscLogStagePop(); \
    CHKERRQ(_3_ierr); \
    PetscPreLoading = PETSC_FALSE; \
  } /* closes the preload for loop */ \
} while (0)
654 
/*
   PetscPreLoadStage - switches to another named log stage inside a preloading block.

   Input Parameter:
.  name - name of the log stage to switch to

   Notes:
   Only valid between PetscPreLoadBegin() and PetscPreLoadEnd(), because it uses the
   locals _3_ierr, _stageNum, PetscPreLoadIt and PetscPreLoadMax declared there.

   The current stage is popped.  The new stage is registered on the first pass and
   looked up by name on later passes, activated under the same rule as in
   PetscPreLoadBegin(), and pushed.  Every return code is passed to CHKERRQ(),
   including the one from PetscLogStageSetActive().
*/
#define PetscPreLoadStage(name) do {                                         \
    _3_ierr = PetscLogStagePop();CHKERRQ(_3_ierr);                      \
    if (PetscPreLoadIt>0) {                                                  \
      _3_ierr = PetscLogStageGetId(name,&_stageNum);CHKERRQ(_3_ierr);   \
    } else {                                                            \
      _3_ierr = PetscLogStageRegister(name,&_stageNum);CHKERRQ(_3_ierr); \
    }                                                                   \
    _3_ierr = PetscLogStageSetActive(_stageNum,(PetscBool)(!PetscPreLoadMax || PetscPreLoadIt));CHKERRQ(_3_ierr); \
    _3_ierr = PetscLogStagePush(_stageNum);CHKERRQ(_3_ierr);            \
  } while (0)
665 
666 /* some vars for logging */
667 PETSC_EXTERN PetscBool PetscPreLoadingUsed;       /* true if we are or have done preloading */
668 PETSC_EXTERN PetscBool PetscPreLoadingOn;         /* true if we are currently in a preloading calculation */
669 
670 #endif
671