xref: /petsc/src/sys/logging/handler/impls/default/logdefault.c (revision 2611ad710242b3c70d66651ef7e40f9450d305e2)
1 #include <petscviewer.h>
2 #include <petscdevice.h>
3 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/
4 #include <petsc/private/loghandlerimpl.h>
5 #include <petsc/private/deviceimpl.h>
6 #include <petscconfiginfo.h>
7 #include <petscmachineinfo.h>
8 #include "logdefault.h"
9 
10 static PetscErrorCode PetscEventPerfInfoInit(PetscEventPerfInfo *eventInfo)
11 {
12   PetscFunctionBegin;
13   PetscCall(PetscMemzero(eventInfo, sizeof(*eventInfo)));
14   eventInfo->visible   = PETSC_TRUE;
15   eventInfo->id        = -1;
16   eventInfo->dof[0]    = -1.0;
17   eventInfo->dof[1]    = -1.0;
18   eventInfo->dof[2]    = -1.0;
19   eventInfo->dof[3]    = -1.0;
20   eventInfo->dof[4]    = -1.0;
21   eventInfo->dof[5]    = -1.0;
22   eventInfo->dof[6]    = -1.0;
23   eventInfo->dof[7]    = -1.0;
24   eventInfo->errors[0] = -1.0;
25   eventInfo->errors[1] = -1.0;
26   eventInfo->errors[2] = -1.0;
27   eventInfo->errors[3] = -1.0;
28   eventInfo->errors[4] = -1.0;
29   eventInfo->errors[5] = -1.0;
30   eventInfo->errors[6] = -1.0;
31   eventInfo->errors[7] = -1.0;
32   PetscFunctionReturn(PETSC_SUCCESS);
33 }
34 
35 static PetscErrorCode PetscEventPerfInfoTic_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool resume)
36 {
37   PetscFunctionBegin;
38   if (resume) {
39     eventInfo->timeTmp -= time;
40     eventInfo->flopsTmp -= petsc_TotalFlops_th;
41   } else {
42     eventInfo->timeTmp  = -time;
43     eventInfo->flopsTmp = -petsc_TotalFlops_th;
44   }
45   eventInfo->numMessages -= petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th;
46   eventInfo->messageLength -= petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len_th + petsc_send_len_th;
47   eventInfo->numReductions -= petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th;
48 #if defined(PETSC_HAVE_DEVICE)
49   eventInfo->CpuToGpuCount -= petsc_ctog_ct_th;
50   eventInfo->GpuToCpuCount -= petsc_gtoc_ct_th;
51   eventInfo->CpuToGpuSize -= petsc_ctog_sz_th;
52   eventInfo->GpuToCpuSize -= petsc_gtoc_sz_th;
53   eventInfo->GpuFlops -= petsc_gflops_th;
54   eventInfo->GpuTime -= petsc_gtime;
55 #endif
56   if (logMemory) {
57     PetscLogDouble usage;
58     PetscCall(PetscMemoryGetCurrentUsage(&usage));
59     eventInfo->memIncrease -= usage;
60     PetscCall(PetscMallocGetCurrentUsage(&usage));
61     eventInfo->mallocSpace -= usage;
62     PetscCall(PetscMallocGetMaximumUsage(&usage));
63     eventInfo->mallocIncrease -= usage;
64     PetscCall(PetscMallocPushMaximumUsage(event));
65   }
66   PetscFunctionReturn(PETSC_SUCCESS);
67 }
68 
69 static PetscErrorCode PetscEventPerfInfoTic(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event)
70 {
71   PetscFunctionBegin;
72   PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_FALSE));
73   PetscFunctionReturn(PETSC_SUCCESS);
74 }
75 
76 static PetscErrorCode PetscEventPerfInfoResume(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event)
77 {
78   PetscFunctionBegin;
79   PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_TRUE));
80   PetscFunctionReturn(PETSC_SUCCESS);
81 }
82 
83 static PetscErrorCode PetscEventPerfInfoToc_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool pause)
84 {
85   PetscFunctionBegin;
86   eventInfo->timeTmp += time;
87   eventInfo->flopsTmp += petsc_TotalFlops_th;
88   if (!pause) {
89     eventInfo->time += eventInfo->timeTmp;
90     eventInfo->time2 += eventInfo->timeTmp * eventInfo->timeTmp;
91     eventInfo->flops += eventInfo->flopsTmp;
92     eventInfo->flops2 += eventInfo->flopsTmp * eventInfo->flopsTmp;
93   }
94   eventInfo->numMessages += petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th;
95   eventInfo->messageLength += petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len + petsc_send_len_th;
96   eventInfo->numReductions += petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th;
97 #if defined(PETSC_HAVE_DEVICE)
98   eventInfo->CpuToGpuCount += petsc_ctog_ct_th;
99   eventInfo->GpuToCpuCount += petsc_gtoc_ct_th;
100   eventInfo->CpuToGpuSize += petsc_ctog_sz_th;
101   eventInfo->GpuToCpuSize += petsc_gtoc_sz_th;
102   eventInfo->GpuFlops += petsc_gflops_th;
103   eventInfo->GpuTime += petsc_gtime;
104 #endif
105   if (logMemory) {
106     PetscLogDouble usage, musage;
107     PetscCall(PetscMemoryGetCurrentUsage(&usage)); /* the comments below match the column labels printed in PetscLogView_Default() */
108     eventInfo->memIncrease += usage;               /* RMI */
109     PetscCall(PetscMallocGetCurrentUsage(&usage));
110     eventInfo->mallocSpace += usage; /* Malloc */
111     PetscCall(PetscMallocPopMaximumUsage((int)event, &musage));
112     eventInfo->mallocIncreaseEvent = PetscMax(musage - usage, eventInfo->mallocIncreaseEvent); /* EMalloc */
113     PetscCall(PetscMallocGetMaximumUsage(&usage));
114     eventInfo->mallocIncrease += usage; /* MMalloc */
115   }
116   PetscFunctionReturn(PETSC_SUCCESS);
117 }
118 
119 static PetscErrorCode PetscEventPerfInfoToc(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event)
120 {
121   PetscFunctionBegin;
122   PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_FALSE));
123   PetscFunctionReturn(PETSC_SUCCESS);
124 }
125 
126 static PetscErrorCode PetscEventPerfInfoPause(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event)
127 {
128   PetscFunctionBegin;
129   PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_FALSE));
130   PetscFunctionReturn(PETSC_SUCCESS);
131 }
132 
133 static PetscErrorCode PetscEventPerfInfoAdd_Internal(const PetscEventPerfInfo *eventInfo, PetscEventPerfInfo *outInfo)
134 {
135   PetscFunctionBegin;
136   outInfo->count += eventInfo->count;
137   outInfo->time += eventInfo->time;
138   outInfo->time2 += eventInfo->time2;
139   outInfo->flops += eventInfo->flops;
140   outInfo->flops2 += eventInfo->flops2;
141   outInfo->numMessages += eventInfo->numMessages;
142   outInfo->messageLength += eventInfo->messageLength;
143   outInfo->numReductions += eventInfo->numReductions;
144 #if defined(PETSC_HAVE_DEVICE)
145   outInfo->CpuToGpuCount += eventInfo->CpuToGpuCount;
146   outInfo->GpuToCpuCount += eventInfo->GpuToCpuCount;
147   outInfo->CpuToGpuSize += eventInfo->CpuToGpuSize;
148   outInfo->GpuToCpuSize += eventInfo->GpuToCpuSize;
149   outInfo->GpuFlops += eventInfo->GpuFlops;
150   outInfo->GpuTime += eventInfo->GpuTime;
151 #endif
152   outInfo->memIncrease += eventInfo->memIncrease;
153   outInfo->mallocSpace += eventInfo->mallocSpace;
154   outInfo->mallocIncreaseEvent += eventInfo->mallocIncreaseEvent;
155   outInfo->mallocIncrease += eventInfo->mallocIncrease;
156   PetscFunctionReturn(PETSC_SUCCESS);
157 }
158 
159 PETSC_LOG_RESIZABLE_ARRAY(EventPerfArray, PetscEventPerfInfo, PetscLogEvent, PetscEventPerfInfoInit, NULL, NULL)
160 
161 /* --- PetscClassPerf --- */
162 
163 typedef struct {
164   int            creations;    /* The number of objects of this class created */
165   int            destructions; /* The number of objects of this class destroyed */
166   PetscLogDouble mem;          /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */
167   PetscLogDouble descMem;      /* The total memory allocated by descendents of these objects; this is completely wrong and should possibly be removed */
168 } PetscClassPerf;
169 
170 static PetscErrorCode PetscClassPerfInit(PetscClassPerf *classInfo)
171 {
172   PetscFunctionBegin;
173   PetscCall(PetscMemzero(classInfo, sizeof(*classInfo)));
174   PetscFunctionReturn(PETSC_SUCCESS);
175 }
176 
177 PETSC_LOG_RESIZABLE_ARRAY(ClassPerfArray, PetscClassPerf, PetscLogClass, PetscClassPerfInit, NULL, NULL)
178 
179 /* --- PetscStagePerf --- */
180 
181 typedef struct _PetscStagePerf {
182   PetscBool              used;     /* The stage was pushed on this processor */
183   PetscEventPerfInfo     perfInfo; /* The stage performance information */
184   PetscLogEventPerfArray eventLog; /* The event information for this stage */
185   PetscLogClassPerfArray classLog; /* The class information for this stage */
186 } PetscStagePerf;
187 
188 static PetscErrorCode PetscStageInfoInit(PetscStagePerf *stageInfo)
189 {
190   PetscFunctionBegin;
191   PetscCall(PetscMemzero(stageInfo, sizeof(*stageInfo)));
192   PetscCall(PetscLogEventPerfArrayCreate(128, &(stageInfo->eventLog)));
193   PetscCall(PetscLogClassPerfArrayCreate(128, &(stageInfo->classLog)));
194   PetscCall(PetscEventPerfInfoInit(&stageInfo->perfInfo));
195   PetscFunctionReturn(PETSC_SUCCESS);
196 }
197 
198 static PetscErrorCode PetscStageInfoReset(PetscStagePerf *stageInfo)
199 {
200   PetscFunctionBegin;
201   PetscCall(PetscLogEventPerfArrayDestroy(&(stageInfo->eventLog)));
202   PetscCall(PetscLogClassPerfArrayDestroy(&(stageInfo->classLog)));
203   PetscFunctionReturn(PETSC_SUCCESS);
204 }
205 
206 PETSC_LOG_RESIZABLE_ARRAY(StageInfoArray, PetscStagePerf, PetscLogStage, PetscStageInfoInit, PetscStageInfoReset, NULL)
207 
208 /* --- Action --- */
209 
210 /* The structure for action logging */
211 typedef enum {
212   PETSC_LOG_ACTION_CREATE,
213   PETSC_LOG_ACTION_DESTROY,
214   PETSC_LOG_ACTION_BEGIN,
215   PETSC_LOG_ACTION_END,
216 } PetscLogActionType;
217 
218 PETSC_LOG_RESIZABLE_ARRAY(ActionArray, Action, PetscLogEvent, NULL, NULL, NULL)
219 
220 /* --- Object --- */
221 
222 PETSC_LOG_RESIZABLE_ARRAY(ObjectArray, Object, PetscObject, NULL, NULL, NULL)
223 
224 /* Map from (threadid,stage,event) to perfInfo data struct */
225 #include <petsc/private/hashmapijk.h>
226 
227 #if !PetscDefined(HAVE_THREADSAFETY)
228 // PetscHMapEvent is only declared in logimpl.h if PetscDefined(HAVE_THREADSAFETY): define it otherwise so that we
229 // can just always have it in the struct
230 PETSC_HASH_MAP(HMapEvent, PetscHashIJKKey, PetscEventPerfInfo *, PetscHashIJKKeyHash, PetscHashIJKKeyEqual, NULL)
231 #endif
232 
233 typedef struct _n_PetscLogHandler_Default *PetscLogHandler_Default;
234 struct _n_PetscLogHandler_Default {
235   PetscLogStageInfoArray stages;
236   PetscSpinlock          lock;
237   PetscLogActionArray    petsc_actions;
238   PetscLogObjectArray    petsc_objects;
239   PetscBool              petsc_logActions;
240   PetscBool              petsc_logObjects;
241   int                    petsc_numObjectsCreated;
242   int                    petsc_numObjectsDestroyed;
243   PetscHMapEvent         eventInfoMap_th;
244   int                    pause_depth;
245 };
246 
247 /* --- PetscLogHandler_Default --- */
248 
249 static PetscErrorCode PetscLogHandlerContextCreate_Default(PetscLogHandler_Default *def_p)
250 {
251   PetscLogHandler_Default def;
252 
253   PetscFunctionBegin;
254   PetscCall(PetscNew(def_p));
255   def = *def_p;
256   PetscCall(PetscLogStageInfoArrayCreate(8, &(def->stages)));
257   PetscCall(PetscLogActionArrayCreate(64, &(def->petsc_actions)));
258   PetscCall(PetscLogObjectArrayCreate(64, &(def->petsc_objects)));
259 
260   PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_actions", &def->petsc_logActions, NULL));
261   PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_objects", &def->petsc_logObjects, NULL));
262   if (PetscDefined(HAVE_THREADSAFETY)) { PetscCall(PetscHMapEventCreate(&def->eventInfoMap_th)); }
263   PetscFunctionReturn(PETSC_SUCCESS);
264 }
265 
266 static PetscErrorCode PetscLogHandlerDestroy_Default(PetscLogHandler h)
267 {
268   PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
269 
270   PetscFunctionBegin;
271   PetscCall(PetscLogStageInfoArrayDestroy(&def->stages));
272   PetscCall(PetscLogActionArrayDestroy(&def->petsc_actions));
273   PetscCall(PetscLogObjectArrayDestroy(&def->petsc_objects));
274   if (def->eventInfoMap_th) {
275     PetscEventPerfInfo **array;
276     PetscInt             n, off = 0;
277 
278     PetscCall(PetscHMapEventGetSize(def->eventInfoMap_th, &n));
279     PetscCall(PetscMalloc1(n, &array));
280     PetscCall(PetscHMapEventGetVals(def->eventInfoMap_th, &off, array));
281     for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i]));
282     PetscCall(PetscFree(array));
283     PetscCall(PetscHMapEventDestroy(&def->eventInfoMap_th));
284   }
285   PetscCall(PetscFree(def));
286   PetscFunctionReturn(PETSC_SUCCESS);
287 }
288 
289 static PetscErrorCode PetscLogHandlerDefaultGetStageInfo(PetscLogHandler handler, PetscLogStage stage, PetscStagePerf **stage_info_p)
290 {
291   PetscStagePerf         *stage_info = NULL;
292   PetscLogHandler_Default def        = (PetscLogHandler_Default)handler->data;
293 
294   PetscFunctionBegin;
295   PetscCall(PetscLogStageInfoArrayResize(def->stages, stage + 1));
296   PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info));
297   *stage_info_p = stage_info;
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultGetEventPerfInfo(PetscLogHandler handler, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **event_info_p)
302 {
303   PetscEventPerfInfo    *event_info = NULL;
304   PetscStagePerf        *stage_info = NULL;
305   PetscLogEventPerfArray event_log;
306 
307   PetscFunctionBegin;
308   if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(handler->state, &stage));
309   PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info));
310   event_log = stage_info->eventLog;
311   PetscCall(PetscLogEventPerfArrayResize(event_log, event + 1));
312   PetscCall(PetscLogEventPerfArrayGetRef(event_log, event, &event_info));
313   event_info->id = event;
314   *event_info_p  = event_info;
315   PetscFunctionReturn(PETSC_SUCCESS);
316 }
317 
318 static PetscErrorCode PetscLogHandlerDefaultGetClassPerf(PetscLogHandler handler, PetscLogStage stage, PetscLogClass clss, PetscClassPerf **class_info)
319 {
320   PetscLogClassPerfArray class_log;
321   PetscStagePerf        *stage_info;
322 
323   PetscFunctionBegin;
324   PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info));
325   class_log = stage_info->classLog;
326   PetscCall(PetscLogClassPerfArrayResize(class_log, clss + 1));
327   PetscCall(PetscLogClassPerfArrayGetRef(class_log, clss, class_info));
328   PetscFunctionReturn(PETSC_SUCCESS);
329 }
330 
331 static PetscErrorCode PetscLogHandlerObjectCreate_Default(PetscLogHandler h, PetscObject obj)
332 {
333   PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
334   PetscLogState           state;
335   PetscLogStage           stage;
336   PetscClassPerf         *classInfo;
337   int                     oclass = 0;
338 
339   PetscFunctionBegin;
340   PetscCall(PetscLogHandlerGetState(h, &state));
341   PetscCall(PetscSpinlockLock(&def->lock));
342   /* Record stage info */
343   PetscCall(PetscLogStateGetCurrentStage(state, &stage));
344   PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass));
345   PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo));
346   classInfo->creations++;
347   /* Record the creation action */
348   if (def->petsc_logActions) {
349     Action new_action;
350 
351     PetscCall(PetscTime(&new_action.time));
352     new_action.time -= petsc_BaseTime;
353     new_action.action  = PETSC_LOG_ACTION_CREATE;
354     new_action.event   = -1;
355     new_action.classid = obj->classid;
356     new_action.id1     = obj->id;
357     new_action.id2     = -1;
358     new_action.id3     = -1;
359     new_action.flops   = petsc_TotalFlops;
360     PetscCall(PetscMallocGetCurrentUsage(&new_action.mem));
361     PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem));
362     PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action));
363   }
364   /* We don't just use obj->id to count all objects that are created
365      because PetscLogHandlers are objects and PetscLogObjectDestroy() will not
366      be called for them: the number of objects created and destroyed as counted
367      here and below would have an imbalance */
368   def->petsc_numObjectsCreated++;
369   /* Record the object */
370   if (def->petsc_logObjects) {
371     Object new_object;
372 
373     new_object.parent = -1;
374     new_object.obj    = obj;
375     new_object.mem    = 0;
376 
377     PetscCall(PetscMemzero(new_object.name, sizeof(new_object.name)));
378     PetscCall(PetscMemzero(new_object.info, sizeof(new_object.info)));
379     PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1");
380     PetscCall(PetscLogObjectArrayResize(def->petsc_objects, obj->id));
381     PetscCall(PetscLogObjectArraySet(def->petsc_objects, obj->id - 1, new_object));
382   }
383   PetscCall(PetscSpinlockUnlock(&def->lock));
384   PetscFunctionReturn(PETSC_SUCCESS);
385 }
386 
387 static PetscErrorCode PetscLogHandlerObjectDestroy_Default(PetscLogHandler h, PetscObject obj)
388 {
389   PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
390   PetscLogState           state;
391   PetscLogStage           stage;
392   PetscClassPerf         *classInfo;
393   int                     oclass = 0;
394 
395   PetscFunctionBegin;
396   PetscCall(PetscLogHandlerGetState(h, &state));
397   /* Record stage info */
398   PetscCall(PetscSpinlockLock(&def->lock));
399   PetscCall(PetscLogStateGetCurrentStage(state, &stage));
400   if (stage >= 0) {
401     /* stage < 0 can happen if the log summary is output before some things are destroyed */
402     PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass));
403     PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo));
404     classInfo->destructions++;
405   }
406   /* Cannot Credit all ancestors with your memory because they may have already been destroyed*/
407   def->petsc_numObjectsDestroyed++;
408   /* Dynamically enlarge logging structures */
409   /* Record the destruction action */
410   if (def->petsc_logActions) {
411     Action new_action;
412 
413     PetscCall(PetscTime(&new_action.time));
414     new_action.time -= petsc_BaseTime;
415     new_action.event   = -1;
416     new_action.action  = PETSC_LOG_ACTION_DESTROY;
417     new_action.classid = obj->classid;
418     new_action.id1     = obj->id;
419     new_action.id2     = -1;
420     new_action.id3     = -1;
421     new_action.flops   = petsc_TotalFlops;
422     PetscCall(PetscMallocGetCurrentUsage(&new_action.mem));
423     PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem));
424     PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action));
425   }
426   if (def->petsc_logObjects) {
427     Object *obj_entry = NULL;
428 
429     PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1");
430     PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, obj->id - 1, &obj_entry));
431     if (obj->name) PetscCall(PetscStrncpy(obj_entry->name, obj->name, 64));
432     obj_entry->obj = NULL;
433   }
434   PetscCall(PetscSpinlockUnlock(&def->lock));
435   PetscFunctionReturn(PETSC_SUCCESS);
436 }
437 
438 static PetscErrorCode PetscLogHandlerEventSync_Default(PetscLogHandler h, PetscLogEvent event, MPI_Comm comm)
439 {
440   PetscLogState       state;
441   PetscLogEventInfo   event_info;
442   PetscEventPerfInfo *event_perf_info;
443   int                 stage;
444   PetscLogDouble      time = 0.0;
445 
446   PetscFunctionBegin;
447   if (!(PetscLogSyncOn) || comm == MPI_COMM_NULL) PetscFunctionReturn(PETSC_SUCCESS);
448   PetscCall(PetscLogHandlerGetState(h, &state));
449   PetscCall(PetscLogStateEventGetInfo(state, event, &event_info));
450   if (!event_info.collective) PetscFunctionReturn(PETSC_SUCCESS);
451   PetscCall(PetscLogStateGetCurrentStage(state, &stage));
452   PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info));
453   if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
454 
455   PetscCall(PetscTimeSubtract(&time));
456   PetscCallMPI(MPI_Barrier(comm));
457   PetscCall(PetscTimeAdd(&time));
458   event_perf_info->syncTime += time;
459   PetscFunctionReturn(PETSC_SUCCESS);
460 }
461 
462 static PetscErrorCode PetscLogGetStageEventPerfInfo_threaded(PetscLogHandler_Default def, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **eventInfo)
463 {
464   PetscEventPerfInfo *leventInfo = NULL;
465   PetscHashIJKKey     key;
466 
467   PetscFunctionBegin;
468 
469 #if PetscDefined(HAVE_THREADSAFETY)
470   key.i = PetscLogGetTid();
471 #else
472   key.i = 0;
473 #endif
474   key.j = stage;
475   key.k = event;
476   PetscCall(PetscSpinlockLock(&def->lock));
477   PetscCall(PetscHMapEventGet(def->eventInfoMap_th, key, &leventInfo));
478   if (!leventInfo) {
479     PetscCall(PetscNew(&leventInfo));
480     leventInfo->id = event;
481     PetscCall(PetscHMapEventSet(def->eventInfoMap_th, key, leventInfo));
482   }
483   PetscCall(PetscSpinlockUnlock(&def->lock));
484   *eventInfo = leventInfo;
485   PetscFunctionReturn(PETSC_SUCCESS);
486 }
487 
488 #if defined(PETSC_HAVE_CUDA)
489   #include <nvToolsExt.h>
490 #endif
491 static PetscErrorCode PetscLogHandlerEventBegin_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
492 {
493   PetscLogHandler_Default def             = (PetscLogHandler_Default)h->data;
494   PetscEventPerfInfo     *event_perf_info = NULL;
495   PetscLogEventInfo       event_info;
496   PetscLogDouble          time;
497   PetscLogState           state;
498   PetscLogStage           stage;
499 
500   PetscFunctionBegin;
501   PetscCall(PetscLogHandlerGetState(h, &state));
502   /* Synchronization */
503   PetscCall(PetscLogHandlerEventSync_Default(h, event, PetscObjectComm(o1)));
504   PetscCall(PetscLogStateGetCurrentStage(state, &stage));
505   if (def->pause_depth > 0) stage = 0; // in pause-mode, all events run on the main stage
506   if (PetscDefined(HAVE_THREADSAFETY)) {
507     PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info));
508     if (event_perf_info->depth == 0) { PetscCall(PetscEventPerfInfoInit(event_perf_info)); }
509   } else {
510     PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info));
511   }
512   PetscCheck(event_perf_info->depth >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to begin a paused event, this is not allowed");
513   event_perf_info->depth++;
514   /* Check for double counting */
515   if (event_perf_info->depth > 1) PetscFunctionReturn(PETSC_SUCCESS);
516 #if defined(PETSC_HAVE_CUDA)
517   if (PetscDeviceInitialized(PETSC_DEVICE_CUDA)) {
518     PetscLogEventInfo event_reg_info;
519 
520     PetscCall(PetscLogStateEventGetInfo(state, event, &event_reg_info));
521     nvtxRangePushA(event_reg_info.name);
522   }
523 #endif
524   PetscCall(PetscLogStateEventGetInfo(state, event, &event_info));
525   /* Log the performance info */
526   event_perf_info->count++;
527   PetscCall(PetscTime(&time));
528   PetscCall(PetscEventPerfInfoTic(event_perf_info, time, PetscLogMemory, (int)event));
529   if (def->petsc_logActions) {
530     PetscLogDouble curTime;
531     Action         new_action;
532 
533     PetscCall(PetscTime(&curTime));
534     new_action.time    = curTime - petsc_BaseTime;
535     new_action.action  = PETSC_LOG_ACTION_BEGIN;
536     new_action.event   = event;
537     new_action.classid = event_info.classid;
538     new_action.id1     = o1 ? o1->id : -1;
539     new_action.id2     = o2 ? o2->id : -1;
540     new_action.id3     = o3 ? o3->id : -1;
541     new_action.flops   = petsc_TotalFlops;
542     PetscCall(PetscMallocGetCurrentUsage(&new_action.mem));
543     PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem));
544     PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action));
545   }
546   PetscFunctionReturn(PETSC_SUCCESS);
547 }
548 
549 static PetscErrorCode PetscLogHandlerEventEnd_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
550 {
551   PetscLogHandler_Default def             = (PetscLogHandler_Default)h->data;
552   PetscEventPerfInfo     *event_perf_info = NULL;
553   PetscLogDouble          time;
554   PetscLogState           state;
555   int                     stage;
556 
557   PetscFunctionBegin;
558   PetscCall(PetscLogHandlerGetState(h, &state));
559   if (def->petsc_logActions) {
560     PetscLogEventInfo event_info;
561     PetscLogDouble    curTime;
562     Action            new_action;
563 
564     PetscCall(PetscLogStateEventGetInfo(state, event, &event_info));
565     PetscCall(PetscTime(&curTime));
566     new_action.time    = curTime - petsc_BaseTime;
567     new_action.action  = PETSC_LOG_ACTION_END;
568     new_action.event   = event;
569     new_action.classid = event_info.classid;
570     new_action.id1     = o1 ? o1->id : -1;
571     new_action.id2     = o2 ? o2->id : -2;
572     new_action.id3     = o3 ? o3->id : -3;
573     new_action.flops   = petsc_TotalFlops;
574     PetscCall(PetscMallocGetCurrentUsage(&new_action.mem));
575     PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem));
576     PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action));
577   }
578   PetscCall(PetscLogStateGetCurrentStage(state, &stage));
579   if (def->pause_depth > 0) stage = 0; // all events run on the main stage in pause-mode
580   if (PetscDefined(HAVE_THREADSAFETY)) {
581     PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info));
582   } else {
583     PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info));
584   }
585   PetscCheck(event_perf_info->depth > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to end paused event, not allowed");
586   event_perf_info->depth--;
587   /* Check for double counting */
588   if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
589   else PetscCheck(event_perf_info->depth == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Logging event had unbalanced begin/end pairs");
590 
591   /* Log performance info */
592   PetscCall(PetscTime(&time));
593   PetscCall(PetscEventPerfInfoToc(event_perf_info, time, PetscLogMemory, (int)event));
594   if (PetscDefined(HAVE_THREADSAFETY)) {
595     PetscEventPerfInfo *event_perf_info_global;
596     PetscCall(PetscSpinlockLock(&def->lock));
597     PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info_global));
598     PetscCall(PetscEventPerfInfoAdd_Internal(event_perf_info, event_perf_info_global));
599     PetscCall(PetscSpinlockUnlock(&def->lock));
600   }
601 #if defined(PETSC_HAVE_CUDA)
602   if (PetscDeviceInitialized(PETSC_DEVICE_CUDA)) nvtxRangePop();
603 #endif
604   PetscFunctionReturn(PETSC_SUCCESS);
605 }
606 
607 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultDeactivatePush(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event)
608 {
609   PetscEventPerfInfo *event_perf_info;
610 
611   PetscFunctionBegin;
612   if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage));
613   PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info));
614   event_perf_info->depth++;
615   PetscFunctionReturn(PETSC_SUCCESS);
616 }
617 
618 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultDeactivatePop(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event)
619 {
620   PetscEventPerfInfo *event_perf_info;
621 
622   PetscFunctionBegin;
623   if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage));
624   PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info));
625   event_perf_info->depth--;
626   PetscFunctionReturn(PETSC_SUCCESS);
627 }
628 
629 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultEventsPause(PetscLogHandler h)
630 {
631   PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
632   PetscLogDouble          time;
633   PetscInt                num_stages;
634 
635   PetscFunctionBegin;
636   if (def->pause_depth++ > 0) PetscFunctionReturn(PETSC_SUCCESS);
637   PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL));
638   PetscCall(PetscTime(&time));
639   for (PetscInt stage = 0; stage < num_stages; stage++) {
640     PetscStagePerf *stage_info = NULL;
641     PetscInt        num_events;
642 
643     PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info));
644     PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL));
645     for (PetscInt event = 0; event < num_events; event++) {
646       PetscEventPerfInfo *event_info = NULL;
647       PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info));
648       if (event_info->depth > 0) {
649         event_info->depth *= -1;
650         PetscCall(PetscEventPerfInfoPause(event_info, time, PetscLogMemory, event));
651       }
652     }
653     if (stage > 0 && stage_info->perfInfo.depth > 0) {
654       stage_info->perfInfo.depth *= -1;
655       PetscCall(PetscEventPerfInfoPause(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2)));
656     }
657   }
658   PetscFunctionReturn(PETSC_SUCCESS);
659 }
660 
661 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultEventsResume(PetscLogHandler h)
662 {
663   PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
664   PetscLogDouble          time;
665   PetscInt                num_stages;
666 
667   PetscFunctionBegin;
668   if (--def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
669   PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL));
670   PetscCall(PetscTime(&time));
671   for (PetscInt stage = 0; stage < num_stages; stage++) {
672     PetscStagePerf *stage_info = NULL;
673     PetscInt        num_events;
674 
675     PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info));
676     PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL));
677     for (PetscInt event = 0; event < num_events; event++) {
678       PetscEventPerfInfo *event_info = NULL;
679       PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info));
680       if (event_info->depth < 0) {
681         event_info->depth *= -1;
682         PetscCall(PetscEventPerfInfoResume(event_info, time, PetscLogMemory, event));
683       }
684     }
685     if (stage > 0 && stage_info->perfInfo.depth < 0) {
686       stage_info->perfInfo.depth *= -1;
687       PetscCall(PetscEventPerfInfoResume(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2)));
688     }
689   }
690   PetscFunctionReturn(PETSC_SUCCESS);
691 }
692 
693 static PetscErrorCode PetscLogHandlerStagePush_Default(PetscLogHandler h, PetscLogStage new_stage)
694 {
695   PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
696   PetscLogDouble          time;
697   PetscLogState           state;
698   PetscLogStage           current_stage;
699   PetscStagePerf         *new_stage_info;
700 
701   PetscFunctionBegin;
702   if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
703   PetscCall(PetscLogHandlerGetState(h, &state));
704   current_stage = state->current_stage;
705   PetscCall(PetscLogHandlerDefaultGetStageInfo(h, new_stage, &new_stage_info));
706   PetscCall(PetscTime(&time));
707 
708   /* Record flops/time of previous stage */
709   if (current_stage >= 0) {
710     if (PetscBTLookup(state->active, current_stage)) {
711       PetscStagePerf *current_stage_info;
712       PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, &current_stage_info));
713       PetscCall(PetscEventPerfInfoToc(&current_stage_info->perfInfo, time, PetscLogMemory, (int)-(current_stage + 2)));
714     }
715   }
716   new_stage_info->used = PETSC_TRUE;
717   new_stage_info->perfInfo.count++;
718   new_stage_info->perfInfo.depth++;
719   /* Subtract current quantities so that we obtain the difference when we pop */
720   if (PetscBTLookup(state->active, new_stage)) PetscCall(PetscEventPerfInfoTic(&new_stage_info->perfInfo, time, PetscLogMemory, (int)-(new_stage + 2)));
721   PetscFunctionReturn(PETSC_SUCCESS);
722 }
723 
724 static PetscErrorCode PetscLogHandlerStagePop_Default(PetscLogHandler h, PetscLogStage old_stage)
725 {
726   PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
727   PetscLogStage           current_stage;
728   PetscStagePerf         *old_stage_info;
729   PetscLogState           state;
730   PetscLogDouble          time;
731 
732   PetscFunctionBegin;
733   if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
734   PetscCall(PetscLogHandlerGetState(h, &state));
735   current_stage = state->current_stage;
736   PetscCall(PetscLogHandlerDefaultGetStageInfo(h, old_stage, &old_stage_info));
737   PetscCall(PetscTime(&time));
738   old_stage_info->perfInfo.depth--;
739   if (PetscBTLookup(state->active, old_stage)) { PetscCall(PetscEventPerfInfoToc(&old_stage_info->perfInfo, time, PetscLogMemory, (int)-(old_stage + 2))); }
740   if (current_stage >= 0) {
741     if (PetscBTLookup(state->active, current_stage)) {
742       PetscStagePerf *current_stage_info;
743       PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, &current_stage_info));
744       PetscCall(PetscEventPerfInfoTic(&current_stage_info->perfInfo, time, PetscLogMemory, (int)-(current_stage + 2)));
745     }
746   }
747   PetscFunctionReturn(PETSC_SUCCESS);
748 }
749 
750 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultStageSetVisible(PetscLogHandler h, PetscLogStage stage, PetscBool is_visible)
751 {
752   PetscStagePerf *stage_info;
753 
754   PetscFunctionBegin;
755   if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage));
756   PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info));
757   stage_info->perfInfo.visible = is_visible;
758   PetscFunctionReturn(PETSC_SUCCESS);
759 }
760 
761 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultStageGetVisible(PetscLogHandler h, PetscLogStage stage, PetscBool *is_visible)
762 {
763   PetscStagePerf *stage_info;
764 
765   PetscFunctionBegin;
766   if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage));
767   PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info));
768   *is_visible = stage_info->perfInfo.visible;
769   PetscFunctionReturn(PETSC_SUCCESS);
770 }
771 
772 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultSetLogActions(PetscLogHandler handler, PetscBool flag)
773 {
774   PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
775 
776   PetscFunctionBegin;
777   def->petsc_logActions = flag;
778   PetscFunctionReturn(PETSC_SUCCESS);
779 }
780 
781 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultSetLogObjects(PetscLogHandler handler, PetscBool flag)
782 {
783   PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
784 
785   PetscFunctionBegin;
786   def->petsc_logObjects = flag;
787   PetscFunctionReturn(PETSC_SUCCESS);
788 }
789 
790 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultLogObjectState(PetscLogHandler handler, PetscObject obj, const char format[], va_list Argp)
791 {
792   PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
793   size_t                  fullLength;
794 
795   PetscFunctionBegin;
796   if (def->petsc_logObjects) {
797     Object *obj_entry = NULL;
798 
799     PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, obj->id - 1, &obj_entry));
800     PetscCall(PetscVSNPrintf(obj_entry->info, 64, format, &fullLength, Argp));
801   }
802   PetscFunctionReturn(PETSC_SUCCESS);
803 }
804 
805 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultGetNumObjects(PetscLogHandler handler, PetscInt *num_objects)
806 {
807   PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
808 
809   PetscFunctionBegin;
810   PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, num_objects, NULL));
811   PetscFunctionReturn(PETSC_SUCCESS);
812 }
813 
814 PETSC_INTERN PetscErrorCode PetscLogHandlerDump_Default(PetscLogHandler handler, const char sname[])
815 {
816   PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
817   FILE                   *fd;
818   char                    file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN];
819   PetscLogDouble          flops, _TotalTime;
820   PetscMPIInt             rank;
821   int                     curStage;
822   PetscLogState           state;
823   PetscInt                num_events;
824   PetscLogEvent           event;
825 
826   PetscFunctionBegin;
827   /* Calculate the total elapsed time */
828   PetscCall(PetscTime(&_TotalTime));
829   _TotalTime -= petsc_BaseTime;
830   /* Open log file */
831   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)handler), &rank));
832   PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank));
833   PetscCall(PetscFixFilename(file, fname));
834   PetscCall(PetscFOpen(PETSC_COMM_SELF, fname, "w", &fd));
835   PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname);
836   /* Output totals */
837   PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime));
838   PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Clock Resolution %g\n", 0.0));
839   /* Output actions */
840   if (def->petsc_logActions) {
841     PetscInt num_actions;
842     PetscCall(PetscLogActionArrayGetSize(def->petsc_actions, &num_actions, NULL));
843     PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Actions accomplished %d\n", (int)num_actions));
844     for (int a = 0; a < num_actions; a++) {
845       Action *action;
846 
847       PetscCall(PetscLogActionArrayGetRef(def->petsc_actions, a, &action));
848       PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%g %d %d %d %d %d %d %g %g %g\n", action->time, action->action, (int)action->event, (int)action->classid, action->id1, action->id2, action->id3, action->flops, action->mem, action->maxmem));
849     }
850   }
851   /* Output objects */
852   if (def->petsc_logObjects) {
853     PetscInt num_objects;
854 
855     PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL));
856     PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Objects created %d destroyed %d\n", def->petsc_numObjectsCreated, def->petsc_numObjectsDestroyed));
857     for (int o = 0; o < num_objects; o++) {
858       Object *object = NULL;
859 
860       PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, o, &object));
861       if (object->parent != -1) continue; // object with this id wasn't logged, probably a PetscLogHandler
862       PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Parent ID: %d Memory: %d\n", object->parent, (int)object->mem));
863       if (!object->name[0]) {
864         PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Name\n"));
865       } else {
866         PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Name: %s\n", object->name));
867       }
868       if (!object->info[0]) {
869         PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Info\n"));
870       } else {
871         PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Info: %s\n", object->info));
872       }
873     }
874   }
875   /* Output events */
876   PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Event log:\n"));
877   PetscCall(PetscLogHandlerGetState(handler, &state));
878   PetscCall(PetscLogStateGetNumEvents(state, &num_events));
879   PetscCall(PetscLogStateGetCurrentStage(state, &curStage));
880   for (event = 0; event < num_events; event++) {
881     PetscEventPerfInfo *event_info;
882 
883     PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, curStage, event, &event_info));
884     if (event_info->time != 0.0) flops = event_info->flops / event_info->time;
885     else flops = 0.0;
886     PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%d %16d %16g %16g %16g\n", event, event_info->count, event_info->flops, event_info->time, flops));
887   }
888   PetscCall(PetscFClose(PETSC_COMM_SELF, fd));
889   PetscFunctionReturn(PETSC_SUCCESS);
890 }
891 
892 /*
893   PetscLogView_Detailed - Each process prints the times for its own events
894 
895 */
896 static PetscErrorCode PetscLogHandlerView_Default_Detailed(PetscLogHandler handler, PetscViewer viewer)
897 {
898   PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
899   PetscLogDouble          locTotalTime, numRed, maxMem;
900   PetscInt                numStages, numEvents;
901   MPI_Comm                comm = PetscObjectComm((PetscObject)viewer);
902   PetscMPIInt             rank, size;
903   PetscLogGlobalNames     global_stages, global_events;
904   PetscLogState           state;
905   PetscEventPerfInfo      zero_info;
906 
907   PetscFunctionBegin;
908   PetscCall(PetscLogHandlerGetState(handler, &state));
909   PetscCallMPI(MPI_Comm_size(comm, &size));
910   PetscCallMPI(MPI_Comm_rank(comm, &rank));
911   /* Must preserve reduction count before we go on */
912   numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
913   /* Get the total elapsed time */
914   PetscCall(PetscTime(&locTotalTime));
915   locTotalTime -= petsc_BaseTime;
916   PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size));
917   PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n"));
918   PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n"));
919   PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n"));
920   PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n"));
921   PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n"));
922   PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n"));
923   PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n"));
924   PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages));
925   PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events));
926   PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages));
927   PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents));
928   PetscCall(PetscMemzero(&zero_info, sizeof(zero_info)));
929   PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n"));
930   for (PetscInt stage = 0; stage < numStages; stage++) {
931     PetscInt    stage_id;
932     const char *stage_name;
933 
934     PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
935     PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
936     PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stage_name));
937     PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stage_name));
938     for (PetscInt event = 0; event < numEvents; event++) {
939       PetscEventPerfInfo *eventInfo = &zero_info;
940       PetscBool           is_zero   = PETSC_FALSE;
941       PetscInt            event_id;
942       const char         *event_name;
943 
944       PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id));
945       PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name));
946       if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &eventInfo));
947       is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE;
948       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm));
949       if (!is_zero) { PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stage_name, event_name)); }
950     }
951   }
952   PetscCall(PetscMallocGetMaximumUsage(&maxMem));
953   PetscCall(PetscViewerASCIIPushSynchronized(viewer));
954   PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime));
955   PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct)));
956   PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len)));
957   PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed));
958   PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops));
959   {
960     PetscInt num_objects;
961 
962     PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL));
963     PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, (int)num_objects));
964   }
965   PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem));
966   PetscCall(PetscViewerFlush(viewer));
967   for (PetscInt stage = 0; stage < numStages; stage++) {
968     PetscEventPerfInfo *stage_perf_info = &zero_info;
969     PetscInt            stage_id;
970     const char         *stage_name;
971 
972     PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
973     PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
974     if (stage_id >= 0) {
975       PetscStagePerf *stage_info;
976       PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info));
977       stage_perf_info = &stage_info->perfInfo;
978     }
979     PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stage_name, rank, stage_perf_info->time,
980                                                  stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops));
981     for (PetscInt event = 0; event < numEvents; event++) {
982       PetscEventPerfInfo *eventInfo = &zero_info;
983       PetscBool           is_zero   = PETSC_FALSE;
984       PetscInt            event_id;
985       const char         *event_name;
986 
987       PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id));
988       PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name));
989       if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &eventInfo));
990       is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE;
991       PetscCall(PetscMemcmp(eventInfo, &zero_info, sizeof(zero_info), &is_zero));
992       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm));
993       if (!is_zero) {
994         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", stage_name, event_name, rank,
995                                                      eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops));
996         if (eventInfo->dof[0] >= 0.) {
997           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : ["));
998           for (PetscInt d = 0; d < 8; ++d) {
999             if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", "));
1000             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d]));
1001           }
1002           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]"));
1003           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : ["));
1004           for (PetscInt e = 0; e < 8; ++e) {
1005             if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", "));
1006             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e]));
1007           }
1008           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]"));
1009         }
1010       }
1011       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n"));
1012     }
1013   }
1014   PetscCall(PetscViewerFlush(viewer));
1015   PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1016   PetscCall(PetscLogGlobalNamesDestroy(&global_events));
1017   PetscCall(PetscLogGlobalNamesDestroy(&global_stages));
1018   PetscFunctionReturn(PETSC_SUCCESS);
1019 }
1020 
1021 /*
1022   PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format
1023 */
1024 static PetscErrorCode PetscLogHandlerView_Default_CSV(PetscLogHandler handler, PetscViewer viewer)
1025 {
1026   PetscLogDouble      locTotalTime, maxMem;
1027   PetscInt            numStages, numEvents, stage, event;
1028   MPI_Comm            comm = PetscObjectComm((PetscObject)viewer);
1029   PetscMPIInt         rank, size;
1030   PetscLogGlobalNames global_stages, global_events;
1031   PetscLogState       state;
1032   PetscEventPerfInfo  zero_info;
1033 
1034   PetscFunctionBegin;
1035   PetscCall(PetscLogHandlerGetState(handler, &state));
1036   PetscCallMPI(MPI_Comm_size(comm, &size));
1037   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1038   /* Must preserve reduction count before we go on */
1039   /* Get the total elapsed time */
1040   PetscCall(PetscTime(&locTotalTime));
1041   locTotalTime -= petsc_BaseTime;
1042   PetscCall(PetscMallocGetMaximumUsage(&maxMem));
1043   PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages));
1044   PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events));
1045   PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages));
1046   PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents));
1047   PetscCall(PetscMemzero(&zero_info, sizeof(zero_info)));
1048   PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1049   PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size));
1050   PetscCall(PetscViewerFlush(viewer));
1051   for (stage = 0; stage < numStages; stage++) {
1052     PetscEventPerfInfo *stage_perf_info;
1053     PetscInt            stage_id;
1054     const char         *stage_name;
1055 
1056     PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1057     PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1058     stage_perf_info = &zero_info;
1059     if (stage_id >= 0) {
1060       PetscStagePerf *stage_info;
1061       PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info));
1062       stage_perf_info = &stage_info->perfInfo;
1063     }
1064     PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stage_name, rank, stage_perf_info->time, stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops));
1065     for (event = 0; event < numEvents; event++) {
1066       PetscEventPerfInfo *eventInfo = &zero_info;
1067       PetscBool           is_zero   = PETSC_FALSE;
1068       PetscInt            event_id;
1069       const char         *event_name;
1070 
1071       PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id));
1072       PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name));
1073       if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &eventInfo));
1074       PetscCall(PetscMemcmp(eventInfo, &zero_info, sizeof(zero_info), &is_zero));
1075       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm));
1076       if (!is_zero) {
1077         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stage_name, event_name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops));
1078         if (eventInfo->dof[0] >= 0.) {
1079           for (PetscInt d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d]));
1080           for (PetscInt e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e]));
1081         }
1082       }
1083       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n"));
1084     }
1085   }
1086   PetscCall(PetscViewerFlush(viewer));
1087   PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1088   PetscCall(PetscLogGlobalNamesDestroy(&global_stages));
1089   PetscCall(PetscLogGlobalNamesDestroy(&global_events));
1090   PetscFunctionReturn(PETSC_SUCCESS);
1091 }
1092 
1093 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd)
1094 {
1095   PetscFunctionBegin;
1096   if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS);
1097   PetscCall(PetscFPrintf(comm, fd, "\n\n"));
1098   PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n"));
1099   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1100   PetscCall(PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n"));
1101   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1102   PetscCall(PetscFPrintf(comm, fd, "      #   This program was run with logging synchronization.   #\n"));
1103   PetscCall(PetscFPrintf(comm, fd, "      #   This option provides more meaningful imbalance       #\n"));
1104   PetscCall(PetscFPrintf(comm, fd, "      #   figures at the expense of slowing things down and    #\n"));
1105   PetscCall(PetscFPrintf(comm, fd, "      #   providing a distorted view of the overall runtime.   #\n"));
1106   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1107   PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n\n\n"));
1108   PetscFunctionReturn(PETSC_SUCCESS);
1109 }
1110 
1111 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd)
1112 {
1113   PetscFunctionBegin;
1114   if (PetscDefined(USE_DEBUG)) {
1115     PetscCall(PetscFPrintf(comm, fd, "\n\n"));
1116     PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n"));
1117     PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1118     PetscCall(PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n"));
1119     PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1120     PetscCall(PetscFPrintf(comm, fd, "      #   This code was compiled with a debugging option.      #\n"));
1121     PetscCall(PetscFPrintf(comm, fd, "      #   To get timing results run ./configure                #\n"));
1122     PetscCall(PetscFPrintf(comm, fd, "      #   using --with-debugging=no, the performance will      #\n"));
1123     PetscCall(PetscFPrintf(comm, fd, "      #   be generally two or three times faster.              #\n"));
1124     PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1125     PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n\n\n"));
1126   }
1127   PetscFunctionReturn(PETSC_SUCCESS);
1128 }
1129 
1130 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd)
1131 {
1132 #if defined(PETSC_HAVE_DEVICE)
1133   PetscMPIInt size;
1134   PetscBool   deviceInitialized = PETSC_FALSE;
1135 
1136   PetscFunctionBegin;
1137   PetscCallMPI(MPI_Comm_size(comm, &size));
1138   for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) {
1139     const PetscDeviceType dtype = PetscDeviceTypeCast(i);
1140     if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */
1141       deviceInitialized = PETSC_TRUE;
1142       break;
1143     }
1144   }
1145   /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */
1146   if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS);
1147   PetscCall(PetscFPrintf(comm, fd, "\n\n"));
1148   PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n"));
1149   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1150   PetscCall(PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n"));
1151   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1152   PetscCall(PetscFPrintf(comm, fd, "      #   This code was compiled with GPU support and you've   #\n"));
1153   PetscCall(PetscFPrintf(comm, fd, "      #   created PETSc/GPU objects, but you intentionally     #\n"));
1154   PetscCall(PetscFPrintf(comm, fd, "      #   used -use_gpu_aware_mpi 0, requiring PETSc to copy   #\n"));
1155   PetscCall(PetscFPrintf(comm, fd, "      #   additional data between the GPU and CPU. To obtain   #\n"));
1156   PetscCall(PetscFPrintf(comm, fd, "      #   meaningful timing results on multi-rank runs, use    #\n"));
1157   PetscCall(PetscFPrintf(comm, fd, "      #   GPU-aware MPI instead.                               #\n"));
1158   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1159   PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n\n\n"));
1160   PetscFunctionReturn(PETSC_SUCCESS);
1161 #else
1162   return PETSC_SUCCESS;
1163 #endif
1164 }
1165 
1166 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd)
1167 {
1168 #if defined(PETSC_HAVE_DEVICE)
1169 
1170   PetscFunctionBegin;
1171   if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS);
1172   PetscCall(PetscFPrintf(comm, fd, "\n\n"));
1173   PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n"));
1174   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1175   PetscCall(PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n"));
1176   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1177   PetscCall(PetscFPrintf(comm, fd, "      #   This code was run with -log_view_gpu_time            #\n"));
1178   PetscCall(PetscFPrintf(comm, fd, "      #   This provides accurate timing within the GPU kernels #\n"));
1179   PetscCall(PetscFPrintf(comm, fd, "      #   but can slow down the entire computation by a        #\n"));
1180   PetscCall(PetscFPrintf(comm, fd, "      #   measurable amount. For fastest runs we recommend     #\n"));
1181   PetscCall(PetscFPrintf(comm, fd, "      #   not using this option.                               #\n"));
1182   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1183   PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n\n\n"));
1184   PetscFunctionReturn(PETSC_SUCCESS);
1185 #else
1186   return PETSC_SUCCESS;
1187 #endif
1188 }
1189 
1190 static PetscErrorCode PetscLogHandlerView_Default_Info(PetscLogHandler handler, PetscViewer viewer)
1191 {
1192   FILE                   *fd;
1193   PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
1194   char                    arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
1195   PetscLogDouble          locTotalTime, TotalTime, TotalFlops;
1196   PetscLogDouble          numMessages, messageLength, avgMessLen, numReductions;
1197   PetscLogDouble          stageTime, flops, flopr, mem, mess, messLen, red;
1198   PetscLogDouble          fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed;
1199   PetscLogDouble          fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed;
1200   PetscLogDouble          min, max, tot, ratio, avg, x, y;
1201   PetscLogDouble          minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax;
1202 #if defined(PETSC_HAVE_DEVICE)
1203   PetscLogEvent  KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */
1204   PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops;
1205 #endif
1206   PetscMPIInt   minC, maxC;
1207   PetscMPIInt   size, rank;
1208   PetscBool    *localStageUsed, *stageUsed;
1209   PetscBool    *localStageVisible, *stageVisible;
1210   PetscInt      numStages, numEvents;
1211   int           stage, oclass;
1212   PetscLogEvent event;
1213   char          version[256];
1214   MPI_Comm      comm;
1215 #if defined(PETSC_HAVE_DEVICE)
1216   PetscInt64 nas = 0x7FF0000000000002;
1217 #endif
1218   PetscLogGlobalNames global_stages, global_events;
1219   PetscEventPerfInfo  zero_info;
1220   PetscLogState       state;
1221 
1222   PetscFunctionBegin;
1223   PetscCall(PetscLogHandlerGetState(handler, &state));
1224   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1225   PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
1226   PetscCall(PetscViewerASCIIGetPointer(viewer, &fd));
1227   PetscCallMPI(MPI_Comm_size(comm, &size));
1228   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1229   /* Get the total elapsed time */
1230   PetscCall(PetscTime(&locTotalTime));
1231   locTotalTime -= petsc_BaseTime;
1232 
1233   PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n"));
1234   PetscCall(PetscFPrintf(comm, fd, "***                                WIDEN YOUR WINDOW TO 160 CHARACTERS.  Use 'enscript -r -fCourier9' to print this document                                 ***\n"));
1235   PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n"));
1236   PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n"));
1237   PetscCall(PetscLogViewWarnSync(comm, fd));
1238   PetscCall(PetscLogViewWarnDebugging(comm, fd));
1239   PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd));
1240   PetscCall(PetscLogViewWarnGpuTime(comm, fd));
1241   PetscCall(PetscGetArchType(arch, sizeof(arch)));
1242   PetscCall(PetscGetHostName(hostname, sizeof(hostname)));
1243   PetscCall(PetscGetUserName(username, sizeof(username)));
1244   PetscCall(PetscGetProgramName(pname, sizeof(pname)));
1245   PetscCall(PetscGetDate(date, sizeof(date)));
1246   PetscCall(PetscGetVersion(version, sizeof(version)));
1247   if (size == 1) {
1248     PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date));
1249   } else {
1250     PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date));
1251   }
1252 #if defined(PETSC_HAVE_OPENMP)
1253   PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads));
1254 #endif
1255   PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version));
1256 
1257   /* Must preserve reduction count before we go on */
1258   red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
1259 
1260   /* Calculate summary information */
1261   PetscCall(PetscFPrintf(comm, fd, "\n                         Max       Max/Min     Avg       Total\n"));
1262   /*   Time */
1263   PetscCall(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1264   PetscCall(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1265   PetscCall(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1266   avg = tot / ((PetscLogDouble)size);
1267   if (min != 0.0) ratio = max / min;
1268   else ratio = 0.0;
1269   PetscCall(PetscFPrintf(comm, fd, "Time (sec):           %5.3e   %7.3f   %5.3e\n", max, ratio, avg));
1270   TotalTime = tot;
1271   /*   Objects */
1272   {
1273     PetscInt num_objects;
1274 
1275     PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL));
1276     avg = (PetscLogDouble)num_objects;
1277   }
1278   PetscCall(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1279   PetscCall(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1280   PetscCall(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1281   avg = tot / ((PetscLogDouble)size);
1282   if (min != 0.0) ratio = max / min;
1283   else ratio = 0.0;
1284   PetscCall(PetscFPrintf(comm, fd, "Objects:              %5.3e   %7.3f   %5.3e\n", max, ratio, avg));
1285   /*   Flops */
1286   PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1287   PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1288   PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1289   avg = tot / ((PetscLogDouble)size);
1290   if (min != 0.0) ratio = max / min;
1291   else ratio = 0.0;
1292   PetscCall(PetscFPrintf(comm, fd, "Flops:                %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot));
1293   TotalFlops = tot;
1294   /*   Flops/sec -- Must talk to Barry here */
1295   if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime;
1296   else flops = 0.0;
1297   PetscCall(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1298   PetscCall(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1299   PetscCall(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1300   avg = tot / ((PetscLogDouble)size);
1301   if (min != 0.0) ratio = max / min;
1302   else ratio = 0.0;
1303   PetscCall(PetscFPrintf(comm, fd, "Flops/sec:            %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot));
1304   /*   Memory */
1305   PetscCall(PetscMallocGetMaximumUsage(&mem));
1306   if (mem > 0.0) {
1307     PetscCall(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1308     PetscCall(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1309     PetscCall(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1310     avg = tot / ((PetscLogDouble)size);
1311     if (min != 0.0) ratio = max / min;
1312     else ratio = 0.0;
1313     PetscCall(PetscFPrintf(comm, fd, "Memory (bytes):       %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot));
1314   }
1315   /*   Messages */
1316   mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
1317   PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1318   PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1319   PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1320   avg = tot / ((PetscLogDouble)size);
1321   if (min != 0.0) ratio = max / min;
1322   else ratio = 0.0;
1323   PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count:        %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot));
1324   numMessages = tot;
1325   /*   Message Lengths */
1326   mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
1327   PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1328   PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1329   PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1330   if (numMessages != 0) avg = tot / numMessages;
1331   else avg = 0.0;
1332   if (min != 0.0) ratio = max / min;
1333   else ratio = 0.0;
1334   PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes):  %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot));
1335   messageLength = tot;
1336   /*   Reductions */
1337   PetscCall(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1338   PetscCall(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1339   PetscCall(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1340   if (min != 0.0) ratio = max / min;
1341   else ratio = 0.0;
1342   PetscCall(PetscFPrintf(comm, fd, "MPI Reductions:       %5.3e   %7.3f\n", max, ratio));
1343   numReductions = red; /* wrong because uses count from process zero */
1344   PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n"));
1345   PetscCall(PetscFPrintf(comm, fd, "                            e.g., VecAXPY() for real vectors of length N --> 2N flops\n"));
1346   PetscCall(PetscFPrintf(comm, fd, "                            and VecAXPY() for complex vectors of length N --> 8N flops\n"));
1347 
1348   PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages));
1349   PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events));
1350   PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages));
1351   PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents));
1352   PetscCall(PetscMemzero(&zero_info, sizeof(zero_info)));
1353   PetscCall(PetscMalloc1(numStages, &localStageUsed));
1354   PetscCall(PetscMalloc1(numStages, &stageUsed));
1355   PetscCall(PetscMalloc1(numStages, &localStageVisible));
1356   PetscCall(PetscMalloc1(numStages, &stageVisible));
1357   if (numStages > 0) {
1358     for (stage = 0; stage < numStages; stage++) {
1359       PetscInt stage_id;
1360 
1361       PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1362       if (stage_id >= 0) {
1363         PetscStagePerf *stage_info;
1364 
1365         PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info));
1366         localStageUsed[stage]    = stage_info->used;
1367         localStageVisible[stage] = stage_info->perfInfo.visible;
1368       } else {
1369         localStageUsed[stage]    = PETSC_FALSE;
1370         localStageVisible[stage] = PETSC_TRUE;
1371       }
1372     }
1373     PetscCall(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm));
1374     PetscCall(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm));
1375     for (stage = 0; stage < numStages; stage++) {
1376       if (stageUsed[stage] && stageVisible[stage]) {
1377         PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages:   ----- Time ------  ----- Flop ------  --- Messages ---  -- Message Lengths --  -- Reductions --\n"));
1378         PetscCall(PetscFPrintf(comm, fd, "                        Avg     %%Total     Avg     %%Total    Count   %%Total     Avg         %%Total    Count   %%Total\n"));
1379         break;
1380       }
1381     }
1382     for (stage = 0; stage < numStages; stage++) {
1383       PetscInt            stage_id;
1384       PetscEventPerfInfo *stage_info;
1385       const char         *stage_name;
1386 
1387       if (!(stageUsed[stage] && stageVisible[stage])) continue;
1388       PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1389       PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1390       stage_info = &zero_info;
1391       if (localStageUsed[stage]) {
1392         PetscStagePerf *stage_perf_info;
1393 
1394         PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_perf_info));
1395         stage_info = &stage_perf_info->perfInfo;
1396       }
1397       PetscCall(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1398       PetscCall(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1399       PetscCall(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1400       PetscCall(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1401       PetscCall(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1402       mess *= 0.5;
1403       messLen *= 0.5;
1404       red /= size;
1405       if (TotalTime != 0.0) fracTime = stageTime / TotalTime;
1406       else fracTime = 0.0;
1407       if (TotalFlops != 0.0) fracFlops = flops / TotalFlops;
1408       else fracFlops = 0.0;
1409       /* Talk to Barry if (stageTime     != 0.0) flops          = (size*flops)/stageTime; else flops          = 0.0; */
1410       if (numMessages != 0.0) fracMessages = mess / numMessages;
1411       else fracMessages = 0.0;
1412       if (mess != 0.0) avgMessLen = messLen / mess;
1413       else avgMessLen = 0.0;
1414       if (messageLength != 0.0) fracLength = messLen / messageLength;
1415       else fracLength = 0.0;
1416       if (numReductions != 0.0) fracReductions = red / numReductions;
1417       else fracReductions = 0.0;
1418       PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%%  %6.4e %5.1f%%  %5.3e %5.1f%%  %5.3e      %5.1f%%  %5.3e %5.1f%%\n", stage, stage_name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions));
1419     }
1420   }
1421 
1422   PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n"));
1423   PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n"));
1424   PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n"));
1425   PetscCall(PetscFPrintf(comm, fd, "   Count: number of times phase was executed\n"));
1426   PetscCall(PetscFPrintf(comm, fd, "   Time and Flop: Max - maximum over all processors\n"));
1427   PetscCall(PetscFPrintf(comm, fd, "                  Ratio - ratio of maximum to minimum over all processors\n"));
1428   PetscCall(PetscFPrintf(comm, fd, "   Mess: number of messages sent\n"));
1429   PetscCall(PetscFPrintf(comm, fd, "   AvgLen: average message length (bytes)\n"));
1430   PetscCall(PetscFPrintf(comm, fd, "   Reduct: number of global reductions\n"));
1431   PetscCall(PetscFPrintf(comm, fd, "   Global: entire computation\n"));
1432   PetscCall(PetscFPrintf(comm, fd, "   Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n"));
1433   PetscCall(PetscFPrintf(comm, fd, "      %%T - percent time in this phase         %%F - percent flop in this phase\n"));
1434   PetscCall(PetscFPrintf(comm, fd, "      %%M - percent messages in this phase     %%L - percent message lengths in this phase\n"));
1435   PetscCall(PetscFPrintf(comm, fd, "      %%R - percent reductions in this phase\n"));
1436   PetscCall(PetscFPrintf(comm, fd, "   Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n"));
1437   if (PetscLogMemory) {
1438     PetscCall(PetscFPrintf(comm, fd, "   Memory usage is summed over all MPI processes, it is given in mega-bytes\n"));
1439     PetscCall(PetscFPrintf(comm, fd, "   Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n"));
1440     PetscCall(PetscFPrintf(comm, fd, "   EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n"));
1441     PetscCall(PetscFPrintf(comm, fd, "   MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n"));
1442     PetscCall(PetscFPrintf(comm, fd, "   RMI Mbytes: Increase in resident memory (sum over all calls to event)\n"));
1443   }
1444 #if defined(PETSC_HAVE_DEVICE)
1445   PetscCall(PetscFPrintf(comm, fd, "   GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n"));
1446   PetscCall(PetscFPrintf(comm, fd, "   CpuToGpu Count: total number of CPU to GPU copies per processor\n"));
1447   PetscCall(PetscFPrintf(comm, fd, "   CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n"));
1448   PetscCall(PetscFPrintf(comm, fd, "   GpuToCpu Count: total number of GPU to CPU copies per processor\n"));
1449   PetscCall(PetscFPrintf(comm, fd, "   GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n"));
1450   PetscCall(PetscFPrintf(comm, fd, "   GPU %%F: percent flops on GPU in this event\n"));
1451 #endif
1452   PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n"));
1453 
1454   PetscCall(PetscLogViewWarnDebugging(comm, fd));
1455 
1456   /* Report events */
1457   PetscCall(PetscFPrintf(comm, fd, "Event                Count      Time (sec)     Flop                              --- Global ---  --- Stage ----  Total"));
1458   if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "  Malloc EMalloc MMalloc RMI"));
1459 #if defined(PETSC_HAVE_DEVICE)
1460   PetscCall(PetscFPrintf(comm, fd, "   GPU    - CpuToGpu -   - GpuToCpu - GPU"));
1461 #endif
1462   PetscCall(PetscFPrintf(comm, fd, "\n"));
1463   PetscCall(PetscFPrintf(comm, fd, "                   Max Ratio  Max     Ratio   Max  Ratio  Mess   AvgLen  Reduct  %%T %%F %%M %%L %%R  %%T %%F %%M %%L %%R Mflop/s"));
1464   if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes"));
1465 #if defined(PETSC_HAVE_DEVICE)
1466   PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count   Size   Count   Size  %%F"));
1467 #endif
1468   PetscCall(PetscFPrintf(comm, fd, "\n"));
1469   PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------"));
1470   if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------"));
1471 #if defined(PETSC_HAVE_DEVICE)
1472   PetscCall(PetscFPrintf(comm, fd, "---------------------------------------"));
1473 #endif
1474   PetscCall(PetscFPrintf(comm, fd, "\n"));
1475 
1476 #if defined(PETSC_HAVE_DEVICE)
1477   /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */
1478   PetscCall(PetscLogStateGetEventFromName(state, "TAOSolve", &TAO_Solve));
1479   PetscCall(PetscLogStateGetEventFromName(state, "TSStep", &TS_Step));
1480   PetscCall(PetscLogStateGetEventFromName(state, "SNESSolve", &SNES_Solve));
1481   PetscCall(PetscLogStateGetEventFromName(state, "KSPSolve", &KSP_Solve));
1482 #endif
1483 
1484   for (stage = 0; stage < numStages; stage++) {
1485     PetscInt            stage_id;
1486     PetscEventPerfInfo *stage_info;
1487     const char         *stage_name;
1488 
1489     if (!(stageVisible[stage] && stageUsed[stage])) continue;
1490     PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1491     PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1492     PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stage_name));
1493     stage_info = &zero_info;
1494     if (localStageUsed[stage]) {
1495       PetscStagePerf *stage_perf_info;
1496 
1497       PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_perf_info));
1498       stage_info = &stage_perf_info->perfInfo;
1499     }
1500     PetscCall(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1501     PetscCall(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1502     PetscCall(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1503     PetscCall(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1504     PetscCall(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1505     mess *= 0.5;
1506     messLen *= 0.5;
1507     red /= size;
1508 
1509     for (event = 0; event < numEvents; event++) {
1510       PetscInt            event_id;
1511       PetscEventPerfInfo *event_info = &zero_info;
1512       PetscBool           is_zero    = PETSC_FALSE;
1513       const char         *event_name;
1514 
1515       PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id));
1516       PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name));
1517       if (event_id >= 0 && stage_id >= 0) { PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &event_info)); }
1518       PetscCall(PetscMemcmp(event_info, &zero_info, sizeof(zero_info), &is_zero));
1519       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm));
1520       if (!is_zero) {
1521         flopr = event_info->flops;
1522         PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1523         PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1524         PetscCall(MPIU_Allreduce(&event_info->flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1525         PetscCall(MPIU_Allreduce(&event_info->time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1526         PetscCall(MPIU_Allreduce(&event_info->time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1527         PetscCall(MPIU_Allreduce(&event_info->time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1528         PetscCall(MPIU_Allreduce(&event_info->numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1529         PetscCall(MPIU_Allreduce(&event_info->messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1530         PetscCall(MPIU_Allreduce(&event_info->numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1531         PetscCall(MPIU_Allreduce(&event_info->count, &minC, 1, MPI_INT, MPI_MIN, comm));
1532         PetscCall(MPIU_Allreduce(&event_info->count, &maxC, 1, MPI_INT, MPI_MAX, comm));
1533         if (PetscLogMemory) {
1534           PetscCall(MPIU_Allreduce(&event_info->memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1535           PetscCall(MPIU_Allreduce(&event_info->mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1536           PetscCall(MPIU_Allreduce(&event_info->mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1537           PetscCall(MPIU_Allreduce(&event_info->mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1538         }
1539 #if defined(PETSC_HAVE_DEVICE)
1540         PetscCall(MPIU_Allreduce(&event_info->CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1541         PetscCall(MPIU_Allreduce(&event_info->GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1542         PetscCall(MPIU_Allreduce(&event_info->CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1543         PetscCall(MPIU_Allreduce(&event_info->GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1544         PetscCall(MPIU_Allreduce(&event_info->GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1545         PetscCall(MPIU_Allreduce(&event_info->GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1546 #endif
1547         if (mint < 0.0) {
1548           PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, event_name));
1549           mint = 0;
1550         }
1551         PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, event_name);
1552 #if defined(PETSC_HAVE_DEVICE)
1553         /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */
1554         if (!PetscLogGpuTimeFlag && petsc_gflops > 0) {
1555           memcpy(&gmaxt, &nas, sizeof(PetscLogDouble));
1556           if (event_id != SNES_Solve && event_id != KSP_Solve && event_id != TS_Step && event_id != TAO_Solve) {
1557             memcpy(&mint, &nas, sizeof(PetscLogDouble));
1558             memcpy(&maxt, &nas, sizeof(PetscLogDouble));
1559           }
1560         }
1561 #endif
1562         totm *= 0.5;
1563         totml *= 0.5;
1564         totr /= size;
1565 
1566         if (maxC != 0) {
1567           if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC;
1568           else ratC = 0.0;
1569           if (mint != 0.0) ratt = maxt / mint;
1570           else ratt = 0.0;
1571           if (minf != 0.0) ratf = maxf / minf;
1572           else ratf = 0.0;
1573           if (TotalTime != 0.0) fracTime = tott / TotalTime;
1574           else fracTime = 0.0;
1575           if (TotalFlops != 0.0) fracFlops = totf / TotalFlops;
1576           else fracFlops = 0.0;
1577           if (stageTime != 0.0) fracStageTime = tott / stageTime;
1578           else fracStageTime = 0.0;
1579           if (flops != 0.0) fracStageFlops = totf / flops;
1580           else fracStageFlops = 0.0;
1581           if (numMessages != 0.0) fracMess = totm / numMessages;
1582           else fracMess = 0.0;
1583           if (messageLength != 0.0) fracMessLen = totml / messageLength;
1584           else fracMessLen = 0.0;
1585           if (numReductions != 0.0) fracRed = totr / numReductions;
1586           else fracRed = 0.0;
1587           if (mess != 0.0) fracStageMess = totm / mess;
1588           else fracStageMess = 0.0;
1589           if (messLen != 0.0) fracStageMessLen = totml / messLen;
1590           else fracStageMessLen = 0.0;
1591           if (red != 0.0) fracStageRed = totr / red;
1592           else fracStageRed = 0.0;
1593           if (totm != 0.0) totml /= totm;
1594           else totml = 0.0;
1595           if (maxt != 0.0) flopr = totf / maxt;
1596           else flopr = 0.0;
1597           if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0)
1598             PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6));
1599           else
1600             PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6));
1601           if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f   %5.0f   %5.0f   %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6));
1602 #if defined(PETSC_HAVE_DEVICE)
1603           if (totf != 0.0) fracgflops = gflops / totf;
1604           else fracgflops = 0.0;
1605           if (gmaxt != 0.0) gflopr = gflops / gmaxt;
1606           else gflopr = 0.0;
1607           PetscCall(PetscFPrintf(comm, fd, "   %5.0f   %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops));
1608 #endif
1609           PetscCall(PetscFPrintf(comm, fd, "\n"));
1610         }
1611       }
1612     }
1613   }
1614 
1615   /* Memory usage and object creation */
1616   PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------"));
1617   if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------"));
1618 #if defined(PETSC_HAVE_DEVICE)
1619   PetscCall(PetscFPrintf(comm, fd, "---------------------------------------"));
1620 #endif
1621   PetscCall(PetscFPrintf(comm, fd, "\n"));
1622   PetscCall(PetscFPrintf(comm, fd, "\n"));
1623 
1624   /* Right now, only stages on the first processor are reported here, meaning only objects associated with
1625      the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
1626      stats for stages local to processor sets.
1627   */
1628   /* We should figure out the longest object name here (now 20 characters) */
1629   PetscCall(PetscFPrintf(comm, fd, "Object Type          Creations   Destructions. Reports information only for process 0.\n"));
1630   for (stage = 0; stage < numStages; stage++) {
1631     const char *stage_name;
1632 
1633     PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1634     PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stage_name));
1635     if (localStageUsed[stage]) {
1636       PetscInt num_classes;
1637 
1638       PetscCall(PetscLogStateGetNumClasses(state, &num_classes));
1639       for (oclass = 0; oclass < num_classes; oclass++) {
1640         PetscClassPerf *class_perf_info;
1641 
1642         PetscCall(PetscLogHandlerDefaultGetClassPerf(handler, stage, oclass, &class_perf_info));
1643         if ((class_perf_info->creations > 0) || (class_perf_info->destructions > 0)) {
1644           PetscLogClassInfo class_reg_info;
1645 
1646           PetscCall(PetscLogStateClassGetInfo(state, oclass, &class_reg_info));
1647           PetscCall(PetscFPrintf(comm, fd, "%20s %5d          %5d\n", class_reg_info.name, class_perf_info->creations, class_perf_info->destructions));
1648         }
1649       }
1650     }
1651   }
1652 
1653   PetscCall(PetscFree(localStageUsed));
1654   PetscCall(PetscFree(stageUsed));
1655   PetscCall(PetscFree(localStageVisible));
1656   PetscCall(PetscFree(stageVisible));
1657   PetscCall(PetscLogGlobalNamesDestroy(&global_stages));
1658   PetscCall(PetscLogGlobalNamesDestroy(&global_events));
1659 
1660   /* Information unrelated to this particular run */
1661   PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n"));
1662   PetscCall(PetscTime(&y));
1663   PetscCall(PetscTime(&x));
1664   PetscCall(PetscTime(&y));
1665   PetscCall(PetscTime(&y));
1666   PetscCall(PetscTime(&y));
1667   PetscCall(PetscTime(&y));
1668   PetscCall(PetscTime(&y));
1669   PetscCall(PetscTime(&y));
1670   PetscCall(PetscTime(&y));
1671   PetscCall(PetscTime(&y));
1672   PetscCall(PetscTime(&y));
1673   PetscCall(PetscTime(&y));
1674   PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0));
1675   /* MPI information */
1676   if (size > 1) {
1677     MPI_Status  status;
1678     PetscMPIInt tag;
1679     MPI_Comm    newcomm;
1680 
1681     PetscCallMPI(MPI_Barrier(comm));
1682     PetscCall(PetscTime(&x));
1683     PetscCallMPI(MPI_Barrier(comm));
1684     PetscCallMPI(MPI_Barrier(comm));
1685     PetscCallMPI(MPI_Barrier(comm));
1686     PetscCallMPI(MPI_Barrier(comm));
1687     PetscCallMPI(MPI_Barrier(comm));
1688     PetscCall(PetscTime(&y));
1689     PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0));
1690     PetscCall(PetscCommDuplicate(comm, &newcomm, &tag));
1691     PetscCallMPI(MPI_Barrier(comm));
1692     if (rank) {
1693       PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status));
1694       PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm));
1695     } else {
1696       PetscCall(PetscTime(&x));
1697       PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm));
1698       PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status));
1699       PetscCall(PetscTime(&y));
1700       PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size));
1701     }
1702     PetscCall(PetscCommDestroy(&newcomm));
1703   }
1704   PetscCall(PetscOptionsView(NULL, viewer));
1705 
1706   /* Machine and compile information */
1707   if (PetscDefined(USE_FORTRAN_KERNELS)) {
1708     PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n"));
1709   } else {
1710     PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n"));
1711   }
1712   if (PetscDefined(USE_64BIT_INDICES)) {
1713     PetscCall(PetscFPrintf(comm, fd, "Compiled with 64-bit PetscInt\n"));
1714   } else if (PetscDefined(USE___FLOAT128)) {
1715     PetscCall(PetscFPrintf(comm, fd, "Compiled with 32-bit PetscInt\n"));
1716   }
1717   if (PetscDefined(USE_REAL_SINGLE)) {
1718     PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n"));
1719   } else if (PetscDefined(USE___FLOAT128)) {
1720     PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n"));
1721   }
1722   if (PetscDefined(USE_REAL_MAT_SINGLE)) {
1723     PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n"));
1724   } else {
1725     PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n"));
1726   }
1727   PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt)));
1728 
1729   PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions));
1730   PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo));
1731   PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo));
1732   PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo));
1733   PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo));
1734 
1735   /* Cleanup */
1736   PetscCall(PetscFPrintf(comm, fd, "\n"));
1737   PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd));
1738   PetscCall(PetscLogViewWarnDebugging(comm, fd));
1739   PetscCall(PetscFPTrapPop());
1740   PetscFunctionReturn(PETSC_SUCCESS);
1741 }
1742 
1743 static PetscErrorCode PetscLogHandlerView_Default(PetscLogHandler handler, PetscViewer viewer)
1744 {
1745   PetscViewerFormat format;
1746   PetscFunctionBegin;
1747   PetscCall(PetscViewerGetFormat(viewer, &format));
1748   if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) {
1749     PetscCall(PetscLogHandlerView_Default_Info(handler, viewer));
1750   } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1751     PetscCall(PetscLogHandlerView_Default_Detailed(handler, viewer));
1752   } else if (format == PETSC_VIEWER_ASCII_CSV) {
1753     PetscCall(PetscLogHandlerView_Default_CSV(handler, viewer));
1754   }
1755   PetscFunctionReturn(PETSC_SUCCESS);
1756 }
1757 
1758 /*MC
1759   PETSC_LOG_HANDLER_DEFAULT - PETSC_LOG_DEFAULT = "default" -  A `PetscLogHandler` that collects data for PETSc
1760   default profiling log viewers (`PetscLogView()` and `PetscLogDump()`).
1761 
1762   Options Database Keys:
1763 + -log_include_actions - include a growing list of actions (event beginnings and endings, object creations and destructions) in `PetscLogDump()` (`PetscLogActions()`).
1764 - -log_include_objects - include a growing list of object creations and destructions in `PetscLogDump()` (`PetscLogObjects()`).
1765 
1766   Level: developer
1767 
1768 .seealso: [](ch_profiling), `PetscLogHandler`
1769 M*/
1770 
1771 PETSC_INTERN PetscErrorCode PetscLogHandlerCreate_Default(PetscLogHandler handler)
1772 {
1773   PetscFunctionBegin;
1774   PetscCall(PetscLogHandlerContextCreate_Default((PetscLogHandler_Default *)&handler->data));
1775   handler->ops->destroy       = PetscLogHandlerDestroy_Default;
1776   handler->ops->eventbegin    = PetscLogHandlerEventBegin_Default;
1777   handler->ops->eventend      = PetscLogHandlerEventEnd_Default;
1778   handler->ops->eventsync     = PetscLogHandlerEventSync_Default;
1779   handler->ops->objectcreate  = PetscLogHandlerObjectCreate_Default;
1780   handler->ops->objectdestroy = PetscLogHandlerObjectDestroy_Default;
1781   handler->ops->stagepush     = PetscLogHandlerStagePush_Default;
1782   handler->ops->stagepop      = PetscLogHandlerStagePop_Default;
1783   handler->ops->view          = PetscLogHandlerView_Default;
1784   PetscFunctionReturn(PETSC_SUCCESS);
1785 }
1786