xref: /petsc/src/sys/logging/handler/impls/default/logdefault.c (revision 6467efc9ef4d99487050cf3d5fe3db991cbd2979)
1 #include <petscviewer.h>
2 #include <petscdevice.h>
3 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/
4 #include <petsc/private/loghandlerimpl.h>
5 #include <petsc/private/deviceimpl.h>
6 #include <petscconfiginfo.h>
7 #include <petscmachineinfo.h>
8 #include "logdefault.h"
9 
10 #if !PetscDefined(USE_LOG)
11 // zeros for global counters
12 static PetscLogDouble petsc_TotalFlops   = 0.0;
13 static PetscLogDouble petsc_irecv_ct     = 0.0;
14 static PetscLogDouble petsc_isend_ct     = 0.0;
15 static PetscLogDouble petsc_recv_ct      = 0.0;
16 static PetscLogDouble petsc_send_ct      = 0.0;
17 static PetscLogDouble petsc_irecv_len    = 0.0;
18 static PetscLogDouble petsc_isend_len    = 0.0;
19 static PetscLogDouble petsc_recv_len     = 0.0;
20 static PetscLogDouble petsc_send_len     = 0.0;
21 static PetscLogDouble petsc_allreduce_ct = 0.0;
22 static PetscLogDouble petsc_gather_ct    = 0.0;
23 static PetscLogDouble petsc_scatter_ct   = 0.0;
24 
25 /* Thread local storage */
26 static PetscLogDouble petsc_TotalFlops_th   = 0.0;
27 static PetscLogDouble petsc_irecv_ct_th     = 0.0;
28 static PetscLogDouble petsc_isend_ct_th     = 0.0;
29 static PetscLogDouble petsc_recv_ct_th      = 0.0;
30 static PetscLogDouble petsc_send_ct_th      = 0.0;
31 static PetscLogDouble petsc_irecv_len_th    = 0.0;
32 static PetscLogDouble petsc_isend_len_th    = 0.0;
33 static PetscLogDouble petsc_recv_len_th     = 0.0;
34 static PetscLogDouble petsc_send_len_th     = 0.0;
35 static PetscLogDouble petsc_allreduce_ct_th = 0.0;
36 static PetscLogDouble petsc_gather_ct_th    = 0.0;
37 static PetscLogDouble petsc_scatter_ct_th   = 0.0;
38 
39   #if PetscDefined(HAVE_DEVICE)
40 /* Global GPU counters */
41 static PetscLogDouble petsc_gflops = 0.0;
42 static PetscLogDouble petsc_gtime  = 0.0;
43 
44 /* Thread local storage */
45 static PetscLogDouble petsc_ctog_ct_th = 0.0;
46 static PetscLogDouble petsc_gtoc_ct_th = 0.0;
47 static PetscLogDouble petsc_ctog_sz_th = 0.0;
48 static PetscLogDouble petsc_gtoc_sz_th = 0.0;
49 static PetscLogDouble petsc_gflops_th  = 0.0;
50   #endif
51 #endif
52 
53 static PetscErrorCode PetscEventPerfInfoInit(PetscEventPerfInfo *eventInfo)
54 {
55   PetscFunctionBegin;
56   PetscCall(PetscMemzero(eventInfo, sizeof(*eventInfo)));
57   eventInfo->visible   = PETSC_TRUE;
58   eventInfo->id        = -1;
59   eventInfo->dof[0]    = -1.0;
60   eventInfo->dof[1]    = -1.0;
61   eventInfo->dof[2]    = -1.0;
62   eventInfo->dof[3]    = -1.0;
63   eventInfo->dof[4]    = -1.0;
64   eventInfo->dof[5]    = -1.0;
65   eventInfo->dof[6]    = -1.0;
66   eventInfo->dof[7]    = -1.0;
67   eventInfo->errors[0] = -1.0;
68   eventInfo->errors[1] = -1.0;
69   eventInfo->errors[2] = -1.0;
70   eventInfo->errors[3] = -1.0;
71   eventInfo->errors[4] = -1.0;
72   eventInfo->errors[5] = -1.0;
73   eventInfo->errors[6] = -1.0;
74   eventInfo->errors[7] = -1.0;
75   PetscFunctionReturn(PETSC_SUCCESS);
76 }
77 
78 static PetscErrorCode PetscEventPerfInfoTic_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool resume)
79 {
80   PetscFunctionBegin;
81   if (resume) {
82     eventInfo->timeTmp -= time;
83     eventInfo->flopsTmp -= petsc_TotalFlops_th;
84   } else {
85     eventInfo->timeTmp  = -time;
86     eventInfo->flopsTmp = -petsc_TotalFlops_th;
87   }
88   eventInfo->numMessages -= petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th;
89   eventInfo->messageLength -= petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len_th + petsc_send_len_th;
90   eventInfo->numReductions -= petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th;
91 #if defined(PETSC_HAVE_DEVICE)
92   eventInfo->CpuToGpuCount -= petsc_ctog_ct_th;
93   eventInfo->GpuToCpuCount -= petsc_gtoc_ct_th;
94   eventInfo->CpuToGpuSize -= petsc_ctog_sz_th;
95   eventInfo->GpuToCpuSize -= petsc_gtoc_sz_th;
96   eventInfo->GpuFlops -= petsc_gflops_th;
97   eventInfo->GpuTime -= petsc_gtime;
98 #endif
99   if (logMemory) {
100     PetscLogDouble usage;
101     PetscCall(PetscMemoryGetCurrentUsage(&usage));
102     eventInfo->memIncrease -= usage;
103     PetscCall(PetscMallocGetCurrentUsage(&usage));
104     eventInfo->mallocSpace -= usage;
105     PetscCall(PetscMallocGetMaximumUsage(&usage));
106     eventInfo->mallocIncrease -= usage;
107     PetscCall(PetscMallocPushMaximumUsage(event));
108   }
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 static PetscErrorCode PetscEventPerfInfoTic(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event)
113 {
114   PetscFunctionBegin;
115   PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_FALSE));
116   PetscFunctionReturn(PETSC_SUCCESS);
117 }
118 
119 static PetscErrorCode PetscEventPerfInfoResume(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event)
120 {
121   PetscFunctionBegin;
122   PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_TRUE));
123   PetscFunctionReturn(PETSC_SUCCESS);
124 }
125 
126 static PetscErrorCode PetscEventPerfInfoToc_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool pause)
127 {
128   PetscFunctionBegin;
129   eventInfo->timeTmp += time;
130   eventInfo->flopsTmp += petsc_TotalFlops_th;
131   if (!pause) {
132     eventInfo->time += eventInfo->timeTmp;
133     eventInfo->time2 += eventInfo->timeTmp * eventInfo->timeTmp;
134     eventInfo->flops += eventInfo->flopsTmp;
135     eventInfo->flops2 += eventInfo->flopsTmp * eventInfo->flopsTmp;
136   }
137   eventInfo->numMessages += petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th;
138   eventInfo->messageLength += petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len + petsc_send_len_th;
139   eventInfo->numReductions += petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th;
140 #if defined(PETSC_HAVE_DEVICE)
141   eventInfo->CpuToGpuCount += petsc_ctog_ct_th;
142   eventInfo->GpuToCpuCount += petsc_gtoc_ct_th;
143   eventInfo->CpuToGpuSize += petsc_ctog_sz_th;
144   eventInfo->GpuToCpuSize += petsc_gtoc_sz_th;
145   eventInfo->GpuFlops += petsc_gflops_th;
146   eventInfo->GpuTime += petsc_gtime;
147 #endif
148   if (logMemory) {
149     PetscLogDouble usage, musage;
150     PetscCall(PetscMemoryGetCurrentUsage(&usage)); /* the comments below match the column labels printed in PetscLogView_Default() */
151     eventInfo->memIncrease += usage;               /* RMI */
152     PetscCall(PetscMallocGetCurrentUsage(&usage));
153     eventInfo->mallocSpace += usage; /* Malloc */
154     PetscCall(PetscMallocPopMaximumUsage((int)event, &musage));
155     eventInfo->mallocIncreaseEvent = PetscMax(musage - usage, eventInfo->mallocIncreaseEvent); /* EMalloc */
156     PetscCall(PetscMallocGetMaximumUsage(&usage));
157     eventInfo->mallocIncrease += usage; /* MMalloc */
158   }
159   PetscFunctionReturn(PETSC_SUCCESS);
160 }
161 
162 static PetscErrorCode PetscEventPerfInfoToc(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event)
163 {
164   PetscFunctionBegin;
165   PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_FALSE));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 static PetscErrorCode PetscEventPerfInfoPause(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event)
170 {
171   PetscFunctionBegin;
172   PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_FALSE));
173   PetscFunctionReturn(PETSC_SUCCESS);
174 }
175 
176 static PetscErrorCode PetscEventPerfInfoAdd_Internal(const PetscEventPerfInfo *eventInfo, PetscEventPerfInfo *outInfo)
177 {
178   PetscFunctionBegin;
179   outInfo->count += eventInfo->count;
180   outInfo->time += eventInfo->time;
181   outInfo->time2 += eventInfo->time2;
182   outInfo->flops += eventInfo->flops;
183   outInfo->flops2 += eventInfo->flops2;
184   outInfo->numMessages += eventInfo->numMessages;
185   outInfo->messageLength += eventInfo->messageLength;
186   outInfo->numReductions += eventInfo->numReductions;
187 #if defined(PETSC_HAVE_DEVICE)
188   outInfo->CpuToGpuCount += eventInfo->CpuToGpuCount;
189   outInfo->GpuToCpuCount += eventInfo->GpuToCpuCount;
190   outInfo->CpuToGpuSize += eventInfo->CpuToGpuSize;
191   outInfo->GpuToCpuSize += eventInfo->GpuToCpuSize;
192   outInfo->GpuFlops += eventInfo->GpuFlops;
193   outInfo->GpuTime += eventInfo->GpuTime;
194 #endif
195   outInfo->memIncrease += eventInfo->memIncrease;
196   outInfo->mallocSpace += eventInfo->mallocSpace;
197   outInfo->mallocIncreaseEvent += eventInfo->mallocIncreaseEvent;
198   outInfo->mallocIncrease += eventInfo->mallocIncrease;
199   PetscFunctionReturn(PETSC_SUCCESS);
200 }
201 
202 PETSC_LOG_RESIZABLE_ARRAY(EventPerfArray, PetscEventPerfInfo, PetscLogEvent, PetscEventPerfInfoInit, NULL, NULL)
203 
204 /* --- PetscClassPerf --- */
205 
206 typedef struct {
207   int            creations;    /* The number of objects of this class created */
208   int            destructions; /* The number of objects of this class destroyed */
209   PetscLogDouble mem;          /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */
210   PetscLogDouble descMem;      /* The total memory allocated by descendents of these objects; this is completely wrong and should possibly be removed */
211 } PetscClassPerf;
212 
213 static PetscErrorCode PetscClassPerfInit(PetscClassPerf *classInfo)
214 {
215   PetscFunctionBegin;
216   PetscCall(PetscMemzero(classInfo, sizeof(*classInfo)));
217   PetscFunctionReturn(PETSC_SUCCESS);
218 }
219 
220 PETSC_LOG_RESIZABLE_ARRAY(ClassPerfArray, PetscClassPerf, PetscLogClass, PetscClassPerfInit, NULL, NULL)
221 
222 /* --- PetscStagePerf --- */
223 
224 typedef struct _PetscStagePerf {
225   PetscBool              used;     /* The stage was pushed on this processor */
226   PetscEventPerfInfo     perfInfo; /* The stage performance information */
227   PetscLogEventPerfArray eventLog; /* The event information for this stage */
228   PetscLogClassPerfArray classLog; /* The class information for this stage */
229 } PetscStagePerf;
230 
231 static PetscErrorCode PetscStageInfoInit(PetscStagePerf *stageInfo)
232 {
233   PetscFunctionBegin;
234   PetscCall(PetscMemzero(stageInfo, sizeof(*stageInfo)));
235   PetscCall(PetscLogEventPerfArrayCreate(128, &(stageInfo->eventLog)));
236   PetscCall(PetscLogClassPerfArrayCreate(128, &(stageInfo->classLog)));
237   PetscCall(PetscEventPerfInfoInit(&stageInfo->perfInfo));
238   PetscFunctionReturn(PETSC_SUCCESS);
239 }
240 
241 static PetscErrorCode PetscStageInfoReset(PetscStagePerf *stageInfo)
242 {
243   PetscFunctionBegin;
244   PetscCall(PetscLogEventPerfArrayDestroy(&(stageInfo->eventLog)));
245   PetscCall(PetscLogClassPerfArrayDestroy(&(stageInfo->classLog)));
246   PetscFunctionReturn(PETSC_SUCCESS);
247 }
248 
249 PETSC_LOG_RESIZABLE_ARRAY(StageInfoArray, PetscStagePerf, PetscLogStage, PetscStageInfoInit, PetscStageInfoReset, NULL)
250 
251 /* --- Action --- */
252 
253 /* The structure for action logging */
254 typedef enum {
255   PETSC_LOG_ACTION_CREATE,
256   PETSC_LOG_ACTION_DESTROY,
257   PETSC_LOG_ACTION_BEGIN,
258   PETSC_LOG_ACTION_END,
259 } PetscLogActionType;
260 
261 PETSC_LOG_RESIZABLE_ARRAY(ActionArray, Action, PetscLogEvent, NULL, NULL, NULL)
262 
263 /* --- Object --- */
264 
265 PETSC_LOG_RESIZABLE_ARRAY(ObjectArray, Object, PetscObject, NULL, NULL, NULL)
266 
267 /* Map from (threadid,stage,event) to perfInfo data struct */
268 #include <petsc/private/hashmapijk.h>
269 
270 #if !PetscDefined(HAVE_THREADSAFETY)
271 // PetscHMapEvent is only declared in logimpl.h if PetscDefined(HAVE_THREADSAFETY): define it otherwise so that we
272 // can just always have it in the struct
273 PETSC_HASH_MAP(HMapEvent, PetscHashIJKKey, PetscEventPerfInfo *, PetscHashIJKKeyHash, PetscHashIJKKeyEqual, NULL)
274 #endif
275 
276 typedef struct _n_PetscLogHandler_Default *PetscLogHandler_Default;
277 struct _n_PetscLogHandler_Default {
278   PetscLogStageInfoArray stages;
279   PetscSpinlock          lock;
280   PetscLogActionArray    petsc_actions;
281   PetscLogObjectArray    petsc_objects;
282   PetscBool              petsc_logActions;
283   PetscBool              petsc_logObjects;
284   int                    petsc_numObjectsCreated;
285   int                    petsc_numObjectsDestroyed;
286   PetscHMapEvent         eventInfoMap_th;
287   int                    pause_depth;
288 };
289 
290 /* --- PetscLogHandler_Default --- */
291 
292 static PetscErrorCode PetscLogHandlerContextCreate_Default(PetscLogHandler_Default *def_p)
293 {
294   PetscLogHandler_Default def;
295 
296   PetscFunctionBegin;
297   PetscCall(PetscNew(def_p));
298   def = *def_p;
299   PetscCall(PetscLogStageInfoArrayCreate(8, &(def->stages)));
300   PetscCall(PetscLogActionArrayCreate(64, &(def->petsc_actions)));
301   PetscCall(PetscLogObjectArrayCreate(64, &(def->petsc_objects)));
302 
303   PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_actions", &def->petsc_logActions, NULL));
304   PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_objects", &def->petsc_logObjects, NULL));
305   if (PetscDefined(HAVE_THREADSAFETY)) { PetscCall(PetscHMapEventCreate(&def->eventInfoMap_th)); }
306   PetscFunctionReturn(PETSC_SUCCESS);
307 }
308 
309 static PetscErrorCode PetscLogHandlerDestroy_Default(PetscLogHandler h)
310 {
311   PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
312 
313   PetscFunctionBegin;
314   PetscCall(PetscLogStageInfoArrayDestroy(&def->stages));
315   PetscCall(PetscLogActionArrayDestroy(&def->petsc_actions));
316   PetscCall(PetscLogObjectArrayDestroy(&def->petsc_objects));
317   if (def->eventInfoMap_th) {
318     PetscEventPerfInfo **array;
319     PetscInt             n, off = 0;
320 
321     PetscCall(PetscHMapEventGetSize(def->eventInfoMap_th, &n));
322     PetscCall(PetscMalloc1(n, &array));
323     PetscCall(PetscHMapEventGetVals(def->eventInfoMap_th, &off, array));
324     for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i]));
325     PetscCall(PetscFree(array));
326     PetscCall(PetscHMapEventDestroy(&def->eventInfoMap_th));
327   }
328   PetscCall(PetscFree(def));
329   PetscFunctionReturn(PETSC_SUCCESS);
330 }
331 
332 static PetscErrorCode PetscLogHandlerDefaultGetStageInfo(PetscLogHandler handler, PetscLogStage stage, PetscStagePerf **stage_info_p)
333 {
334   PetscStagePerf         *stage_info = NULL;
335   PetscLogHandler_Default def        = (PetscLogHandler_Default)handler->data;
336 
337   PetscFunctionBegin;
338   PetscCall(PetscLogStageInfoArrayResize(def->stages, stage + 1));
339   PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info));
340   *stage_info_p = stage_info;
341   PetscFunctionReturn(PETSC_SUCCESS);
342 }
343 
344 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultGetEventPerfInfo(PetscLogHandler handler, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **event_info_p)
345 {
346   PetscEventPerfInfo    *event_info = NULL;
347   PetscStagePerf        *stage_info = NULL;
348   PetscLogEventPerfArray event_log;
349 
350   PetscFunctionBegin;
351   if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(handler->state, &stage));
352   PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info));
353   event_log = stage_info->eventLog;
354   PetscCall(PetscLogEventPerfArrayResize(event_log, event + 1));
355   PetscCall(PetscLogEventPerfArrayGetRef(event_log, event, &event_info));
356   event_info->id = event;
357   *event_info_p  = event_info;
358   PetscFunctionReturn(PETSC_SUCCESS);
359 }
360 
361 static PetscErrorCode PetscLogHandlerDefaultGetClassPerf(PetscLogHandler handler, PetscLogStage stage, PetscLogClass clss, PetscClassPerf **class_info)
362 {
363   PetscLogClassPerfArray class_log;
364   PetscStagePerf        *stage_info;
365 
366   PetscFunctionBegin;
367   PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info));
368   class_log = stage_info->classLog;
369   PetscCall(PetscLogClassPerfArrayResize(class_log, clss + 1));
370   PetscCall(PetscLogClassPerfArrayGetRef(class_log, clss, class_info));
371   PetscFunctionReturn(PETSC_SUCCESS);
372 }
373 
374 static PetscErrorCode PetscLogHandlerObjectCreate_Default(PetscLogHandler h, PetscObject obj)
375 {
376   PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
377   PetscLogState           state;
378   PetscLogStage           stage;
379   PetscClassPerf         *classInfo;
380   int                     oclass = 0;
381 
382   PetscFunctionBegin;
383   PetscCall(PetscLogHandlerGetState(h, &state));
384   PetscCall(PetscSpinlockLock(&def->lock));
385   /* Record stage info */
386   PetscCall(PetscLogStateGetCurrentStage(state, &stage));
387   PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass));
388   PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo));
389   classInfo->creations++;
390   /* Record the creation action */
391   if (def->petsc_logActions) {
392     Action new_action;
393 
394     PetscCall(PetscTime(&new_action.time));
395     new_action.time -= petsc_BaseTime;
396     new_action.action  = PETSC_LOG_ACTION_CREATE;
397     new_action.event   = -1;
398     new_action.classid = obj->classid;
399     new_action.id1     = obj->id;
400     new_action.id2     = -1;
401     new_action.id3     = -1;
402     new_action.flops   = petsc_TotalFlops;
403     PetscCall(PetscMallocGetCurrentUsage(&new_action.mem));
404     PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem));
405     PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action));
406   }
407   /* We don't just use obj->id to count all objects that are created
408      because PetscLogHandlers are objects and PetscLogObjectDestroy() will not
409      be called for them: the number of objects created and destroyed as counted
410      here and below would have an imbalance */
411   def->petsc_numObjectsCreated++;
412   /* Record the object */
413   if (def->petsc_logObjects) {
414     Object new_object;
415 
416     new_object.parent = -1;
417     new_object.obj    = obj;
418     new_object.mem    = 0;
419 
420     PetscCall(PetscMemzero(new_object.name, sizeof(new_object.name)));
421     PetscCall(PetscMemzero(new_object.info, sizeof(new_object.info)));
422     PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1");
423     PetscCall(PetscLogObjectArrayResize(def->petsc_objects, obj->id));
424     PetscCall(PetscLogObjectArraySet(def->petsc_objects, obj->id - 1, new_object));
425   }
426   PetscCall(PetscSpinlockUnlock(&def->lock));
427   PetscFunctionReturn(PETSC_SUCCESS);
428 }
429 
430 static PetscErrorCode PetscLogHandlerObjectDestroy_Default(PetscLogHandler h, PetscObject obj)
431 {
432   PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
433   PetscLogState           state;
434   PetscLogStage           stage;
435   PetscClassPerf         *classInfo;
436   int                     oclass = 0;
437 
438   PetscFunctionBegin;
439   PetscCall(PetscLogHandlerGetState(h, &state));
440   /* Record stage info */
441   PetscCall(PetscSpinlockLock(&def->lock));
442   PetscCall(PetscLogStateGetCurrentStage(state, &stage));
443   if (stage >= 0) {
444     /* stage < 0 can happen if the log summary is output before some things are destroyed */
445     PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass));
446     PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo));
447     classInfo->destructions++;
448   }
449   /* Cannot Credit all ancestors with your memory because they may have already been destroyed*/
450   def->petsc_numObjectsDestroyed++;
451   /* Dynamically enlarge logging structures */
452   /* Record the destruction action */
453   if (def->petsc_logActions) {
454     Action new_action;
455 
456     PetscCall(PetscTime(&new_action.time));
457     new_action.time -= petsc_BaseTime;
458     new_action.event   = -1;
459     new_action.action  = PETSC_LOG_ACTION_DESTROY;
460     new_action.classid = obj->classid;
461     new_action.id1     = obj->id;
462     new_action.id2     = -1;
463     new_action.id3     = -1;
464     new_action.flops   = petsc_TotalFlops;
465     PetscCall(PetscMallocGetCurrentUsage(&new_action.mem));
466     PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem));
467     PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action));
468   }
469   if (def->petsc_logObjects) {
470     Object *obj_entry = NULL;
471 
472     PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1");
473     PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, obj->id - 1, &obj_entry));
474     if (obj->name) PetscCall(PetscStrncpy(obj_entry->name, obj->name, 64));
475     obj_entry->obj = NULL;
476   }
477   PetscCall(PetscSpinlockUnlock(&def->lock));
478   PetscFunctionReturn(PETSC_SUCCESS);
479 }
480 
481 static PetscErrorCode PetscLogHandlerEventSync_Default(PetscLogHandler h, PetscLogEvent event, MPI_Comm comm)
482 {
483   PetscLogState       state;
484   PetscLogEventInfo   event_info;
485   PetscEventPerfInfo *event_perf_info;
486   int                 stage;
487   PetscLogDouble      time = 0.0;
488 
489   PetscFunctionBegin;
490   if (!(PetscLogSyncOn) || comm == MPI_COMM_NULL) PetscFunctionReturn(PETSC_SUCCESS);
491   PetscCall(PetscLogHandlerGetState(h, &state));
492   PetscCall(PetscLogStateEventGetInfo(state, event, &event_info));
493   if (!event_info.collective) PetscFunctionReturn(PETSC_SUCCESS);
494   PetscCall(PetscLogStateGetCurrentStage(state, &stage));
495   PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info));
496   if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
497 
498   PetscCall(PetscTimeSubtract(&time));
499   PetscCallMPI(MPI_Barrier(comm));
500   PetscCall(PetscTimeAdd(&time));
501   event_perf_info->syncTime += time;
502   PetscFunctionReturn(PETSC_SUCCESS);
503 }
504 
505 static PetscErrorCode PetscLogGetStageEventPerfInfo_threaded(PetscLogHandler_Default def, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **eventInfo)
506 {
507   PetscEventPerfInfo *leventInfo = NULL;
508   PetscHashIJKKey     key;
509 
510   PetscFunctionBegin;
511 
512 #if PetscDefined(HAVE_THREADSAFETY)
513   key.i = PetscLogGetTid();
514 #else
515   key.i = 0;
516 #endif
517   key.j = stage;
518   key.k = event;
519   PetscCall(PetscSpinlockLock(&def->lock));
520   PetscCall(PetscHMapEventGet(def->eventInfoMap_th, key, &leventInfo));
521   if (!leventInfo) {
522     PetscCall(PetscNew(&leventInfo));
523     leventInfo->id = event;
524     PetscCall(PetscHMapEventSet(def->eventInfoMap_th, key, leventInfo));
525   }
526   PetscCall(PetscSpinlockUnlock(&def->lock));
527   *eventInfo = leventInfo;
528   PetscFunctionReturn(PETSC_SUCCESS);
529 }
530 
531 #if defined(PETSC_HAVE_CUDA)
532   #include <nvToolsExt.h>
533 #endif
534 static PetscErrorCode PetscLogHandlerEventBegin_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
535 {
536   PetscLogHandler_Default def             = (PetscLogHandler_Default)h->data;
537   PetscEventPerfInfo     *event_perf_info = NULL;
538   PetscLogEventInfo       event_info;
539   PetscLogDouble          time;
540   PetscLogState           state;
541   PetscLogStage           stage;
542 
543   PetscFunctionBegin;
544   PetscCall(PetscLogHandlerGetState(h, &state));
545   /* Synchronization */
546   PetscCall(PetscLogHandlerEventSync_Default(h, event, PetscObjectComm(o1)));
547   PetscCall(PetscLogStateGetCurrentStage(state, &stage));
548   if (def->pause_depth > 0) stage = 0; // in pause-mode, all events run on the main stage
549   if (PetscDefined(HAVE_THREADSAFETY)) {
550     PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info));
551     if (event_perf_info->depth == 0) { PetscCall(PetscEventPerfInfoInit(event_perf_info)); }
552   } else {
553     PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info));
554   }
555   PetscCheck(event_perf_info->depth >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to begin a paused event, this is not allowed");
556   event_perf_info->depth++;
557   /* Check for double counting */
558   if (event_perf_info->depth > 1) PetscFunctionReturn(PETSC_SUCCESS);
559 #if defined(PETSC_HAVE_CUDA)
560   if (PetscDeviceInitialized(PETSC_DEVICE_CUDA)) {
561     PetscLogEventInfo event_reg_info;
562 
563     PetscCall(PetscLogStateEventGetInfo(state, event, &event_reg_info));
564     nvtxRangePushA(event_reg_info.name);
565   }
566 #endif
567   PetscCall(PetscLogStateEventGetInfo(state, event, &event_info));
568   /* Log the performance info */
569   event_perf_info->count++;
570   PetscCall(PetscTime(&time));
571   PetscCall(PetscEventPerfInfoTic(event_perf_info, time, PetscLogMemory, (int)event));
572   if (def->petsc_logActions) {
573     PetscLogDouble curTime;
574     Action         new_action;
575 
576     PetscCall(PetscTime(&curTime));
577     new_action.time    = curTime - petsc_BaseTime;
578     new_action.action  = PETSC_LOG_ACTION_BEGIN;
579     new_action.event   = event;
580     new_action.classid = event_info.classid;
581     new_action.id1     = o1 ? o1->id : -1;
582     new_action.id2     = o2 ? o2->id : -1;
583     new_action.id3     = o3 ? o3->id : -1;
584     new_action.flops   = petsc_TotalFlops;
585     PetscCall(PetscMallocGetCurrentUsage(&new_action.mem));
586     PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem));
587     PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action));
588   }
589   PetscFunctionReturn(PETSC_SUCCESS);
590 }
591 
592 static PetscErrorCode PetscLogHandlerEventEnd_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
593 {
594   PetscLogHandler_Default def             = (PetscLogHandler_Default)h->data;
595   PetscEventPerfInfo     *event_perf_info = NULL;
596   PetscLogDouble          time;
597   PetscLogState           state;
598   int                     stage;
599 
600   PetscFunctionBegin;
601   PetscCall(PetscLogHandlerGetState(h, &state));
602   if (def->petsc_logActions) {
603     PetscLogEventInfo event_info;
604     PetscLogDouble    curTime;
605     Action            new_action;
606 
607     PetscCall(PetscLogStateEventGetInfo(state, event, &event_info));
608     PetscCall(PetscTime(&curTime));
609     new_action.time    = curTime - petsc_BaseTime;
610     new_action.action  = PETSC_LOG_ACTION_END;
611     new_action.event   = event;
612     new_action.classid = event_info.classid;
613     new_action.id1     = o1 ? o1->id : -1;
614     new_action.id2     = o2 ? o2->id : -2;
615     new_action.id3     = o3 ? o3->id : -3;
616     new_action.flops   = petsc_TotalFlops;
617     PetscCall(PetscMallocGetCurrentUsage(&new_action.mem));
618     PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem));
619     PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action));
620   }
621   PetscCall(PetscLogStateGetCurrentStage(state, &stage));
622   if (def->pause_depth > 0) stage = 0; // all events run on the main stage in pause-mode
623   if (PetscDefined(HAVE_THREADSAFETY)) {
624     PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info));
625   } else {
626     PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info));
627   }
628   PetscCheck(event_perf_info->depth > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to end paused event, not allowed");
629   event_perf_info->depth--;
630   /* Check for double counting */
631   if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
632   else PetscCheck(event_perf_info->depth == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Logging event had unbalanced begin/end pairs");
633 
634   /* Log performance info */
635   PetscCall(PetscTime(&time));
636   PetscCall(PetscEventPerfInfoToc(event_perf_info, time, PetscLogMemory, (int)event));
637   if (PetscDefined(HAVE_THREADSAFETY)) {
638     PetscEventPerfInfo *event_perf_info_global;
639     PetscCall(PetscSpinlockLock(&def->lock));
640     PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info_global));
641     PetscCall(PetscEventPerfInfoAdd_Internal(event_perf_info, event_perf_info_global));
642     PetscCall(PetscSpinlockUnlock(&def->lock));
643   }
644 #if defined(PETSC_HAVE_CUDA)
645   if (PetscDeviceInitialized(PETSC_DEVICE_CUDA)) nvtxRangePop();
646 #endif
647   PetscFunctionReturn(PETSC_SUCCESS);
648 }
649 
650 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultDeactivatePush(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event)
651 {
652   PetscEventPerfInfo *event_perf_info;
653 
654   PetscFunctionBegin;
655   if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage));
656   PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info));
657   event_perf_info->depth++;
658   PetscFunctionReturn(PETSC_SUCCESS);
659 }
660 
661 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultDeactivatePop(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event)
662 {
663   PetscEventPerfInfo *event_perf_info;
664 
665   PetscFunctionBegin;
666   if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage));
667   PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info));
668   event_perf_info->depth--;
669   PetscFunctionReturn(PETSC_SUCCESS);
670 }
671 
672 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultEventsPause(PetscLogHandler h)
673 {
674   PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
675   PetscLogDouble          time;
676   PetscInt                num_stages;
677 
678   PetscFunctionBegin;
679   if (def->pause_depth++ > 0) PetscFunctionReturn(PETSC_SUCCESS);
680   PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL));
681   PetscCall(PetscTime(&time));
682   for (PetscInt stage = 0; stage < num_stages; stage++) {
683     PetscStagePerf *stage_info = NULL;
684     PetscInt        num_events;
685 
686     PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info));
687     PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL));
688     for (PetscInt event = 0; event < num_events; event++) {
689       PetscEventPerfInfo *event_info = NULL;
690       PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info));
691       if (event_info->depth > 0) {
692         event_info->depth *= -1;
693         PetscCall(PetscEventPerfInfoPause(event_info, time, PetscLogMemory, event));
694       }
695     }
696     if (stage > 0 && stage_info->perfInfo.depth > 0) {
697       stage_info->perfInfo.depth *= -1;
698       PetscCall(PetscEventPerfInfoPause(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2)));
699     }
700   }
701   PetscFunctionReturn(PETSC_SUCCESS);
702 }
703 
704 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultEventsResume(PetscLogHandler h)
705 {
706   PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
707   PetscLogDouble          time;
708   PetscInt                num_stages;
709 
710   PetscFunctionBegin;
711   if (--def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
712   PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL));
713   PetscCall(PetscTime(&time));
714   for (PetscInt stage = 0; stage < num_stages; stage++) {
715     PetscStagePerf *stage_info = NULL;
716     PetscInt        num_events;
717 
718     PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info));
719     PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL));
720     for (PetscInt event = 0; event < num_events; event++) {
721       PetscEventPerfInfo *event_info = NULL;
722       PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info));
723       if (event_info->depth < 0) {
724         event_info->depth *= -1;
725         PetscCall(PetscEventPerfInfoResume(event_info, time, PetscLogMemory, event));
726       }
727     }
728     if (stage > 0 && stage_info->perfInfo.depth < 0) {
729       stage_info->perfInfo.depth *= -1;
730       PetscCall(PetscEventPerfInfoResume(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2)));
731     }
732   }
733   PetscFunctionReturn(PETSC_SUCCESS);
734 }
735 
736 static PetscErrorCode PetscLogHandlerStagePush_Default(PetscLogHandler h, PetscLogStage new_stage)
737 {
738   PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
739   PetscLogDouble          time;
740   PetscLogState           state;
741   PetscLogStage           current_stage;
742   PetscStagePerf         *new_stage_info;
743 
744   PetscFunctionBegin;
745   if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
746   PetscCall(PetscLogHandlerGetState(h, &state));
747   current_stage = state->current_stage;
748   PetscCall(PetscLogHandlerDefaultGetStageInfo(h, new_stage, &new_stage_info));
749   PetscCall(PetscTime(&time));
750 
751   /* Record flops/time of previous stage */
752   if (current_stage >= 0) {
753     if (PetscBTLookup(state->active, current_stage)) {
754       PetscStagePerf *current_stage_info;
755       PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, &current_stage_info));
756       PetscCall(PetscEventPerfInfoToc(&current_stage_info->perfInfo, time, PetscLogMemory, (int)-(current_stage + 2)));
757     }
758   }
759   new_stage_info->used = PETSC_TRUE;
760   new_stage_info->perfInfo.count++;
761   new_stage_info->perfInfo.depth++;
762   /* Subtract current quantities so that we obtain the difference when we pop */
763   if (PetscBTLookup(state->active, new_stage)) PetscCall(PetscEventPerfInfoTic(&new_stage_info->perfInfo, time, PetscLogMemory, (int)-(new_stage + 2)));
764   PetscFunctionReturn(PETSC_SUCCESS);
765 }
766 
767 static PetscErrorCode PetscLogHandlerStagePop_Default(PetscLogHandler h, PetscLogStage old_stage)
768 {
769   PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
770   PetscLogStage           current_stage;
771   PetscStagePerf         *old_stage_info;
772   PetscLogState           state;
773   PetscLogDouble          time;
774 
775   PetscFunctionBegin;
776   if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
777   PetscCall(PetscLogHandlerGetState(h, &state));
778   current_stage = state->current_stage;
779   PetscCall(PetscLogHandlerDefaultGetStageInfo(h, old_stage, &old_stage_info));
780   PetscCall(PetscTime(&time));
781   old_stage_info->perfInfo.depth--;
782   if (PetscBTLookup(state->active, old_stage)) { PetscCall(PetscEventPerfInfoToc(&old_stage_info->perfInfo, time, PetscLogMemory, (int)-(old_stage + 2))); }
783   if (current_stage >= 0) {
784     if (PetscBTLookup(state->active, current_stage)) {
785       PetscStagePerf *current_stage_info;
786       PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, &current_stage_info));
787       PetscCall(PetscEventPerfInfoTic(&current_stage_info->perfInfo, time, PetscLogMemory, (int)-(current_stage + 2)));
788     }
789   }
790   PetscFunctionReturn(PETSC_SUCCESS);
791 }
792 
793 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultStageSetVisible(PetscLogHandler h, PetscLogStage stage, PetscBool is_visible)
794 {
795   PetscStagePerf *stage_info;
796 
797   PetscFunctionBegin;
798   if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage));
799   PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info));
800   stage_info->perfInfo.visible = is_visible;
801   PetscFunctionReturn(PETSC_SUCCESS);
802 }
803 
804 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultStageGetVisible(PetscLogHandler h, PetscLogStage stage, PetscBool *is_visible)
805 {
806   PetscStagePerf *stage_info;
807 
808   PetscFunctionBegin;
809   if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage));
810   PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info));
811   *is_visible = stage_info->perfInfo.visible;
812   PetscFunctionReturn(PETSC_SUCCESS);
813 }
814 
815 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultSetLogActions(PetscLogHandler handler, PetscBool flag)
816 {
817   PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
818 
819   PetscFunctionBegin;
820   def->petsc_logActions = flag;
821   PetscFunctionReturn(PETSC_SUCCESS);
822 }
823 
824 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultSetLogObjects(PetscLogHandler handler, PetscBool flag)
825 {
826   PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
827 
828   PetscFunctionBegin;
829   def->petsc_logObjects = flag;
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultLogObjectState(PetscLogHandler handler, PetscObject obj, const char format[], va_list Argp)
834 {
835   PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
836   size_t                  fullLength;
837 
838   PetscFunctionBegin;
839   if (def->petsc_logObjects) {
840     Object *obj_entry = NULL;
841 
842     PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, obj->id - 1, &obj_entry));
843     PetscCall(PetscVSNPrintf(obj_entry->info, 64, format, &fullLength, Argp));
844   }
845   PetscFunctionReturn(PETSC_SUCCESS);
846 }
847 
848 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultGetNumObjects(PetscLogHandler handler, PetscInt *num_objects)
849 {
850   PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
851 
852   PetscFunctionBegin;
853   PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, num_objects, NULL));
854   PetscFunctionReturn(PETSC_SUCCESS);
855 }
856 
857 PETSC_INTERN PetscErrorCode PetscLogHandlerDump_Default(PetscLogHandler handler, const char sname[])
858 {
859   PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
860   FILE                   *fd;
861   char                    file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN];
862   PetscLogDouble          flops, _TotalTime;
863   PetscMPIInt             rank;
864   int                     curStage;
865   PetscLogState           state;
866   PetscInt                num_events;
867   PetscLogEvent           event;
868 
869   PetscFunctionBegin;
870   /* Calculate the total elapsed time */
871   PetscCall(PetscTime(&_TotalTime));
872   _TotalTime -= petsc_BaseTime;
873   /* Open log file */
874   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)handler), &rank));
875   PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank));
876   PetscCall(PetscFixFilename(file, fname));
877   PetscCall(PetscFOpen(PETSC_COMM_SELF, fname, "w", &fd));
878   PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname);
879   /* Output totals */
880   PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime));
881   PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Clock Resolution %g\n", 0.0));
882   /* Output actions */
883   if (def->petsc_logActions) {
884     PetscInt num_actions;
885     PetscCall(PetscLogActionArrayGetSize(def->petsc_actions, &num_actions, NULL));
886     PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Actions accomplished %d\n", (int)num_actions));
887     for (int a = 0; a < num_actions; a++) {
888       Action *action;
889 
890       PetscCall(PetscLogActionArrayGetRef(def->petsc_actions, a, &action));
891       PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%g %d %d %d %d %d %d %g %g %g\n", action->time, action->action, (int)action->event, (int)action->classid, action->id1, action->id2, action->id3, action->flops, action->mem, action->maxmem));
892     }
893   }
894   /* Output objects */
895   if (def->petsc_logObjects) {
896     PetscInt num_objects;
897 
898     PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL));
899     PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Objects created %d destroyed %d\n", def->petsc_numObjectsCreated, def->petsc_numObjectsDestroyed));
900     for (int o = 0; o < num_objects; o++) {
901       Object *object = NULL;
902 
903       PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, o, &object));
904       if (object->parent != -1) continue; // object with this id wasn't logged, probably a PetscLogHandler
905       PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Parent ID: %d Memory: %d\n", object->parent, (int)object->mem));
906       if (!object->name[0]) {
907         PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Name\n"));
908       } else {
909         PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Name: %s\n", object->name));
910       }
911       if (!object->info[0]) {
912         PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Info\n"));
913       } else {
914         PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Info: %s\n", object->info));
915       }
916     }
917   }
918   /* Output events */
919   PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Event log:\n"));
920   PetscCall(PetscLogHandlerGetState(handler, &state));
921   PetscCall(PetscLogStateGetNumEvents(state, &num_events));
922   PetscCall(PetscLogStateGetCurrentStage(state, &curStage));
923   for (event = 0; event < num_events; event++) {
924     PetscEventPerfInfo *event_info;
925 
926     PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, curStage, event, &event_info));
927     if (event_info->time != 0.0) flops = event_info->flops / event_info->time;
928     else flops = 0.0;
929     PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%d %16d %16g %16g %16g\n", event, event_info->count, event_info->flops, event_info->time, flops));
930   }
931   PetscCall(PetscFClose(PETSC_COMM_SELF, fd));
932   PetscFunctionReturn(PETSC_SUCCESS);
933 }
934 
935 /*
936   PetscLogView_Detailed - Each process prints the times for its own events
937 
938 */
939 static PetscErrorCode PetscLogHandlerView_Default_Detailed(PetscLogHandler handler, PetscViewer viewer)
940 {
941   PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
942   PetscLogDouble          locTotalTime, numRed, maxMem;
943   PetscInt                numStages, numEvents;
944   MPI_Comm                comm = PetscObjectComm((PetscObject)viewer);
945   PetscMPIInt             rank, size;
946   PetscLogGlobalNames     global_stages, global_events;
947   PetscLogState           state;
948   PetscEventPerfInfo      zero_info;
949 
950   PetscFunctionBegin;
951   PetscCall(PetscLogHandlerGetState(handler, &state));
952   PetscCallMPI(MPI_Comm_size(comm, &size));
953   PetscCallMPI(MPI_Comm_rank(comm, &rank));
954   /* Must preserve reduction count before we go on */
955   numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
956   /* Get the total elapsed time */
957   PetscCall(PetscTime(&locTotalTime));
958   locTotalTime -= petsc_BaseTime;
959   PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size));
960   PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n"));
961   PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n"));
962   PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n"));
963   PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n"));
964   PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n"));
965   PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n"));
966   PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n"));
967   PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages));
968   PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events));
969   PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages));
970   PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents));
971   PetscCall(PetscMemzero(&zero_info, sizeof(zero_info)));
972   PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n"));
973   for (PetscInt stage = 0; stage < numStages; stage++) {
974     PetscInt    stage_id;
975     const char *stage_name;
976 
977     PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
978     PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
979     PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stage_name));
980     PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stage_name));
981     for (PetscInt event = 0; event < numEvents; event++) {
982       PetscEventPerfInfo *eventInfo = &zero_info;
983       PetscBool           is_zero   = PETSC_FALSE;
984       PetscInt            event_id;
985       const char         *event_name;
986 
987       PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id));
988       PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name));
989       if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &eventInfo));
990       is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE;
991       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm));
992       if (!is_zero) { PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stage_name, event_name)); }
993     }
994   }
995   PetscCall(PetscMallocGetMaximumUsage(&maxMem));
996   PetscCall(PetscViewerASCIIPushSynchronized(viewer));
997   PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime));
998   PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct)));
999   PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len)));
1000   PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed));
1001   PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops));
1002   {
1003     PetscInt num_objects;
1004 
1005     PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL));
1006     PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, (int)num_objects));
1007   }
1008   PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem));
1009   PetscCall(PetscViewerFlush(viewer));
1010   for (PetscInt stage = 0; stage < numStages; stage++) {
1011     PetscEventPerfInfo *stage_perf_info = &zero_info;
1012     PetscInt            stage_id;
1013     const char         *stage_name;
1014 
1015     PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1016     PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1017     if (stage_id >= 0) {
1018       PetscStagePerf *stage_info;
1019       PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info));
1020       stage_perf_info = &stage_info->perfInfo;
1021     }
1022     PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stage_name, rank, stage_perf_info->time,
1023                                                  stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops));
1024     for (PetscInt event = 0; event < numEvents; event++) {
1025       PetscEventPerfInfo *eventInfo = &zero_info;
1026       PetscBool           is_zero   = PETSC_FALSE;
1027       PetscInt            event_id;
1028       const char         *event_name;
1029 
1030       PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id));
1031       PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name));
1032       if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &eventInfo));
1033       is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE;
1034       PetscCall(PetscMemcmp(eventInfo, &zero_info, sizeof(zero_info), &is_zero));
1035       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm));
1036       if (!is_zero) {
1037         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", stage_name, event_name, rank,
1038                                                      eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops));
1039         if (eventInfo->dof[0] >= 0.) {
1040           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : ["));
1041           for (PetscInt d = 0; d < 8; ++d) {
1042             if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", "));
1043             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d]));
1044           }
1045           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]"));
1046           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : ["));
1047           for (PetscInt e = 0; e < 8; ++e) {
1048             if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", "));
1049             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e]));
1050           }
1051           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]"));
1052         }
1053       }
1054       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n"));
1055     }
1056   }
1057   PetscCall(PetscViewerFlush(viewer));
1058   PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1059   PetscCall(PetscLogGlobalNamesDestroy(&global_events));
1060   PetscCall(PetscLogGlobalNamesDestroy(&global_stages));
1061   PetscFunctionReturn(PETSC_SUCCESS);
1062 }
1063 
1064 /*
1065   PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format
1066 */
1067 static PetscErrorCode PetscLogHandlerView_Default_CSV(PetscLogHandler handler, PetscViewer viewer)
1068 {
1069   PetscLogDouble      locTotalTime, maxMem;
1070   PetscInt            numStages, numEvents, stage, event;
1071   MPI_Comm            comm = PetscObjectComm((PetscObject)viewer);
1072   PetscMPIInt         rank, size;
1073   PetscLogGlobalNames global_stages, global_events;
1074   PetscLogState       state;
1075   PetscEventPerfInfo  zero_info;
1076 
1077   PetscFunctionBegin;
1078   PetscCall(PetscLogHandlerGetState(handler, &state));
1079   PetscCallMPI(MPI_Comm_size(comm, &size));
1080   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1081   /* Must preserve reduction count before we go on */
1082   /* Get the total elapsed time */
1083   PetscCall(PetscTime(&locTotalTime));
1084   locTotalTime -= petsc_BaseTime;
1085   PetscCall(PetscMallocGetMaximumUsage(&maxMem));
1086   PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages));
1087   PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events));
1088   PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages));
1089   PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents));
1090   PetscCall(PetscMemzero(&zero_info, sizeof(zero_info)));
1091   PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1092   PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size));
1093   PetscCall(PetscViewerFlush(viewer));
1094   for (stage = 0; stage < numStages; stage++) {
1095     PetscEventPerfInfo *stage_perf_info;
1096     PetscInt            stage_id;
1097     const char         *stage_name;
1098 
1099     PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1100     PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1101     stage_perf_info = &zero_info;
1102     if (stage_id >= 0) {
1103       PetscStagePerf *stage_info;
1104       PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info));
1105       stage_perf_info = &stage_info->perfInfo;
1106     }
1107     PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stage_name, rank, stage_perf_info->time, stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops));
1108     for (event = 0; event < numEvents; event++) {
1109       PetscEventPerfInfo *eventInfo = &zero_info;
1110       PetscBool           is_zero   = PETSC_FALSE;
1111       PetscInt            event_id;
1112       const char         *event_name;
1113 
1114       PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id));
1115       PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name));
1116       if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &eventInfo));
1117       PetscCall(PetscMemcmp(eventInfo, &zero_info, sizeof(zero_info), &is_zero));
1118       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm));
1119       if (!is_zero) {
1120         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stage_name, event_name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops));
1121         if (eventInfo->dof[0] >= 0.) {
1122           for (PetscInt d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d]));
1123           for (PetscInt e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e]));
1124         }
1125       }
1126       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n"));
1127     }
1128   }
1129   PetscCall(PetscViewerFlush(viewer));
1130   PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1131   PetscCall(PetscLogGlobalNamesDestroy(&global_stages));
1132   PetscCall(PetscLogGlobalNamesDestroy(&global_events));
1133   PetscFunctionReturn(PETSC_SUCCESS);
1134 }
1135 
1136 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd)
1137 {
1138   PetscFunctionBegin;
1139   if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS);
1140   PetscCall(PetscFPrintf(comm, fd, "\n\n"));
1141   PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n"));
1142   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1143   PetscCall(PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n"));
1144   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1145   PetscCall(PetscFPrintf(comm, fd, "      #   This program was run with logging synchronization.   #\n"));
1146   PetscCall(PetscFPrintf(comm, fd, "      #   This option provides more meaningful imbalance       #\n"));
1147   PetscCall(PetscFPrintf(comm, fd, "      #   figures at the expense of slowing things down and    #\n"));
1148   PetscCall(PetscFPrintf(comm, fd, "      #   providing a distorted view of the overall runtime.   #\n"));
1149   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1150   PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n\n\n"));
1151   PetscFunctionReturn(PETSC_SUCCESS);
1152 }
1153 
1154 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd)
1155 {
1156   PetscFunctionBegin;
1157   if (PetscDefined(USE_DEBUG)) {
1158     PetscCall(PetscFPrintf(comm, fd, "\n\n"));
1159     PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n"));
1160     PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1161     PetscCall(PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n"));
1162     PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1163     PetscCall(PetscFPrintf(comm, fd, "      #   This code was compiled with a debugging option.      #\n"));
1164     PetscCall(PetscFPrintf(comm, fd, "      #   To get timing results run ./configure                #\n"));
1165     PetscCall(PetscFPrintf(comm, fd, "      #   using --with-debugging=no, the performance will      #\n"));
1166     PetscCall(PetscFPrintf(comm, fd, "      #   be generally two or three times faster.              #\n"));
1167     PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1168     PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n\n\n"));
1169   }
1170   PetscFunctionReturn(PETSC_SUCCESS);
1171 }
1172 
1173 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd)
1174 {
1175 #if defined(PETSC_HAVE_DEVICE)
1176   PetscMPIInt size;
1177   PetscBool   deviceInitialized = PETSC_FALSE;
1178 
1179   PetscFunctionBegin;
1180   PetscCallMPI(MPI_Comm_size(comm, &size));
1181   for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) {
1182     const PetscDeviceType dtype = PetscDeviceTypeCast(i);
1183     if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */
1184       deviceInitialized = PETSC_TRUE;
1185       break;
1186     }
1187   }
1188   /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */
1189   if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS);
1190   PetscCall(PetscFPrintf(comm, fd, "\n\n"));
1191   PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n"));
1192   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1193   PetscCall(PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n"));
1194   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1195   PetscCall(PetscFPrintf(comm, fd, "      #   This code was compiled with GPU support and you've   #\n"));
1196   PetscCall(PetscFPrintf(comm, fd, "      #   created PETSc/GPU objects, but you intentionally     #\n"));
1197   PetscCall(PetscFPrintf(comm, fd, "      #   used -use_gpu_aware_mpi 0, requiring PETSc to copy   #\n"));
1198   PetscCall(PetscFPrintf(comm, fd, "      #   additional data between the GPU and CPU. To obtain   #\n"));
1199   PetscCall(PetscFPrintf(comm, fd, "      #   meaningful timing results on multi-rank runs, use    #\n"));
1200   PetscCall(PetscFPrintf(comm, fd, "      #   GPU-aware MPI instead.                               #\n"));
1201   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1202   PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n\n\n"));
1203   PetscFunctionReturn(PETSC_SUCCESS);
1204 #else
1205   return PETSC_SUCCESS;
1206 #endif
1207 }
1208 
1209 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd)
1210 {
1211 #if defined(PETSC_HAVE_DEVICE)
1212 
1213   PetscFunctionBegin;
1214   if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS);
1215   PetscCall(PetscFPrintf(comm, fd, "\n\n"));
1216   PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n"));
1217   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1218   PetscCall(PetscFPrintf(comm, fd, "      #                       WARNING!!!                       #\n"));
1219   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1220   PetscCall(PetscFPrintf(comm, fd, "      #   This code was run with -log_view_gpu_time            #\n"));
1221   PetscCall(PetscFPrintf(comm, fd, "      #   This provides accurate timing within the GPU kernels #\n"));
1222   PetscCall(PetscFPrintf(comm, fd, "      #   but can slow down the entire computation by a        #\n"));
1223   PetscCall(PetscFPrintf(comm, fd, "      #   measurable amount. For fastest runs we recommend     #\n"));
1224   PetscCall(PetscFPrintf(comm, fd, "      #   not using this option.                               #\n"));
1225   PetscCall(PetscFPrintf(comm, fd, "      #                                                        #\n"));
1226   PetscCall(PetscFPrintf(comm, fd, "      ##########################################################\n\n\n"));
1227   PetscFunctionReturn(PETSC_SUCCESS);
1228 #else
1229   return PETSC_SUCCESS;
1230 #endif
1231 }
1232 
1233 static PetscErrorCode PetscLogHandlerView_Default_Info(PetscLogHandler handler, PetscViewer viewer)
1234 {
1235   FILE                   *fd;
1236   PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
1237   char                    arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
1238   PetscLogDouble          locTotalTime, TotalTime, TotalFlops;
1239   PetscLogDouble          numMessages, messageLength, avgMessLen, numReductions;
1240   PetscLogDouble          stageTime, flops, flopr, mem, mess, messLen, red;
1241   PetscLogDouble          fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed;
1242   PetscLogDouble          fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed;
1243   PetscLogDouble          min, max, tot, ratio, avg, x, y;
1244   PetscLogDouble          minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax;
1245 #if defined(PETSC_HAVE_DEVICE)
1246   PetscLogEvent  KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */
1247   PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops;
1248 #endif
1249   PetscMPIInt   minC, maxC;
1250   PetscMPIInt   size, rank;
1251   PetscBool    *localStageUsed, *stageUsed;
1252   PetscBool    *localStageVisible, *stageVisible;
1253   PetscInt      numStages, numEvents;
1254   int           stage, oclass;
1255   PetscLogEvent event;
1256   char          version[256];
1257   MPI_Comm      comm;
1258 #if defined(PETSC_HAVE_DEVICE)
1259   PetscInt64 nas = 0x7FF0000000000002;
1260 #endif
1261   PetscLogGlobalNames global_stages, global_events;
1262   PetscEventPerfInfo  zero_info;
1263   PetscLogState       state;
1264 
1265   PetscFunctionBegin;
1266   PetscCall(PetscLogHandlerGetState(handler, &state));
1267   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1268   PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
1269   PetscCall(PetscViewerASCIIGetPointer(viewer, &fd));
1270   PetscCallMPI(MPI_Comm_size(comm, &size));
1271   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1272   /* Get the total elapsed time */
1273   PetscCall(PetscTime(&locTotalTime));
1274   locTotalTime -= petsc_BaseTime;
1275 
1276   PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n"));
1277   PetscCall(PetscFPrintf(comm, fd, "***                                WIDEN YOUR WINDOW TO 160 CHARACTERS.  Use 'enscript -r -fCourier9' to print this document                                 ***\n"));
1278   PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n"));
1279   PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n"));
1280   PetscCall(PetscLogViewWarnSync(comm, fd));
1281   PetscCall(PetscLogViewWarnDebugging(comm, fd));
1282   PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd));
1283   PetscCall(PetscLogViewWarnGpuTime(comm, fd));
1284   PetscCall(PetscGetArchType(arch, sizeof(arch)));
1285   PetscCall(PetscGetHostName(hostname, sizeof(hostname)));
1286   PetscCall(PetscGetUserName(username, sizeof(username)));
1287   PetscCall(PetscGetProgramName(pname, sizeof(pname)));
1288   PetscCall(PetscGetDate(date, sizeof(date)));
1289   PetscCall(PetscGetVersion(version, sizeof(version)));
1290   if (size == 1) {
1291     PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date));
1292   } else {
1293     PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date));
1294   }
1295 #if defined(PETSC_HAVE_OPENMP)
1296   PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads));
1297 #endif
1298   PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version));
1299 
1300   /* Must preserve reduction count before we go on */
1301   red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
1302 
1303   /* Calculate summary information */
1304   PetscCall(PetscFPrintf(comm, fd, "\n                         Max       Max/Min     Avg       Total\n"));
1305   /*   Time */
1306   PetscCall(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1307   PetscCall(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1308   PetscCall(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1309   avg = tot / ((PetscLogDouble)size);
1310   if (min != 0.0) ratio = max / min;
1311   else ratio = 0.0;
1312   PetscCall(PetscFPrintf(comm, fd, "Time (sec):           %5.3e   %7.3f   %5.3e\n", max, ratio, avg));
1313   TotalTime = tot;
1314   /*   Objects */
1315   {
1316     PetscInt num_objects;
1317 
1318     PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL));
1319     avg = (PetscLogDouble)num_objects;
1320   }
1321   PetscCall(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1322   PetscCall(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1323   PetscCall(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1324   avg = tot / ((PetscLogDouble)size);
1325   if (min != 0.0) ratio = max / min;
1326   else ratio = 0.0;
1327   PetscCall(PetscFPrintf(comm, fd, "Objects:              %5.3e   %7.3f   %5.3e\n", max, ratio, avg));
1328   /*   Flops */
1329   PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1330   PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1331   PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1332   avg = tot / ((PetscLogDouble)size);
1333   if (min != 0.0) ratio = max / min;
1334   else ratio = 0.0;
1335   PetscCall(PetscFPrintf(comm, fd, "Flops:                %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot));
1336   TotalFlops = tot;
1337   /*   Flops/sec -- Must talk to Barry here */
1338   if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime;
1339   else flops = 0.0;
1340   PetscCall(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1341   PetscCall(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1342   PetscCall(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1343   avg = tot / ((PetscLogDouble)size);
1344   if (min != 0.0) ratio = max / min;
1345   else ratio = 0.0;
1346   PetscCall(PetscFPrintf(comm, fd, "Flops/sec:            %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot));
1347   /*   Memory */
1348   PetscCall(PetscMallocGetMaximumUsage(&mem));
1349   if (mem > 0.0) {
1350     PetscCall(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1351     PetscCall(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1352     PetscCall(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1353     avg = tot / ((PetscLogDouble)size);
1354     if (min != 0.0) ratio = max / min;
1355     else ratio = 0.0;
1356     PetscCall(PetscFPrintf(comm, fd, "Memory (bytes):       %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot));
1357   }
1358   /*   Messages */
1359   mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
1360   PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1361   PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1362   PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1363   avg = tot / ((PetscLogDouble)size);
1364   if (min != 0.0) ratio = max / min;
1365   else ratio = 0.0;
1366   PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count:        %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot));
1367   numMessages = tot;
1368   /*   Message Lengths */
1369   mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
1370   PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1371   PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1372   PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1373   if (numMessages != 0) avg = tot / numMessages;
1374   else avg = 0.0;
1375   if (min != 0.0) ratio = max / min;
1376   else ratio = 0.0;
1377   PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes):  %5.3e   %7.3f   %5.3e  %5.3e\n", max, ratio, avg, tot));
1378   messageLength = tot;
1379   /*   Reductions */
1380   PetscCall(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1381   PetscCall(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1382   PetscCall(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1383   if (min != 0.0) ratio = max / min;
1384   else ratio = 0.0;
1385   PetscCall(PetscFPrintf(comm, fd, "MPI Reductions:       %5.3e   %7.3f\n", max, ratio));
1386   numReductions = red; /* wrong because uses count from process zero */
1387   PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n"));
1388   PetscCall(PetscFPrintf(comm, fd, "                            e.g., VecAXPY() for real vectors of length N --> 2N flops\n"));
1389   PetscCall(PetscFPrintf(comm, fd, "                            and VecAXPY() for complex vectors of length N --> 8N flops\n"));
1390 
1391   PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages));
1392   PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events));
1393   PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages));
1394   PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents));
1395   PetscCall(PetscMemzero(&zero_info, sizeof(zero_info)));
1396   PetscCall(PetscMalloc1(numStages, &localStageUsed));
1397   PetscCall(PetscMalloc1(numStages, &stageUsed));
1398   PetscCall(PetscMalloc1(numStages, &localStageVisible));
1399   PetscCall(PetscMalloc1(numStages, &stageVisible));
1400   if (numStages > 0) {
1401     for (stage = 0; stage < numStages; stage++) {
1402       PetscInt stage_id;
1403 
1404       PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1405       if (stage_id >= 0) {
1406         PetscStagePerf *stage_info;
1407 
1408         PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info));
1409         localStageUsed[stage]    = stage_info->used;
1410         localStageVisible[stage] = stage_info->perfInfo.visible;
1411       } else {
1412         localStageUsed[stage]    = PETSC_FALSE;
1413         localStageVisible[stage] = PETSC_TRUE;
1414       }
1415     }
1416     PetscCall(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm));
1417     PetscCall(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm));
1418     for (stage = 0; stage < numStages; stage++) {
1419       if (stageUsed[stage] && stageVisible[stage]) {
1420         PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages:   ----- Time ------  ----- Flop ------  --- Messages ---  -- Message Lengths --  -- Reductions --\n"));
1421         PetscCall(PetscFPrintf(comm, fd, "                        Avg     %%Total     Avg     %%Total    Count   %%Total     Avg         %%Total    Count   %%Total\n"));
1422         break;
1423       }
1424     }
1425     for (stage = 0; stage < numStages; stage++) {
1426       PetscInt            stage_id;
1427       PetscEventPerfInfo *stage_info;
1428       const char         *stage_name;
1429 
1430       if (!(stageUsed[stage] && stageVisible[stage])) continue;
1431       PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1432       PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1433       stage_info = &zero_info;
1434       if (localStageUsed[stage]) {
1435         PetscStagePerf *stage_perf_info;
1436 
1437         PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_perf_info));
1438         stage_info = &stage_perf_info->perfInfo;
1439       }
1440       PetscCall(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1441       PetscCall(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1442       PetscCall(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1443       PetscCall(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1444       PetscCall(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1445       mess *= 0.5;
1446       messLen *= 0.5;
1447       red /= size;
1448       if (TotalTime != 0.0) fracTime = stageTime / TotalTime;
1449       else fracTime = 0.0;
1450       if (TotalFlops != 0.0) fracFlops = flops / TotalFlops;
1451       else fracFlops = 0.0;
1452       /* Talk to Barry if (stageTime     != 0.0) flops          = (size*flops)/stageTime; else flops          = 0.0; */
1453       if (numMessages != 0.0) fracMessages = mess / numMessages;
1454       else fracMessages = 0.0;
1455       if (mess != 0.0) avgMessLen = messLen / mess;
1456       else avgMessLen = 0.0;
1457       if (messageLength != 0.0) fracLength = messLen / messageLength;
1458       else fracLength = 0.0;
1459       if (numReductions != 0.0) fracReductions = red / numReductions;
1460       else fracReductions = 0.0;
1461       PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%%  %6.4e %5.1f%%  %5.3e %5.1f%%  %5.3e      %5.1f%%  %5.3e %5.1f%%\n", stage, stage_name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions));
1462     }
1463   }
1464 
1465   PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n"));
1466   PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n"));
1467   PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n"));
1468   PetscCall(PetscFPrintf(comm, fd, "   Count: number of times phase was executed\n"));
1469   PetscCall(PetscFPrintf(comm, fd, "   Time and Flop: Max - maximum over all processors\n"));
1470   PetscCall(PetscFPrintf(comm, fd, "                  Ratio - ratio of maximum to minimum over all processors\n"));
1471   PetscCall(PetscFPrintf(comm, fd, "   Mess: number of messages sent\n"));
1472   PetscCall(PetscFPrintf(comm, fd, "   AvgLen: average message length (bytes)\n"));
1473   PetscCall(PetscFPrintf(comm, fd, "   Reduct: number of global reductions\n"));
1474   PetscCall(PetscFPrintf(comm, fd, "   Global: entire computation\n"));
1475   PetscCall(PetscFPrintf(comm, fd, "   Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n"));
1476   PetscCall(PetscFPrintf(comm, fd, "      %%T - percent time in this phase         %%F - percent flop in this phase\n"));
1477   PetscCall(PetscFPrintf(comm, fd, "      %%M - percent messages in this phase     %%L - percent message lengths in this phase\n"));
1478   PetscCall(PetscFPrintf(comm, fd, "      %%R - percent reductions in this phase\n"));
1479   PetscCall(PetscFPrintf(comm, fd, "   Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n"));
1480   if (PetscLogMemory) {
1481     PetscCall(PetscFPrintf(comm, fd, "   Memory usage is summed over all MPI processes, it is given in mega-bytes\n"));
1482     PetscCall(PetscFPrintf(comm, fd, "   Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n"));
1483     PetscCall(PetscFPrintf(comm, fd, "   EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n"));
1484     PetscCall(PetscFPrintf(comm, fd, "   MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n"));
1485     PetscCall(PetscFPrintf(comm, fd, "   RMI Mbytes: Increase in resident memory (sum over all calls to event)\n"));
1486   }
1487 #if defined(PETSC_HAVE_DEVICE)
1488   PetscCall(PetscFPrintf(comm, fd, "   GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n"));
1489   PetscCall(PetscFPrintf(comm, fd, "   CpuToGpu Count: total number of CPU to GPU copies per processor\n"));
1490   PetscCall(PetscFPrintf(comm, fd, "   CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n"));
1491   PetscCall(PetscFPrintf(comm, fd, "   GpuToCpu Count: total number of GPU to CPU copies per processor\n"));
1492   PetscCall(PetscFPrintf(comm, fd, "   GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n"));
1493   PetscCall(PetscFPrintf(comm, fd, "   GPU %%F: percent flops on GPU in this event\n"));
1494 #endif
1495   PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n"));
1496 
1497   PetscCall(PetscLogViewWarnDebugging(comm, fd));
1498 
1499   /* Report events */
1500   PetscCall(PetscFPrintf(comm, fd, "Event                Count      Time (sec)     Flop                              --- Global ---  --- Stage ----  Total"));
1501   if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "  Malloc EMalloc MMalloc RMI"));
1502 #if defined(PETSC_HAVE_DEVICE)
1503   PetscCall(PetscFPrintf(comm, fd, "   GPU    - CpuToGpu -   - GpuToCpu - GPU"));
1504 #endif
1505   PetscCall(PetscFPrintf(comm, fd, "\n"));
1506   PetscCall(PetscFPrintf(comm, fd, "                   Max Ratio  Max     Ratio   Max  Ratio  Mess   AvgLen  Reduct  %%T %%F %%M %%L %%R  %%T %%F %%M %%L %%R Mflop/s"));
1507   if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes"));
1508 #if defined(PETSC_HAVE_DEVICE)
1509   PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count   Size   Count   Size  %%F"));
1510 #endif
1511   PetscCall(PetscFPrintf(comm, fd, "\n"));
1512   PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------"));
1513   if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------"));
1514 #if defined(PETSC_HAVE_DEVICE)
1515   PetscCall(PetscFPrintf(comm, fd, "---------------------------------------"));
1516 #endif
1517   PetscCall(PetscFPrintf(comm, fd, "\n"));
1518 
1519 #if defined(PETSC_HAVE_DEVICE)
1520   /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */
1521   PetscCall(PetscLogStateGetEventFromName(state, "TAOSolve", &TAO_Solve));
1522   PetscCall(PetscLogStateGetEventFromName(state, "TSStep", &TS_Step));
1523   PetscCall(PetscLogStateGetEventFromName(state, "SNESSolve", &SNES_Solve));
1524   PetscCall(PetscLogStateGetEventFromName(state, "KSPSolve", &KSP_Solve));
1525 #endif
1526 
1527   for (stage = 0; stage < numStages; stage++) {
1528     PetscInt            stage_id;
1529     PetscEventPerfInfo *stage_info;
1530     const char         *stage_name;
1531 
1532     if (!(stageVisible[stage] && stageUsed[stage])) continue;
1533     PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1534     PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1535     PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stage_name));
1536     stage_info = &zero_info;
1537     if (localStageUsed[stage]) {
1538       PetscStagePerf *stage_perf_info;
1539 
1540       PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_perf_info));
1541       stage_info = &stage_perf_info->perfInfo;
1542     }
1543     PetscCall(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1544     PetscCall(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1545     PetscCall(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1546     PetscCall(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1547     PetscCall(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1548     mess *= 0.5;
1549     messLen *= 0.5;
1550     red /= size;
1551 
1552     for (event = 0; event < numEvents; event++) {
1553       PetscInt            event_id;
1554       PetscEventPerfInfo *event_info = &zero_info;
1555       PetscBool           is_zero    = PETSC_FALSE;
1556       const char         *event_name;
1557 
1558       PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id));
1559       PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name));
1560       if (event_id >= 0 && stage_id >= 0) { PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &event_info)); }
1561       PetscCall(PetscMemcmp(event_info, &zero_info, sizeof(zero_info), &is_zero));
1562       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm));
1563       if (!is_zero) {
1564         flopr = event_info->flops;
1565         PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1566         PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1567         PetscCall(MPIU_Allreduce(&event_info->flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1568         PetscCall(MPIU_Allreduce(&event_info->time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1569         PetscCall(MPIU_Allreduce(&event_info->time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1570         PetscCall(MPIU_Allreduce(&event_info->time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1571         PetscCall(MPIU_Allreduce(&event_info->numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1572         PetscCall(MPIU_Allreduce(&event_info->messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1573         PetscCall(MPIU_Allreduce(&event_info->numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1574         PetscCall(MPIU_Allreduce(&event_info->count, &minC, 1, MPI_INT, MPI_MIN, comm));
1575         PetscCall(MPIU_Allreduce(&event_info->count, &maxC, 1, MPI_INT, MPI_MAX, comm));
1576         if (PetscLogMemory) {
1577           PetscCall(MPIU_Allreduce(&event_info->memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1578           PetscCall(MPIU_Allreduce(&event_info->mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1579           PetscCall(MPIU_Allreduce(&event_info->mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1580           PetscCall(MPIU_Allreduce(&event_info->mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1581         }
1582 #if defined(PETSC_HAVE_DEVICE)
1583         PetscCall(MPIU_Allreduce(&event_info->CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1584         PetscCall(MPIU_Allreduce(&event_info->GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1585         PetscCall(MPIU_Allreduce(&event_info->CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1586         PetscCall(MPIU_Allreduce(&event_info->GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1587         PetscCall(MPIU_Allreduce(&event_info->GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1588         PetscCall(MPIU_Allreduce(&event_info->GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1589 #endif
1590         if (mint < 0.0) {
1591           PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, event_name));
1592           mint = 0;
1593         }
1594         PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, event_name);
1595 #if defined(PETSC_HAVE_DEVICE)
1596         /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */
1597         if (!PetscLogGpuTimeFlag && petsc_gflops > 0) {
1598           memcpy(&gmaxt, &nas, sizeof(PetscLogDouble));
1599           if (event_id != SNES_Solve && event_id != KSP_Solve && event_id != TS_Step && event_id != TAO_Solve) {
1600             memcpy(&mint, &nas, sizeof(PetscLogDouble));
1601             memcpy(&maxt, &nas, sizeof(PetscLogDouble));
1602           }
1603         }
1604 #endif
1605         totm *= 0.5;
1606         totml *= 0.5;
1607         totr /= size;
1608 
1609         if (maxC != 0) {
1610           if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC;
1611           else ratC = 0.0;
1612           if (mint != 0.0) ratt = maxt / mint;
1613           else ratt = 0.0;
1614           if (minf != 0.0) ratf = maxf / minf;
1615           else ratf = 0.0;
1616           if (TotalTime != 0.0) fracTime = tott / TotalTime;
1617           else fracTime = 0.0;
1618           if (TotalFlops != 0.0) fracFlops = totf / TotalFlops;
1619           else fracFlops = 0.0;
1620           if (stageTime != 0.0) fracStageTime = tott / stageTime;
1621           else fracStageTime = 0.0;
1622           if (flops != 0.0) fracStageFlops = totf / flops;
1623           else fracStageFlops = 0.0;
1624           if (numMessages != 0.0) fracMess = totm / numMessages;
1625           else fracMess = 0.0;
1626           if (messageLength != 0.0) fracMessLen = totml / messageLength;
1627           else fracMessLen = 0.0;
1628           if (numReductions != 0.0) fracRed = totr / numReductions;
1629           else fracRed = 0.0;
1630           if (mess != 0.0) fracStageMess = totm / mess;
1631           else fracStageMess = 0.0;
1632           if (messLen != 0.0) fracStageMessLen = totml / messLen;
1633           else fracStageMessLen = 0.0;
1634           if (red != 0.0) fracStageRed = totr / red;
1635           else fracStageRed = 0.0;
1636           if (totm != 0.0) totml /= totm;
1637           else totml = 0.0;
1638           if (maxt != 0.0) flopr = totf / maxt;
1639           else flopr = 0.0;
1640           if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0)
1641             PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6));
1642           else
1643             PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6));
1644           if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f   %5.0f   %5.0f   %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6));
1645 #if defined(PETSC_HAVE_DEVICE)
1646           if (totf != 0.0) fracgflops = gflops / totf;
1647           else fracgflops = 0.0;
1648           if (gmaxt != 0.0) gflopr = gflops / gmaxt;
1649           else gflopr = 0.0;
1650           PetscCall(PetscFPrintf(comm, fd, "   %5.0f   %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops));
1651 #endif
1652           PetscCall(PetscFPrintf(comm, fd, "\n"));
1653         }
1654       }
1655     }
1656   }
1657 
1658   /* Memory usage and object creation */
1659   PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------"));
1660   if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------"));
1661 #if defined(PETSC_HAVE_DEVICE)
1662   PetscCall(PetscFPrintf(comm, fd, "---------------------------------------"));
1663 #endif
1664   PetscCall(PetscFPrintf(comm, fd, "\n"));
1665   PetscCall(PetscFPrintf(comm, fd, "\n"));
1666 
1667   /* Right now, only stages on the first processor are reported here, meaning only objects associated with
1668      the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
1669      stats for stages local to processor sets.
1670   */
1671   /* We should figure out the longest object name here (now 20 characters) */
1672   PetscCall(PetscFPrintf(comm, fd, "Object Type          Creations   Destructions. Reports information only for process 0.\n"));
1673   for (stage = 0; stage < numStages; stage++) {
1674     const char *stage_name;
1675 
1676     PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1677     PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stage_name));
1678     if (localStageUsed[stage]) {
1679       PetscInt num_classes;
1680 
1681       PetscCall(PetscLogStateGetNumClasses(state, &num_classes));
1682       for (oclass = 0; oclass < num_classes; oclass++) {
1683         PetscClassPerf *class_perf_info;
1684 
1685         PetscCall(PetscLogHandlerDefaultGetClassPerf(handler, stage, oclass, &class_perf_info));
1686         if ((class_perf_info->creations > 0) || (class_perf_info->destructions > 0)) {
1687           PetscLogClassInfo class_reg_info;
1688 
1689           PetscCall(PetscLogStateClassGetInfo(state, oclass, &class_reg_info));
1690           PetscCall(PetscFPrintf(comm, fd, "%20s %5d          %5d\n", class_reg_info.name, class_perf_info->creations, class_perf_info->destructions));
1691         }
1692       }
1693     }
1694   }
1695 
1696   PetscCall(PetscFree(localStageUsed));
1697   PetscCall(PetscFree(stageUsed));
1698   PetscCall(PetscFree(localStageVisible));
1699   PetscCall(PetscFree(stageVisible));
1700   PetscCall(PetscLogGlobalNamesDestroy(&global_stages));
1701   PetscCall(PetscLogGlobalNamesDestroy(&global_events));
1702 
1703   /* Information unrelated to this particular run */
1704   PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n"));
1705   PetscCall(PetscTime(&y));
1706   PetscCall(PetscTime(&x));
1707   PetscCall(PetscTime(&y));
1708   PetscCall(PetscTime(&y));
1709   PetscCall(PetscTime(&y));
1710   PetscCall(PetscTime(&y));
1711   PetscCall(PetscTime(&y));
1712   PetscCall(PetscTime(&y));
1713   PetscCall(PetscTime(&y));
1714   PetscCall(PetscTime(&y));
1715   PetscCall(PetscTime(&y));
1716   PetscCall(PetscTime(&y));
1717   PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0));
1718   /* MPI information */
1719   if (size > 1) {
1720     MPI_Status  status;
1721     PetscMPIInt tag;
1722     MPI_Comm    newcomm;
1723 
1724     PetscCallMPI(MPI_Barrier(comm));
1725     PetscCall(PetscTime(&x));
1726     PetscCallMPI(MPI_Barrier(comm));
1727     PetscCallMPI(MPI_Barrier(comm));
1728     PetscCallMPI(MPI_Barrier(comm));
1729     PetscCallMPI(MPI_Barrier(comm));
1730     PetscCallMPI(MPI_Barrier(comm));
1731     PetscCall(PetscTime(&y));
1732     PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0));
1733     PetscCall(PetscCommDuplicate(comm, &newcomm, &tag));
1734     PetscCallMPI(MPI_Barrier(comm));
1735     if (rank) {
1736       PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status));
1737       PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm));
1738     } else {
1739       PetscCall(PetscTime(&x));
1740       PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm));
1741       PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status));
1742       PetscCall(PetscTime(&y));
1743       PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size));
1744     }
1745     PetscCall(PetscCommDestroy(&newcomm));
1746   }
1747   PetscCall(PetscOptionsView(NULL, viewer));
1748 
1749   /* Machine and compile information */
1750   if (PetscDefined(USE_FORTRAN_KERNELS)) {
1751     PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n"));
1752   } else {
1753     PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n"));
1754   }
1755   if (PetscDefined(USE_64BIT_INDICES)) {
1756     PetscCall(PetscFPrintf(comm, fd, "Compiled with 64-bit PetscInt\n"));
1757   } else if (PetscDefined(USE___FLOAT128)) {
1758     PetscCall(PetscFPrintf(comm, fd, "Compiled with 32-bit PetscInt\n"));
1759   }
1760   if (PetscDefined(USE_REAL_SINGLE)) {
1761     PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n"));
1762   } else if (PetscDefined(USE___FLOAT128)) {
1763     PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n"));
1764   }
1765   if (PetscDefined(USE_REAL_MAT_SINGLE)) {
1766     PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n"));
1767   } else {
1768     PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n"));
1769   }
1770   PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt)));
1771 
1772   PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions));
1773   PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo));
1774   PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo));
1775   PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo));
1776   PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo));
1777 
1778   /* Cleanup */
1779   PetscCall(PetscFPrintf(comm, fd, "\n"));
1780   PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd));
1781   PetscCall(PetscLogViewWarnDebugging(comm, fd));
1782   PetscCall(PetscFPTrapPop());
1783   PetscFunctionReturn(PETSC_SUCCESS);
1784 }
1785 
1786 static PetscErrorCode PetscLogHandlerView_Default(PetscLogHandler handler, PetscViewer viewer)
1787 {
1788   PetscViewerFormat format;
1789   PetscFunctionBegin;
1790   PetscCall(PetscViewerGetFormat(viewer, &format));
1791   if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) {
1792     PetscCall(PetscLogHandlerView_Default_Info(handler, viewer));
1793   } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1794     PetscCall(PetscLogHandlerView_Default_Detailed(handler, viewer));
1795   } else if (format == PETSC_VIEWER_ASCII_CSV) {
1796     PetscCall(PetscLogHandlerView_Default_CSV(handler, viewer));
1797   }
1798   PetscFunctionReturn(PETSC_SUCCESS);
1799 }
1800 
1801 /*MC
1802   PETSC_LOG_HANDLER_DEFAULT - PETSC_LOG_DEFAULT = "default" -  A `PetscLogHandler` that collects data for PETSc
1803   default profiling log viewers (`PetscLogView()` and `PetscLogDump()`).
1804 
1805   Options Database Keys:
1806 + -log_include_actions - include a growing list of actions (event beginnings and endings, object creations and destructions) in `PetscLogDump()` (`PetscLogActions()`).
1807 - -log_include_objects - include a growing list of object creations and destructions in `PetscLogDump()` (`PetscLogObjects()`).
1808 
1809   Level: developer
1810 
1811 .seealso: [](ch_profiling), `PetscLogHandler`
1812 M*/
1813 
1814 PETSC_INTERN PetscErrorCode PetscLogHandlerCreate_Default(PetscLogHandler handler)
1815 {
1816   PetscFunctionBegin;
1817   PetscCall(PetscLogHandlerContextCreate_Default((PetscLogHandler_Default *)&handler->data));
1818   handler->ops->destroy       = PetscLogHandlerDestroy_Default;
1819   handler->ops->eventbegin    = PetscLogHandlerEventBegin_Default;
1820   handler->ops->eventend      = PetscLogHandlerEventEnd_Default;
1821   handler->ops->eventsync     = PetscLogHandlerEventSync_Default;
1822   handler->ops->objectcreate  = PetscLogHandlerObjectCreate_Default;
1823   handler->ops->objectdestroy = PetscLogHandlerObjectDestroy_Default;
1824   handler->ops->stagepush     = PetscLogHandlerStagePush_Default;
1825   handler->ops->stagepop      = PetscLogHandlerStagePop_Default;
1826   handler->ops->view          = PetscLogHandlerView_Default;
1827   PetscFunctionReturn(PETSC_SUCCESS);
1828 }
1829