1 #include <petscviewer.h>
2 #include <petscdevice.h>
3 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/
4 #include <petsc/private/loghandlerimpl.h>
5 #include <petsc/private/deviceimpl.h>
6 #include <petscconfiginfo.h>
7 #include <petscmachineinfo.h>
8 #include "logdefault.h"
9
PetscEventPerfInfoInit(PetscEventPerfInfo * eventInfo)10 static PetscErrorCode PetscEventPerfInfoInit(PetscEventPerfInfo *eventInfo)
11 {
12 PetscFunctionBegin;
13 PetscCall(PetscMemzero(eventInfo, sizeof(*eventInfo)));
14 eventInfo->visible = PETSC_TRUE;
15 eventInfo->id = -1;
16 eventInfo->dof[0] = -1.0;
17 eventInfo->dof[1] = -1.0;
18 eventInfo->dof[2] = -1.0;
19 eventInfo->dof[3] = -1.0;
20 eventInfo->dof[4] = -1.0;
21 eventInfo->dof[5] = -1.0;
22 eventInfo->dof[6] = -1.0;
23 eventInfo->dof[7] = -1.0;
24 eventInfo->errors[0] = -1.0;
25 eventInfo->errors[1] = -1.0;
26 eventInfo->errors[2] = -1.0;
27 eventInfo->errors[3] = -1.0;
28 eventInfo->errors[4] = -1.0;
29 eventInfo->errors[5] = -1.0;
30 eventInfo->errors[6] = -1.0;
31 eventInfo->errors[7] = -1.0;
32 PetscFunctionReturn(PETSC_SUCCESS);
33 }
34
PetscEventPerfInfoTic_Internal(PetscEventPerfInfo * eventInfo,PetscLogDouble time,PetscBool logMemory,int event,PetscBool resume)35 static PetscErrorCode PetscEventPerfInfoTic_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool resume)
36 {
37 PetscFunctionBegin;
38 if (resume) {
39 eventInfo->timeTmp -= time;
40 eventInfo->flopsTmp -= petsc_TotalFlops_th;
41 } else {
42 eventInfo->timeTmp = -time;
43 eventInfo->flopsTmp = -petsc_TotalFlops_th;
44 }
45 eventInfo->numMessages -= petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th;
46 eventInfo->messageLength -= petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len_th + petsc_send_len_th;
47 eventInfo->numReductions -= petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th;
48 #if defined(PETSC_HAVE_DEVICE)
49 eventInfo->CpuToGpuCount -= petsc_ctog_ct_th;
50 eventInfo->GpuToCpuCount -= petsc_gtoc_ct_th;
51 eventInfo->CpuToGpuSize -= petsc_ctog_sz_th;
52 eventInfo->GpuToCpuSize -= petsc_gtoc_sz_th;
53 eventInfo->GpuFlops -= petsc_gflops_th;
54 eventInfo->GpuTime -= petsc_gtime;
55 if (PetscLogGpuEnergyFlag) eventInfo->GpuEnergy -= petsc_genergy;
56 if (PetscLogGpuEnergyMeterFlag) eventInfo->GpuEnergy -= petsc_genergy_meter;
57 #endif
58 if (logMemory) {
59 PetscLogDouble usage;
60 PetscCall(PetscMemoryGetCurrentUsage(&usage));
61 eventInfo->memIncrease -= usage;
62 PetscCall(PetscMallocGetCurrentUsage(&usage));
63 eventInfo->mallocSpace -= usage;
64 PetscCall(PetscMallocGetMaximumUsage(&usage));
65 eventInfo->mallocIncrease -= usage;
66 PetscCall(PetscMallocPushMaximumUsage(event));
67 }
68 PetscFunctionReturn(PETSC_SUCCESS);
69 }
70
PetscEventPerfInfoTic(PetscEventPerfInfo * eventInfo,PetscLogDouble time,PetscBool logMemory,int event)71 static PetscErrorCode PetscEventPerfInfoTic(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event)
72 {
73 PetscFunctionBegin;
74 PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_FALSE));
75 PetscFunctionReturn(PETSC_SUCCESS);
76 }
77
PetscEventPerfInfoResume(PetscEventPerfInfo * eventInfo,PetscLogDouble time,PetscBool logMemory,int event)78 static PetscErrorCode PetscEventPerfInfoResume(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event)
79 {
80 PetscFunctionBegin;
81 PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_TRUE));
82 PetscFunctionReturn(PETSC_SUCCESS);
83 }
84
PetscEventPerfInfoToc_Internal(PetscEventPerfInfo * eventInfo,PetscLogDouble time,PetscBool logMemory,int event,PetscBool pause)85 static PetscErrorCode PetscEventPerfInfoToc_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool pause)
86 {
87 PetscFunctionBegin;
88 eventInfo->timeTmp += time;
89 eventInfo->flopsTmp += petsc_TotalFlops_th;
90 if (!pause) {
91 eventInfo->time += eventInfo->timeTmp;
92 eventInfo->time2 += eventInfo->timeTmp * eventInfo->timeTmp;
93 eventInfo->flops += eventInfo->flopsTmp;
94 eventInfo->flops2 += eventInfo->flopsTmp * eventInfo->flopsTmp;
95 }
96 eventInfo->numMessages += petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th;
97 eventInfo->messageLength += petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len + petsc_send_len_th;
98 eventInfo->numReductions += petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th;
99 #if defined(PETSC_HAVE_DEVICE)
100 eventInfo->CpuToGpuCount += petsc_ctog_ct_th;
101 eventInfo->GpuToCpuCount += petsc_gtoc_ct_th;
102 eventInfo->CpuToGpuSize += petsc_ctog_sz_th;
103 eventInfo->GpuToCpuSize += petsc_gtoc_sz_th;
104 eventInfo->GpuFlops += petsc_gflops_th;
105 eventInfo->GpuTime += petsc_gtime;
106 if (PetscLogGpuEnergyFlag) eventInfo->GpuEnergy += petsc_genergy;
107 if (PetscLogGpuEnergyMeterFlag) eventInfo->GpuEnergy += petsc_genergy_meter;
108 #endif
109 if (logMemory) {
110 PetscLogDouble usage, musage;
111 PetscCall(PetscMemoryGetCurrentUsage(&usage)); /* the comments below match the column labels printed in PetscLogView_Default() */
112 eventInfo->memIncrease += usage; /* RMI */
113 PetscCall(PetscMallocGetCurrentUsage(&usage));
114 eventInfo->mallocSpace += usage; /* Malloc */
115 PetscCall(PetscMallocPopMaximumUsage(event, &musage));
116 eventInfo->mallocIncreaseEvent = PetscMax(musage - usage, eventInfo->mallocIncreaseEvent); /* EMalloc */
117 PetscCall(PetscMallocGetMaximumUsage(&usage));
118 eventInfo->mallocIncrease += usage; /* MMalloc */
119 }
120 PetscFunctionReturn(PETSC_SUCCESS);
121 }
122
PetscEventPerfInfoToc(PetscEventPerfInfo * eventInfo,PetscLogDouble time,PetscBool logMemory,int event)123 static PetscErrorCode PetscEventPerfInfoToc(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event)
124 {
125 PetscFunctionBegin;
126 PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_FALSE));
127 PetscFunctionReturn(PETSC_SUCCESS);
128 }
129
PetscEventPerfInfoPause(PetscEventPerfInfo * eventInfo,PetscLogDouble time,PetscBool logMemory,int event)130 static PetscErrorCode PetscEventPerfInfoPause(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event)
131 {
132 PetscFunctionBegin;
133 PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_TRUE));
134 PetscFunctionReturn(PETSC_SUCCESS);
135 }
136
PetscEventPerfInfoAdd_Internal(const PetscEventPerfInfo * eventInfo,PetscEventPerfInfo * outInfo)137 static PetscErrorCode PetscEventPerfInfoAdd_Internal(const PetscEventPerfInfo *eventInfo, PetscEventPerfInfo *outInfo)
138 {
139 PetscFunctionBegin;
140 outInfo->count += eventInfo->count;
141 outInfo->time += eventInfo->time;
142 outInfo->time2 += eventInfo->time2;
143 outInfo->flops += eventInfo->flops;
144 outInfo->flops2 += eventInfo->flops2;
145 outInfo->numMessages += eventInfo->numMessages;
146 outInfo->messageLength += eventInfo->messageLength;
147 outInfo->numReductions += eventInfo->numReductions;
148 #if defined(PETSC_HAVE_DEVICE)
149 outInfo->CpuToGpuCount += eventInfo->CpuToGpuCount;
150 outInfo->GpuToCpuCount += eventInfo->GpuToCpuCount;
151 outInfo->CpuToGpuSize += eventInfo->CpuToGpuSize;
152 outInfo->GpuToCpuSize += eventInfo->GpuToCpuSize;
153 outInfo->GpuFlops += eventInfo->GpuFlops;
154 outInfo->GpuTime += eventInfo->GpuTime;
155 outInfo->GpuEnergy += eventInfo->GpuEnergy;
156 #endif
157 outInfo->memIncrease += eventInfo->memIncrease;
158 outInfo->mallocSpace += eventInfo->mallocSpace;
159 outInfo->mallocIncreaseEvent += eventInfo->mallocIncreaseEvent;
160 outInfo->mallocIncrease += eventInfo->mallocIncrease;
161 PetscFunctionReturn(PETSC_SUCCESS);
162 }
163
164 PETSC_LOG_RESIZABLE_ARRAY(EventPerfArray, PetscEventPerfInfo, PetscLogEvent, PetscEventPerfInfoInit, NULL, NULL)
165
166 /* --- PetscClassPerf --- */
167
168 typedef struct {
169 int creations; /* The number of objects of this class created */
170 int destructions; /* The number of objects of this class destroyed */
171 PetscLogDouble mem; /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */
172 PetscLogDouble descMem; /* The total memory allocated by descendents of these objects; this is completely wrong and should possibly be removed */
173 } PetscClassPerf;
174
PetscClassPerfInit(PetscClassPerf * classInfo)175 static PetscErrorCode PetscClassPerfInit(PetscClassPerf *classInfo)
176 {
177 PetscFunctionBegin;
178 PetscCall(PetscMemzero(classInfo, sizeof(*classInfo)));
179 PetscFunctionReturn(PETSC_SUCCESS);
180 }
181
182 PETSC_LOG_RESIZABLE_ARRAY(ClassPerfArray, PetscClassPerf, PetscLogClass, PetscClassPerfInit, NULL, NULL)
183
184 /* --- PetscStagePerf --- */
185
186 typedef struct _PetscStagePerf {
187 PetscBool used; /* The stage was pushed on this processor */
188 PetscEventPerfInfo perfInfo; /* The stage performance information */
189 PetscLogEventPerfArray eventLog; /* The event information for this stage */
190 PetscLogClassPerfArray classLog; /* The class information for this stage */
191 } PetscStagePerf;
192
PetscStageInfoInit(PetscStagePerf * stageInfo)193 static PetscErrorCode PetscStageInfoInit(PetscStagePerf *stageInfo)
194 {
195 PetscFunctionBegin;
196 PetscCall(PetscMemzero(stageInfo, sizeof(*stageInfo)));
197 PetscCall(PetscLogEventPerfArrayCreate(128, &stageInfo->eventLog));
198 PetscCall(PetscLogClassPerfArrayCreate(128, &stageInfo->classLog));
199 PetscCall(PetscEventPerfInfoInit(&stageInfo->perfInfo));
200 PetscFunctionReturn(PETSC_SUCCESS);
201 }
202
PetscStageInfoReset(PetscStagePerf * stageInfo)203 static PetscErrorCode PetscStageInfoReset(PetscStagePerf *stageInfo)
204 {
205 PetscFunctionBegin;
206 PetscCall(PetscLogEventPerfArrayDestroy(&stageInfo->eventLog));
207 PetscCall(PetscLogClassPerfArrayDestroy(&stageInfo->classLog));
208 PetscFunctionReturn(PETSC_SUCCESS);
209 }
210
211 PETSC_LOG_RESIZABLE_ARRAY(StageInfoArray, PetscStagePerf, PetscLogStage, PetscStageInfoInit, PetscStageInfoReset, NULL)
212
213 /* --- Action --- */
214
215 /* The structure for action logging */
216 typedef enum {
217 PETSC_LOG_ACTION_CREATE,
218 PETSC_LOG_ACTION_DESTROY,
219 PETSC_LOG_ACTION_BEGIN,
220 PETSC_LOG_ACTION_END,
221 } PetscLogActionType;
222
223 typedef struct _Action {
224 PetscLogActionType action; /* The type of execution */
225 PetscLogEvent event; /* The event number */
226 PetscClassId classid; /* The event class id */
227 PetscLogDouble time; /* The time of occurrence */
228 PetscLogDouble flops; /* The cumulative flops */
229 PetscLogDouble mem; /* The current memory usage */
230 PetscLogDouble maxmem; /* The maximum memory usage */
231 PetscObjectId id1, id2, id3; /* The ids of associated objects */
232 } Action;
233
234 PETSC_LOG_RESIZABLE_ARRAY(ActionArray, Action, PetscLogEvent, NULL, NULL, NULL)
235
236 /* --- Object --- */
237
238 /* The structure for object logging */
239 typedef struct _Object {
240 PetscObject obj; /* The associated PetscObject */
241 int parent; /* The parent id */
242 PetscLogDouble mem; /* The memory associated with the object */
243 char name[64]; /* The object name */
244 char info[64]; /* The information string */
245 } Object;
246
247 PETSC_LOG_RESIZABLE_ARRAY(ObjectArray, Object, PetscObject, NULL, NULL, NULL)
248
249 /* Map from (threadid,stage,event) to perfInfo data struct */
250 #include <petsc/private/hashmapijk.h>
251
252 PETSC_HASH_MAP(HMapEvent, PetscHashIJKKey, PetscEventPerfInfo *, PetscHashIJKKeyHash, PetscHashIJKKeyEqual, NULL)
253
254 typedef struct _n_PetscLogHandler_Default *PetscLogHandler_Default;
255 struct _n_PetscLogHandler_Default {
256 PetscLogStageInfoArray stages;
257 PetscSpinlock lock;
258 PetscLogActionArray petsc_actions;
259 PetscLogObjectArray petsc_objects;
260 PetscBool petsc_logActions;
261 PetscBool petsc_logObjects;
262 int petsc_numObjectsCreated;
263 int petsc_numObjectsDestroyed;
264 PetscHMapEvent eventInfoMap_th;
265 int pause_depth;
266 PetscBool use_threadsafe;
267 };
268
269 /* --- PetscLogHandler_Default --- */
270
PetscLogHandlerContextCreate_Default(PetscLogHandler_Default * def_p)271 static PetscErrorCode PetscLogHandlerContextCreate_Default(PetscLogHandler_Default *def_p)
272 {
273 PetscLogHandler_Default def;
274
275 PetscFunctionBegin;
276 PetscCall(PetscNew(def_p));
277 def = *def_p;
278 PetscCall(PetscLogStageInfoArrayCreate(8, &def->stages));
279 PetscCall(PetscLogActionArrayCreate(64, &def->petsc_actions));
280 PetscCall(PetscLogObjectArrayCreate(64, &def->petsc_objects));
281 PetscCall(PetscSpinlockCreate(&def->lock));
282
283 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_actions", &def->petsc_logActions, NULL));
284 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_objects", &def->petsc_logObjects, NULL));
285 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_handler_default_use_threadsafe_events", &def->use_threadsafe, NULL));
286 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) PetscCall(PetscHMapEventCreate(&def->eventInfoMap_th));
287 PetscFunctionReturn(PETSC_SUCCESS);
288 }
289
PetscLogHandlerDestroy_Default(PetscLogHandler h)290 static PetscErrorCode PetscLogHandlerDestroy_Default(PetscLogHandler h)
291 {
292 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
293
294 PetscFunctionBegin;
295 PetscCall(PetscLogStageInfoArrayDestroy(&def->stages));
296 PetscCall(PetscLogActionArrayDestroy(&def->petsc_actions));
297 PetscCall(PetscLogObjectArrayDestroy(&def->petsc_objects));
298 PetscCall(PetscSpinlockDestroy(&def->lock));
299 if (def->eventInfoMap_th) {
300 PetscEventPerfInfo **array;
301 PetscInt n, off = 0;
302
303 PetscCall(PetscHMapEventGetSize(def->eventInfoMap_th, &n));
304 PetscCall(PetscMalloc1(n, &array));
305 PetscCall(PetscHMapEventGetVals(def->eventInfoMap_th, &off, array));
306 for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i]));
307 PetscCall(PetscFree(array));
308 PetscCall(PetscHMapEventDestroy(&def->eventInfoMap_th));
309 }
310 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerGetEventPerfInfo_C", NULL));
311 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerGetStagePerfInfo_C", NULL));
312 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerSetLogActions_C", NULL));
313 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerSetLogObjects_C", NULL));
314 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerLogObjectState_C", NULL));
315 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerGetNumObjects_C", NULL));
316 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventDeactivatePush_C", NULL));
317 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventDeactivatePop_C", NULL));
318 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventsPause_C", NULL));
319 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventsResume_C", NULL));
320 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerDump_C", NULL));
321 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerStageSetVisible_C", NULL));
322 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerStageGetVisible_C", NULL));
323 PetscCall(PetscFree(def));
324 PetscFunctionReturn(PETSC_SUCCESS);
325 }
326
PetscLogHandlerDefaultGetStageInfo(PetscLogHandler handler,PetscLogStage stage,PetscStagePerf ** stage_info_p)327 static PetscErrorCode PetscLogHandlerDefaultGetStageInfo(PetscLogHandler handler, PetscLogStage stage, PetscStagePerf **stage_info_p)
328 {
329 PetscStagePerf *stage_info = NULL;
330 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
331
332 PetscFunctionBegin;
333 PetscCall(PetscLogStageInfoArrayResize(def->stages, stage + 1));
334 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info));
335 *stage_info_p = stage_info;
336 PetscFunctionReturn(PETSC_SUCCESS);
337 }
338
PetscLogHandlerGetEventPerfInfo_Default(PetscLogHandler handler,PetscLogStage stage,PetscLogEvent event,PetscEventPerfInfo ** event_info_p)339 static PetscErrorCode PetscLogHandlerGetEventPerfInfo_Default(PetscLogHandler handler, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **event_info_p)
340 {
341 PetscEventPerfInfo *event_info = NULL;
342 PetscStagePerf *stage_info = NULL;
343 PetscLogEventPerfArray event_log;
344
345 PetscFunctionBegin;
346 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(handler->state, &stage));
347 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info));
348 event_log = stage_info->eventLog;
349 PetscCall(PetscLogEventPerfArrayResize(event_log, event + 1));
350 PetscCall(PetscLogEventPerfArrayGetRef(event_log, event, &event_info));
351 event_info->id = event;
352 *event_info_p = event_info;
353 PetscFunctionReturn(PETSC_SUCCESS);
354 }
355
PetscLogHandlerGetStagePerfInfo_Default(PetscLogHandler handler,PetscLogStage stage,PetscEventPerfInfo ** stage_info_p)356 static PetscErrorCode PetscLogHandlerGetStagePerfInfo_Default(PetscLogHandler handler, PetscLogStage stage, PetscEventPerfInfo **stage_info_p)
357 {
358 PetscStagePerf *stage_perf_info = NULL;
359
360 PetscFunctionBegin;
361 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(handler->state, &stage));
362 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_perf_info));
363 *stage_info_p = &stage_perf_info->perfInfo;
364 PetscFunctionReturn(PETSC_SUCCESS);
365 }
366
PetscLogHandlerDefaultGetClassPerf(PetscLogHandler handler,PetscLogStage stage,PetscLogClass clss,PetscClassPerf ** class_info)367 static PetscErrorCode PetscLogHandlerDefaultGetClassPerf(PetscLogHandler handler, PetscLogStage stage, PetscLogClass clss, PetscClassPerf **class_info)
368 {
369 PetscLogClassPerfArray class_log;
370 PetscStagePerf *stage_info;
371
372 PetscFunctionBegin;
373 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info));
374 class_log = stage_info->classLog;
375 PetscCall(PetscLogClassPerfArrayResize(class_log, clss + 1));
376 PetscCall(PetscLogClassPerfArrayGetRef(class_log, clss, class_info));
377 PetscFunctionReturn(PETSC_SUCCESS);
378 }
379
380 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wconversion")
PetscLogHandlerObjectCreate_Default(PetscLogHandler h,PetscObject obj)381 static PetscErrorCode PetscLogHandlerObjectCreate_Default(PetscLogHandler h, PetscObject obj)
382 {
383 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
384 PetscLogState state;
385 PetscLogStage stage;
386 PetscClassPerf *classInfo;
387 int oclass = 0;
388
389 PetscFunctionBegin;
390 PetscCall(PetscLogHandlerGetState(h, &state));
391 PetscCall(PetscSpinlockLock(&def->lock));
392 /* Record stage info */
393 PetscCall(PetscLogStateGetCurrentStage(state, &stage));
394 PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass));
395 PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo));
396 classInfo->creations++;
397 /* Record the creation action */
398 if (def->petsc_logActions) {
399 Action new_action;
400
401 PetscCall(PetscTime(&new_action.time));
402 new_action.time -= petsc_BaseTime;
403 new_action.action = PETSC_LOG_ACTION_CREATE;
404 new_action.event = -1;
405 new_action.classid = obj->classid;
406 new_action.id1 = obj->id;
407 new_action.id2 = -1;
408 new_action.id3 = -1;
409 new_action.flops = petsc_TotalFlops;
410 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem));
411 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem));
412 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action));
413 }
414 /* We don't just use obj->id to count all objects that are created
415 because PetscLogHandlers are objects and PetscLogObjectDestroy() will not
416 be called for them: the number of objects created and destroyed as counted
417 here and below would have an imbalance */
418 def->petsc_numObjectsCreated++;
419 /* Record the object */
420 if (def->petsc_logObjects) {
421 Object new_object;
422 PetscInt objid;
423
424 new_object.parent = -1;
425 new_object.obj = obj;
426 new_object.mem = 0;
427
428 PetscCall(PetscMemzero(new_object.name, sizeof(new_object.name)));
429 PetscCall(PetscMemzero(new_object.info, sizeof(new_object.info)));
430 PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1");
431 PetscCall(PetscIntCast(obj->id, &objid));
432 PetscCall(PetscLogObjectArrayResize(def->petsc_objects, objid));
433 PetscCall(PetscLogObjectArraySet(def->petsc_objects, objid - 1, new_object));
434 }
435 PetscCall(PetscSpinlockUnlock(&def->lock));
436 PetscFunctionReturn(PETSC_SUCCESS);
437 }
438
PetscLogHandlerObjectDestroy_Default(PetscLogHandler h,PetscObject obj)439 static PetscErrorCode PetscLogHandlerObjectDestroy_Default(PetscLogHandler h, PetscObject obj)
440 {
441 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
442 PetscLogState state;
443 PetscLogStage stage;
444 PetscClassPerf *classInfo;
445 int oclass = 0;
446
447 PetscFunctionBegin;
448 PetscCall(PetscLogHandlerGetState(h, &state));
449 /* Record stage info */
450 PetscCall(PetscSpinlockLock(&def->lock));
451 PetscCall(PetscLogStateGetCurrentStage(state, &stage));
452 if (stage >= 0) {
453 /* stage < 0 can happen if the log summary is output before some things are destroyed */
454 PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass));
455 PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo));
456 classInfo->destructions++;
457 }
458 /* Cannot Credit all ancestors with your memory because they may have already been destroyed*/
459 def->petsc_numObjectsDestroyed++;
460 /* Dynamically enlarge logging structures */
461 /* Record the destruction action */
462 if (def->petsc_logActions) {
463 Action new_action;
464
465 PetscCall(PetscTime(&new_action.time));
466 new_action.time -= petsc_BaseTime;
467 new_action.event = -1;
468 new_action.action = PETSC_LOG_ACTION_DESTROY;
469 new_action.classid = obj->classid;
470 new_action.id1 = obj->id;
471 new_action.id2 = -1;
472 new_action.id3 = -1;
473 new_action.flops = petsc_TotalFlops;
474 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem));
475 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem));
476 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action));
477 }
478 if (def->petsc_logObjects) {
479 Object *obj_entry = NULL;
480 PetscInt objid;
481
482 PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1");
483 PetscCall(PetscIntCast(obj->id - 1, &objid));
484 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, objid, &obj_entry));
485 if (obj->name) PetscCall(PetscStrncpy(obj_entry->name, obj->name, 64));
486 obj_entry->obj = NULL;
487 }
488 PetscCall(PetscSpinlockUnlock(&def->lock));
489 PetscFunctionReturn(PETSC_SUCCESS);
490 }
491
PetscLogHandlerEventSync_Default(PetscLogHandler h,PetscLogEvent event,MPI_Comm comm)492 static PetscErrorCode PetscLogHandlerEventSync_Default(PetscLogHandler h, PetscLogEvent event, MPI_Comm comm)
493 {
494 PetscLogState state;
495 PetscLogEventInfo event_info;
496 PetscEventPerfInfo *event_perf_info;
497 int stage;
498 PetscLogDouble time = 0.0;
499
500 PetscFunctionBegin;
501 if (!PetscLogSyncOn || comm == MPI_COMM_NULL) PetscFunctionReturn(PETSC_SUCCESS);
502 PetscCall(PetscLogHandlerGetState(h, &state));
503 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info));
504 if (!event_info.collective) PetscFunctionReturn(PETSC_SUCCESS);
505 PetscCall(PetscLogStateGetCurrentStage(state, &stage));
506 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info));
507 if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
508
509 PetscCall(PetscTimeSubtract(&time));
510 PetscCallMPI(MPI_Barrier(comm));
511 PetscCall(PetscTimeAdd(&time));
512 event_perf_info->syncTime += time;
513 PetscFunctionReturn(PETSC_SUCCESS);
514 }
515
PetscLogGetStageEventPerfInfo_threaded(PetscLogHandler_Default def,PetscLogStage stage,PetscLogEvent event,PetscEventPerfInfo ** eventInfo)516 static PetscErrorCode PetscLogGetStageEventPerfInfo_threaded(PetscLogHandler_Default def, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **eventInfo)
517 {
518 PetscEventPerfInfo *leventInfo = NULL;
519 PetscHashIJKKey key;
520
521 PetscFunctionBegin;
522 #if PetscDefined(HAVE_THREADSAFETY)
523 key.i = PetscLogGetTid();
524 #else
525 key.i = 0;
526 #endif
527 key.j = stage;
528 key.k = event;
529 PetscCall(PetscSpinlockLock(&def->lock));
530 PetscCall(PetscHMapEventGet(def->eventInfoMap_th, key, &leventInfo));
531 if (!leventInfo) {
532 PetscCall(PetscNew(&leventInfo));
533 leventInfo->id = event;
534 PetscCall(PetscHMapEventSet(def->eventInfoMap_th, key, leventInfo));
535 }
536 PetscCall(PetscSpinlockUnlock(&def->lock));
537 *eventInfo = leventInfo;
538 PetscFunctionReturn(PETSC_SUCCESS);
539 }
540
PetscLogHandlerEventBegin_Default(PetscLogHandler h,PetscLogEvent event,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4)541 static PetscErrorCode PetscLogHandlerEventBegin_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
542 {
543 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
544 PetscEventPerfInfo *event_perf_info = NULL;
545 PetscLogEventInfo event_info;
546 PetscLogDouble time;
547 PetscLogState state;
548 PetscLogStage stage;
549
550 PetscFunctionBegin;
551 PetscCall(PetscLogHandlerGetState(h, &state));
552 if (PetscDefined(USE_DEBUG)) {
553 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info));
554 if (PetscUnlikely(o1)) PetscValidHeader(o1, 3);
555 if (PetscUnlikely(o2)) PetscValidHeader(o2, 4);
556 if (PetscUnlikely(o3)) PetscValidHeader(o3, 5);
557 if (PetscUnlikely(o4)) PetscValidHeader(o4, 6);
558 if (event_info.collective && o1) {
559 PetscInt64 b1[2], b2[2];
560
561 b1[0] = -o1->cidx;
562 b1[1] = o1->cidx;
563 PetscCallMPI(MPIU_Allreduce(b1, b2, 2, MPIU_INT64, MPI_MAX, PetscObjectComm(o1)));
564 PetscCheck(-b2[0] == b2[1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Collective event %s not called collectively %" PetscInt64_FMT " != %" PetscInt64_FMT, event_info.name, -b2[0], b2[1]);
565 }
566 }
567 /* Synchronization */
568 PetscCall(PetscLogHandlerEventSync_Default(h, event, PetscObjectComm(o1)));
569 PetscCall(PetscLogStateGetCurrentStage(state, &stage));
570 if (def->pause_depth > 0) stage = 0; // in pause-mode, all events run on the main stage
571 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) {
572 PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info));
573 if (event_perf_info->depth == 0) PetscCall(PetscEventPerfInfoInit(event_perf_info));
574 } else {
575 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info));
576 }
577 PetscCheck(event_perf_info->depth >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to begin a paused event, this is not allowed");
578 event_perf_info->depth++;
579 /* Check for double counting */
580 if (event_perf_info->depth > 1) PetscFunctionReturn(PETSC_SUCCESS);
581 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info));
582 /* Log the performance info */
583 event_perf_info->count++;
584 PetscCall(PetscTime(&time));
585 PetscCall(PetscEventPerfInfoTic(event_perf_info, time, PetscLogMemory, event));
586 if (def->petsc_logActions) {
587 PetscLogDouble curTime;
588 Action new_action;
589
590 PetscCall(PetscTime(&curTime));
591 new_action.time = curTime - petsc_BaseTime;
592 new_action.action = PETSC_LOG_ACTION_BEGIN;
593 new_action.event = event;
594 new_action.classid = event_info.classid;
595 new_action.id1 = o1 ? o1->id : -1;
596 new_action.id2 = o2 ? o2->id : -1;
597 new_action.id3 = o3 ? o3->id : -1;
598 new_action.flops = petsc_TotalFlops;
599 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem));
600 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem));
601 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action));
602 }
603 PetscFunctionReturn(PETSC_SUCCESS);
604 }
605
PetscLogHandlerEventEnd_Default(PetscLogHandler h,PetscLogEvent event,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4)606 static PetscErrorCode PetscLogHandlerEventEnd_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
607 {
608 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
609 PetscEventPerfInfo *event_perf_info = NULL;
610 PetscLogDouble time;
611 PetscLogState state;
612 int stage;
613 PetscLogEventInfo event_info;
614
615 PetscFunctionBegin;
616 PetscCall(PetscLogHandlerGetState(h, &state));
617 if (PetscDefined(USE_DEBUG)) {
618 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info));
619 if (PetscUnlikely(o1)) PetscValidHeader(o1, 3);
620 if (PetscUnlikely(o2)) PetscValidHeader(o2, 4);
621 if (PetscUnlikely(o3)) PetscValidHeader(o3, 5);
622 if (PetscUnlikely(o4)) PetscValidHeader(o4, 6);
623 if (event_info.collective && o1) {
624 PetscInt64 b1[2], b2[2];
625
626 b1[0] = -o1->cidx;
627 b1[1] = o1->cidx;
628 PetscCallMPI(MPIU_Allreduce(b1, b2, 2, MPIU_INT64, MPI_MAX, PetscObjectComm(o1)));
629 PetscCheck(-b2[0] == b2[1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Collective event %s not called collectively %" PetscInt64_FMT " != %" PetscInt64_FMT, event_info.name, -b2[0], b2[1]);
630 }
631 }
632 if (def->petsc_logActions) {
633 PetscLogDouble curTime;
634 Action new_action;
635
636 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info));
637 PetscCall(PetscTime(&curTime));
638 new_action.time = curTime - petsc_BaseTime;
639 new_action.action = PETSC_LOG_ACTION_END;
640 new_action.event = event;
641 new_action.classid = event_info.classid;
642 new_action.id1 = o1 ? o1->id : -1;
643 new_action.id2 = o2 ? o2->id : -2;
644 new_action.id3 = o3 ? o3->id : -3;
645 new_action.flops = petsc_TotalFlops;
646 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem));
647 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem));
648 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action));
649 }
650 PetscCall(PetscLogStateGetCurrentStage(state, &stage));
651 if (def->pause_depth > 0) stage = 0; // all events run on the main stage in pause-mode
652 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) {
653 PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info));
654 } else {
655 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info));
656 }
657 PetscCheck(event_perf_info->depth > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to end paused event, not allowed");
658 event_perf_info->depth--;
659 /* Check for double counting */
660 if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
661 else PetscCheck(event_perf_info->depth == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Logging event had unbalanced begin/end pairs");
662
663 /* Log performance info */
664 PetscCall(PetscTime(&time));
665 PetscCall(PetscEventPerfInfoToc(event_perf_info, time, PetscLogMemory, event));
666 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) {
667 PetscEventPerfInfo *event_perf_info_global;
668 PetscCall(PetscSpinlockLock(&def->lock));
669 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info_global));
670 PetscCall(PetscEventPerfInfoAdd_Internal(event_perf_info, event_perf_info_global));
671 PetscCall(PetscSpinlockUnlock(&def->lock));
672 }
673 PetscFunctionReturn(PETSC_SUCCESS);
674 }
675
PetscLogHandlerEventDeactivatePush_Default(PetscLogHandler h,PetscLogStage stage,PetscLogEvent event)676 static PetscErrorCode PetscLogHandlerEventDeactivatePush_Default(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event)
677 {
678 PetscEventPerfInfo *event_perf_info;
679
680 PetscFunctionBegin;
681 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage));
682 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info));
683 event_perf_info->depth++;
684 PetscFunctionReturn(PETSC_SUCCESS);
685 }
686
PetscLogHandlerEventDeactivatePop_Default(PetscLogHandler h,PetscLogStage stage,PetscLogEvent event)687 static PetscErrorCode PetscLogHandlerEventDeactivatePop_Default(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event)
688 {
689 PetscEventPerfInfo *event_perf_info;
690
691 PetscFunctionBegin;
692 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage));
693 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info));
694 event_perf_info->depth--;
695 PetscFunctionReturn(PETSC_SUCCESS);
696 }
697
PetscLogHandlerEventsPause_Default(PetscLogHandler h)698 static PetscErrorCode PetscLogHandlerEventsPause_Default(PetscLogHandler h)
699 {
700 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
701 PetscLogDouble time;
702 PetscInt num_stages;
703
704 PetscFunctionBegin;
705 if (def->pause_depth++ > 0) PetscFunctionReturn(PETSC_SUCCESS);
706 PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL));
707 PetscCall(PetscTime(&time));
708 /* Pause stages in reverse of the order they were pushed */
709 for (PetscInt stage = num_stages - 1; stage >= 0; stage--) {
710 PetscStagePerf *stage_info = NULL;
711 PetscInt num_events;
712
713 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info));
714 PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL));
715 /* Pause events in reverse of the order they were pushed */
716 for (PetscInt event = num_events - 1; event >= 0; event--) {
717 PetscEventPerfInfo *event_info = NULL;
718 PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info));
719 if (event_info->depth > 0) {
720 event_info->depth *= -1;
721 PetscCall(PetscEventPerfInfoPause(event_info, time, PetscLogMemory, event));
722 }
723 }
724 if (stage > 0 && stage_info->perfInfo.depth > 0) {
725 stage_info->perfInfo.depth *= -1;
726 PetscCall(PetscEventPerfInfoPause(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2)));
727 }
728 }
729 PetscFunctionReturn(PETSC_SUCCESS);
730 }
731
PetscLogHandlerEventsResume_Default(PetscLogHandler h)732 static PetscErrorCode PetscLogHandlerEventsResume_Default(PetscLogHandler h)
733 {
734 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
735 PetscLogDouble time;
736 PetscInt num_stages;
737
738 PetscFunctionBegin;
739 if (--def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
740 PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL));
741 PetscCall(PetscTime(&time));
742 /* Unpause stages in the same order they were pushed */
743 for (PetscInt stage = 0; stage < num_stages; stage++) {
744 PetscStagePerf *stage_info = NULL;
745 PetscInt num_events;
746
747 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info));
748 PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL));
749 /* Unpause events in the same order they were pushed */
750 for (PetscInt event = 0; event < num_events; event++) {
751 PetscEventPerfInfo *event_info = NULL;
752 PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info));
753 if (event_info->depth < 0) {
754 event_info->depth *= -1;
755 PetscCall(PetscEventPerfInfoResume(event_info, time, PetscLogMemory, event));
756 }
757 }
758 if (stage > 0 && stage_info->perfInfo.depth < 0) {
759 stage_info->perfInfo.depth *= -1;
760 PetscCall(PetscEventPerfInfoResume(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2)));
761 }
762 }
763 PetscFunctionReturn(PETSC_SUCCESS);
764 }
765
PetscLogHandlerStagePush_Default(PetscLogHandler h,PetscLogStage new_stage)766 static PetscErrorCode PetscLogHandlerStagePush_Default(PetscLogHandler h, PetscLogStage new_stage)
767 {
768 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
769 PetscLogDouble time;
770 PetscLogState state;
771 PetscLogStage current_stage;
772 PetscStagePerf *new_stage_info;
773
774 PetscFunctionBegin;
775 if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
776 PetscCall(PetscLogHandlerGetState(h, &state));
777 current_stage = state->current_stage;
778 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, new_stage, &new_stage_info));
779 PetscCall(PetscTime(&time));
780
781 /* Record flops/time of previous stage */
782 if (current_stage >= 0) {
783 if (PetscBTLookup(state->active, current_stage)) {
784 PetscStagePerf *current_stage_info;
785 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, ¤t_stage_info));
786 PetscCall(PetscEventPerfInfoToc(¤t_stage_info->perfInfo, time, PetscLogMemory, -(current_stage + 2)));
787 }
788 }
789 new_stage_info->used = PETSC_TRUE;
790 new_stage_info->perfInfo.count++;
791 new_stage_info->perfInfo.depth++;
792 /* Subtract current quantities so that we obtain the difference when we pop */
793 if (PetscBTLookup(state->active, new_stage)) PetscCall(PetscEventPerfInfoTic(&new_stage_info->perfInfo, time, PetscLogMemory, -(new_stage + 2)));
794 PetscFunctionReturn(PETSC_SUCCESS);
795 }
796
PetscLogHandlerStagePop_Default(PetscLogHandler h,PetscLogStage old_stage)797 static PetscErrorCode PetscLogHandlerStagePop_Default(PetscLogHandler h, PetscLogStage old_stage)
798 {
799 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data;
800 PetscLogStage current_stage;
801 PetscStagePerf *old_stage_info;
802 PetscLogState state;
803 PetscLogDouble time;
804
805 PetscFunctionBegin;
806 if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS);
807 PetscCall(PetscLogHandlerGetState(h, &state));
808 current_stage = state->current_stage;
809 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, old_stage, &old_stage_info));
810 PetscCall(PetscTime(&time));
811 old_stage_info->perfInfo.depth--;
812 if (PetscBTLookup(state->active, old_stage)) PetscCall(PetscEventPerfInfoToc(&old_stage_info->perfInfo, time, PetscLogMemory, -(old_stage + 2)));
813 if (current_stage >= 0) {
814 if (PetscBTLookup(state->active, current_stage)) {
815 PetscStagePerf *current_stage_info;
816 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, ¤t_stage_info));
817 PetscCall(PetscEventPerfInfoTic(¤t_stage_info->perfInfo, time, PetscLogMemory, -(current_stage + 2)));
818 }
819 }
820 PetscFunctionReturn(PETSC_SUCCESS);
821 }
822
PetscLogHandlerStageSetVisible_Default(PetscLogHandler h,PetscLogStage stage,PetscBool is_visible)823 static PetscErrorCode PetscLogHandlerStageSetVisible_Default(PetscLogHandler h, PetscLogStage stage, PetscBool is_visible)
824 {
825 PetscStagePerf *stage_info;
826
827 PetscFunctionBegin;
828 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage));
829 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info));
830 stage_info->perfInfo.visible = is_visible;
831 PetscFunctionReturn(PETSC_SUCCESS);
832 }
833
PetscLogHandlerStageGetVisible_Default(PetscLogHandler h,PetscLogStage stage,PetscBool * is_visible)834 static PetscErrorCode PetscLogHandlerStageGetVisible_Default(PetscLogHandler h, PetscLogStage stage, PetscBool *is_visible)
835 {
836 PetscStagePerf *stage_info;
837
838 PetscFunctionBegin;
839 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage));
840 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info));
841 *is_visible = stage_info->perfInfo.visible;
842 PetscFunctionReturn(PETSC_SUCCESS);
843 }
844
PetscLogHandlerSetLogActions_Default(PetscLogHandler handler,PetscBool flag)845 static PetscErrorCode PetscLogHandlerSetLogActions_Default(PetscLogHandler handler, PetscBool flag)
846 {
847 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
848
849 PetscFunctionBegin;
850 def->petsc_logActions = flag;
851 PetscFunctionReturn(PETSC_SUCCESS);
852 }
853
PetscLogHandlerSetLogObjects_Default(PetscLogHandler handler,PetscBool flag)854 static PetscErrorCode PetscLogHandlerSetLogObjects_Default(PetscLogHandler handler, PetscBool flag)
855 {
856 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
857
858 PetscFunctionBegin;
859 def->petsc_logObjects = flag;
860 PetscFunctionReturn(PETSC_SUCCESS);
861 }
862
PetscLogHandlerLogObjectState_Default(PetscLogHandler handler,PetscObject obj,const char format[],va_list Argp)863 static PetscErrorCode PetscLogHandlerLogObjectState_Default(PetscLogHandler handler, PetscObject obj, const char format[], va_list Argp)
864 {
865 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
866 size_t fullLength;
867
868 PetscFunctionBegin;
869 if (def->petsc_logObjects) {
870 Object *obj_entry = NULL;
871 PetscInt objid;
872
873 PetscCall(PetscIntCast(obj->id - 1, &objid));
874 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, objid, &obj_entry));
875 PetscCall(PetscVSNPrintf(obj_entry->info, 64, format, &fullLength, Argp));
876 }
877 PetscFunctionReturn(PETSC_SUCCESS);
878 }
879
PetscLogHandlerGetNumObjects_Default(PetscLogHandler handler,PetscInt * num_objects)880 static PetscErrorCode PetscLogHandlerGetNumObjects_Default(PetscLogHandler handler, PetscInt *num_objects)
881 {
882 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
883
884 PetscFunctionBegin;
885 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, num_objects, NULL));
886 PetscFunctionReturn(PETSC_SUCCESS);
887 }
888
PetscLogHandlerDump_Default(PetscLogHandler handler,const char sname[])889 static PetscErrorCode PetscLogHandlerDump_Default(PetscLogHandler handler, const char sname[])
890 {
891 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
892 FILE *fd;
893 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN];
894 PetscLogDouble flops, _TotalTime;
895 PetscMPIInt rank;
896 int curStage;
897 PetscLogState state;
898 PetscInt num_events;
899 PetscLogEvent event;
900
901 PetscFunctionBegin;
902 /* Calculate the total elapsed time */
903 PetscCall(PetscTime(&_TotalTime));
904 _TotalTime -= petsc_BaseTime;
905 /* Open log file */
906 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)handler), &rank));
907 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank));
908 PetscCall(PetscFixFilename(file, fname));
909 PetscCall(PetscFOpen(PETSC_COMM_SELF, fname, "w", &fd));
910 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname);
911 /* Output totals */
912 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime));
913 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Clock Resolution %g\n", 0.0));
914 /* Output actions */
915 if (def->petsc_logActions) {
916 PetscInt num_actions;
917 PetscCall(PetscLogActionArrayGetSize(def->petsc_actions, &num_actions, NULL));
918 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Actions accomplished %" PetscInt_FMT "\n", num_actions));
919 for (int a = 0; a < num_actions; a++) {
920 Action *action;
921
922 PetscCall(PetscLogActionArrayGetRef(def->petsc_actions, a, &action));
923 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%g %d %d %d %" PetscInt64_FMT " %" PetscInt64_FMT " %" PetscInt64_FMT " %g %g %g\n", action->time, action->action, action->event, action->classid, action->id1, action->id2, action->id3, action->flops,
924 action->mem, action->maxmem));
925 }
926 }
927 /* Output objects */
928 if (def->petsc_logObjects) {
929 PetscInt num_objects;
930
931 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL));
932 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Objects created %d destroyed %d\n", def->petsc_numObjectsCreated, def->petsc_numObjectsDestroyed));
933 for (int o = 0; o < num_objects; o++) {
934 Object *object = NULL;
935
936 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, o, &object));
937 if (object->parent != -1) continue; // object with this id wasn't logged, probably a PetscLogHandler
938 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Parent ID: %d Memory: %d\n", object->parent, (int)object->mem));
939 if (!object->name[0]) {
940 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Name\n"));
941 } else {
942 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Name: %s\n", object->name));
943 }
944 if (!object->info[0]) {
945 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Info\n"));
946 } else {
947 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Info: %s\n", object->info));
948 }
949 }
950 }
951 /* Output events */
952 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Event log:\n"));
953 PetscCall(PetscLogHandlerGetState(handler, &state));
954 PetscCall(PetscLogStateGetNumEvents(state, &num_events));
955 PetscCall(PetscLogStateGetCurrentStage(state, &curStage));
956 for (event = 0; event < num_events; event++) {
957 PetscEventPerfInfo *event_info;
958
959 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, curStage, event, &event_info));
960 if (event_info->time != 0.0) flops = event_info->flops / event_info->time;
961 else flops = 0.0;
962 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%d %16d %16g %16g %16g\n", event, event_info->count, event_info->flops, event_info->time, flops));
963 }
964 PetscCall(PetscFClose(PETSC_COMM_SELF, fd));
965 PetscFunctionReturn(PETSC_SUCCESS);
966 }
967
968 /*
969 PetscLogView_Detailed - Each process prints the times for its own events
970
971 */
PetscLogHandlerView_Default_Detailed(PetscLogHandler handler,PetscViewer viewer)972 static PetscErrorCode PetscLogHandlerView_Default_Detailed(PetscLogHandler handler, PetscViewer viewer)
973 {
974 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
975 PetscLogDouble locTotalTime, numRed, maxMem;
976 PetscInt numStages, numEvents;
977 MPI_Comm comm = PetscObjectComm((PetscObject)viewer);
978 PetscMPIInt rank, size;
979 PetscLogGlobalNames global_stages, global_events;
980 PetscLogState state;
981 PetscEventPerfInfo zero_info;
982
983 PetscFunctionBegin;
984 PetscCall(PetscLogHandlerGetState(handler, &state));
985 PetscCallMPI(MPI_Comm_size(comm, &size));
986 PetscCallMPI(MPI_Comm_rank(comm, &rank));
987 /* Must preserve reduction count before we go on */
988 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
989 /* Get the total elapsed time */
990 PetscCall(PetscTime(&locTotalTime));
991 locTotalTime -= petsc_BaseTime;
992 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size));
993 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n"));
994 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n"));
995 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n"));
996 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n"));
997 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n"));
998 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n"));
999 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n"));
1000 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages));
1001 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events));
1002 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages));
1003 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents));
1004 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info)));
1005 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n"));
1006 for (PetscInt stage = 0; stage < numStages; stage++) {
1007 PetscInt stage_id;
1008 const char *stage_name;
1009
1010 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1011 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1012 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stage_name));
1013 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stage_name));
1014 for (PetscInt event = 0; event < numEvents; event++) {
1015 PetscEventPerfInfo *eventInfo = &zero_info;
1016 PetscBool is_zero = PETSC_FALSE;
1017 PetscInt event_id;
1018 const char *event_name;
1019
1020 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id));
1021 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name));
1022 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &eventInfo));
1023 is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE;
1024 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPI_C_BOOL, MPI_LAND, comm));
1025 if (!is_zero) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stage_name, event_name));
1026 }
1027 }
1028 PetscCall(PetscMallocGetMaximumUsage(&maxMem));
1029 PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1030 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime));
1031 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct));
1032 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len));
1033 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed));
1034 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops));
1035 {
1036 PetscInt num_objects;
1037
1038 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL));
1039 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %" PetscInt_FMT "\n", rank, num_objects));
1040 }
1041 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem));
1042 PetscCall(PetscViewerFlush(viewer));
1043 for (PetscInt stage = 0; stage < numStages; stage++) {
1044 PetscEventPerfInfo *stage_perf_info = &zero_info;
1045 PetscInt stage_id;
1046 const char *stage_name;
1047
1048 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1049 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1050 if (stage_id >= 0) {
1051 PetscStagePerf *stage_info;
1052 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info));
1053 stage_perf_info = &stage_info->perfInfo;
1054 }
1055 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stage_name, rank, stage_perf_info->time,
1056 stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops));
1057 for (PetscInt event = 0; event < numEvents; event++) {
1058 PetscEventPerfInfo *eventInfo = &zero_info;
1059 PetscBool is_zero = PETSC_FALSE;
1060 PetscInt event_id;
1061 const char *event_name;
1062
1063 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id));
1064 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name));
1065 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &eventInfo));
1066 is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE;
1067 PetscCall(PetscArraycmp(eventInfo, &zero_info, 1, &is_zero));
1068 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPI_C_BOOL, MPI_LAND, comm));
1069 if (!is_zero) {
1070 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", stage_name, event_name, rank,
1071 eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops));
1072 if (eventInfo->dof[0] >= 0.) {
1073 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : ["));
1074 for (PetscInt d = 0; d < 8; ++d) {
1075 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", "));
1076 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d]));
1077 }
1078 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]"));
1079 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : ["));
1080 for (PetscInt e = 0; e < 8; ++e) {
1081 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", "));
1082 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e]));
1083 }
1084 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]"));
1085 }
1086 }
1087 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n"));
1088 }
1089 }
1090 PetscCall(PetscViewerFlush(viewer));
1091 PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1092 PetscCall(PetscLogGlobalNamesDestroy(&global_events));
1093 PetscCall(PetscLogGlobalNamesDestroy(&global_stages));
1094 PetscFunctionReturn(PETSC_SUCCESS);
1095 }
1096
1097 /*
1098 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format
1099 */
PetscLogHandlerView_Default_CSV(PetscLogHandler handler,PetscViewer viewer)1100 static PetscErrorCode PetscLogHandlerView_Default_CSV(PetscLogHandler handler, PetscViewer viewer)
1101 {
1102 PetscLogDouble locTotalTime, maxMem;
1103 PetscInt numStages, numEvents, stage, event;
1104 MPI_Comm comm = PetscObjectComm((PetscObject)viewer);
1105 PetscMPIInt rank, size;
1106 PetscLogGlobalNames global_stages, global_events;
1107 PetscLogState state;
1108 PetscEventPerfInfo zero_info;
1109
1110 PetscFunctionBegin;
1111 PetscCall(PetscLogHandlerGetState(handler, &state));
1112 PetscCallMPI(MPI_Comm_size(comm, &size));
1113 PetscCallMPI(MPI_Comm_rank(comm, &rank));
1114 /* Must preserve reduction count before we go on */
1115 /* Get the total elapsed time */
1116 PetscCall(PetscTime(&locTotalTime));
1117 locTotalTime -= petsc_BaseTime;
1118 PetscCall(PetscMallocGetMaximumUsage(&maxMem));
1119 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages));
1120 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events));
1121 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages));
1122 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents));
1123 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info)));
1124 PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1125 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size));
1126 PetscCall(PetscViewerFlush(viewer));
1127 for (stage = 0; stage < numStages; stage++) {
1128 PetscEventPerfInfo *stage_perf_info;
1129 PetscInt stage_id;
1130 const char *stage_name;
1131
1132 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1133 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1134 stage_perf_info = &zero_info;
1135 if (stage_id >= 0) {
1136 PetscStagePerf *stage_info;
1137 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info));
1138 stage_perf_info = &stage_info->perfInfo;
1139 }
1140 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stage_name, rank, stage_perf_info->time, stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops));
1141 for (event = 0; event < numEvents; event++) {
1142 PetscEventPerfInfo *eventInfo = &zero_info;
1143 PetscBool is_zero = PETSC_FALSE;
1144 PetscInt event_id;
1145 const char *event_name;
1146
1147 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id));
1148 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name));
1149 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &eventInfo));
1150 PetscCall(PetscArraycmp(eventInfo, &zero_info, 1, &is_zero));
1151 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPI_C_BOOL, MPI_LAND, comm));
1152 if (!is_zero) {
1153 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stage_name, event_name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops));
1154 if (eventInfo->dof[0] >= 0.) {
1155 for (PetscInt d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d]));
1156 for (PetscInt e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e]));
1157 }
1158 }
1159 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n"));
1160 }
1161 }
1162 PetscCall(PetscViewerFlush(viewer));
1163 PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1164 PetscCall(PetscLogGlobalNamesDestroy(&global_stages));
1165 PetscCall(PetscLogGlobalNamesDestroy(&global_events));
1166 PetscFunctionReturn(PETSC_SUCCESS);
1167 }
1168
PetscLogViewWarnSync(PetscViewer viewer)1169 static PetscErrorCode PetscLogViewWarnSync(PetscViewer viewer)
1170 {
1171 PetscFunctionBegin;
1172 if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS);
1173 PetscCall(PetscViewerASCIIPrintf(viewer, "\n\n"));
1174 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n"));
1175 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n"));
1176 PetscCall(PetscViewerASCIIPrintf(viewer, " # WARNING!!! #\n"));
1177 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n"));
1178 PetscCall(PetscViewerASCIIPrintf(viewer, " # This program was run with logging synchronization. #\n"));
1179 PetscCall(PetscViewerASCIIPrintf(viewer, " # This option provides more meaningful imbalance #\n"));
1180 PetscCall(PetscViewerASCIIPrintf(viewer, " # figures at the expense of slowing things down and #\n"));
1181 PetscCall(PetscViewerASCIIPrintf(viewer, " # providing a distorted view of the overall runtime. #\n"));
1182 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n"));
1183 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n\n\n"));
1184 PetscFunctionReturn(PETSC_SUCCESS);
1185 }
1186
PetscLogViewWarnDebugging(PetscViewer viewer)1187 static PetscErrorCode PetscLogViewWarnDebugging(PetscViewer viewer)
1188 {
1189 PetscFunctionBegin;
1190 if (PetscDefined(USE_DEBUG)) {
1191 PetscCall(PetscViewerASCIIPrintf(viewer, "\n\n"));
1192 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n"));
1193 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n"));
1194 PetscCall(PetscViewerASCIIPrintf(viewer, " # WARNING!!! #\n"));
1195 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n"));
1196 PetscCall(PetscViewerASCIIPrintf(viewer, " # This code was compiled with a debugging option. #\n"));
1197 PetscCall(PetscViewerASCIIPrintf(viewer, " # To get timing results run ./configure #\n"));
1198 PetscCall(PetscViewerASCIIPrintf(viewer, " # using --with-debugging=no, the performance will #\n"));
1199 PetscCall(PetscViewerASCIIPrintf(viewer, " # be generally two or three times faster. #\n"));
1200 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n"));
1201 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n\n\n"));
1202 }
1203 PetscFunctionReturn(PETSC_SUCCESS);
1204 }
1205
PetscLogViewWarnNoGpuAwareMpi(PetscViewer viewer)1206 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(PetscViewer viewer)
1207 {
1208 #if defined(PETSC_HAVE_DEVICE)
1209 PetscMPIInt size;
1210 PetscBool deviceInitialized = PETSC_FALSE;
1211
1212 PetscFunctionBegin;
1213 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)viewer), &size));
1214 for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) {
1215 const PetscDeviceType dtype = PetscDeviceTypeCast(i);
1216 if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */
1217 deviceInitialized = PETSC_TRUE;
1218 break;
1219 }
1220 }
1221 /* the last condition says PETSc is configured with device but it is a pure CPU run, so don't print misleading warnings */
1222 if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS);
1223 PetscCall(PetscViewerASCIIPrintf(viewer, "\n\n"));
1224 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n"));
1225 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n"));
1226 PetscCall(PetscViewerASCIIPrintf(viewer, " # WARNING!!! #\n"));
1227 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n"));
1228 PetscCall(PetscViewerASCIIPrintf(viewer, " # This code was compiled with GPU support and you've #\n"));
1229 PetscCall(PetscViewerASCIIPrintf(viewer, " # created PETSc/GPU objects, but you intentionally #\n"));
1230 PetscCall(PetscViewerASCIIPrintf(viewer, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n"));
1231 PetscCall(PetscViewerASCIIPrintf(viewer, " # additional data between the GPU and CPU. To obtain #\n"));
1232 PetscCall(PetscViewerASCIIPrintf(viewer, " # meaningful timing results on multi-rank runs, use #\n"));
1233 PetscCall(PetscViewerASCIIPrintf(viewer, " # GPU-aware MPI instead. #\n"));
1234 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n"));
1235 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n\n\n"));
1236 PetscFunctionReturn(PETSC_SUCCESS);
1237 #else
1238 return PETSC_SUCCESS;
1239 #endif
1240 }
1241
PetscLogViewWarnGpuTime(PetscViewer viewer)1242 static PetscErrorCode PetscLogViewWarnGpuTime(PetscViewer viewer)
1243 {
1244 #if defined(PETSC_HAVE_DEVICE) && !defined(PETSC_HAVE_KOKKOS_WITHOUT_GPU)
1245
1246 PetscFunctionBegin;
1247 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS);
1248 PetscCall(PetscViewerASCIIPrintf(viewer, "\n\n"));
1249 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n"));
1250 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n"));
1251 PetscCall(PetscViewerASCIIPrintf(viewer, " # WARNING!!! #\n"));
1252 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n"));
1253 PetscCall(PetscViewerASCIIPrintf(viewer, " # This code was run with -log_view_gpu_time #\n"));
1254 PetscCall(PetscViewerASCIIPrintf(viewer, " # This provides accurate timing within the GPU kernels #\n"));
1255 PetscCall(PetscViewerASCIIPrintf(viewer, " # but can slow down the entire computation by a #\n"));
1256 PetscCall(PetscViewerASCIIPrintf(viewer, " # measurable amount. For fastest runs we recommend #\n"));
1257 PetscCall(PetscViewerASCIIPrintf(viewer, " # not using this option. #\n"));
1258 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n"));
1259 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n\n\n"));
1260 PetscFunctionReturn(PETSC_SUCCESS);
1261 #else
1262 return PETSC_SUCCESS;
1263 #endif
1264 }
1265
1266 PETSC_INTERN int PetscGlobalArgc;
1267 PETSC_INTERN char **PetscGlobalArgs;
1268
PetscLogHandlerView_Default_Info(PetscLogHandler handler,PetscViewer viewer)1269 static PetscErrorCode PetscLogHandlerView_Default_Info(PetscLogHandler handler, PetscViewer viewer)
1270 {
1271 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data;
1272 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
1273 PetscLogDouble locTotalTime, TotalTime, TotalFlops;
1274 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions;
1275 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red;
1276 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed;
1277 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed;
1278 PetscLogDouble min, max, tot, ratio, avg, x, y;
1279 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax;
1280 #if defined(PETSC_HAVE_DEVICE)
1281 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */
1282 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops, gjoules, geff;
1283 #endif
1284 PetscMPIInt minC, maxC;
1285 PetscMPIInt size, rank;
1286 PetscBool *localStageUsed, *stageUsed;
1287 PetscBool *localStageVisible, *stageVisible;
1288 PetscInt numStages, numEvents;
1289 int stage, oclass;
1290 PetscLogEvent event;
1291 char version[256];
1292 MPI_Comm comm;
1293 #if defined(PETSC_HAVE_DEVICE) && !defined(PETSC_HAVE_KOKKOS_WITHOUT_GPU)
1294 PetscInt64 nas = 0x7FF0000000000002;
1295 #endif
1296 PetscLogGlobalNames global_stages, global_events;
1297 PetscEventPerfInfo zero_info;
1298 PetscLogState state;
1299
1300 PetscFunctionBegin;
1301 PetscCall(PetscLogHandlerGetState(handler, &state));
1302 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1303 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
1304 PetscCallMPI(MPI_Comm_size(comm, &size));
1305 PetscCallMPI(MPI_Comm_rank(comm, &rank));
1306 /* Get the total elapsed time */
1307 PetscCall(PetscTime(&locTotalTime));
1308 locTotalTime -= petsc_BaseTime;
1309
1310 PetscCall(PetscViewerASCIIPrintf(viewer, "****************************************************************************************************************************************************************\n"));
1311 PetscCall(PetscViewerASCIIPrintf(viewer, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n"));
1312 PetscCall(PetscViewerASCIIPrintf(viewer, "****************************************************************************************************************************************************************\n"));
1313 PetscCall(PetscViewerASCIIPrintf(viewer, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n"));
1314 PetscCall(PetscLogViewWarnSync(viewer));
1315 PetscCall(PetscLogViewWarnDebugging(viewer));
1316 PetscCall(PetscLogViewWarnNoGpuAwareMpi(viewer));
1317 PetscCall(PetscLogViewWarnGpuTime(viewer));
1318 PetscCall(PetscGetArchType(arch, sizeof(arch)));
1319 PetscCall(PetscGetHostName(hostname, sizeof(hostname)));
1320 PetscCall(PetscGetUserName(username, sizeof(username)));
1321 PetscCall(PetscGetProgramName(pname, sizeof(pname)));
1322 PetscCall(PetscGetDate(date, sizeof(date)));
1323 PetscCall(PetscGetVersion(version, sizeof(version)));
1324
1325 #if defined(PETSC_HAVE_CUPM)
1326 const char *cupm = PetscDefined(HAVE_CUDA) ? "CUDA" : "HIP";
1327 if (PetscDeviceCUPMRuntimeArch)
1328 PetscCall(PetscViewerASCIIPrintf(viewer, "%s on a %s named %s with %d process%s and %s architecture %d, by %s on %s\n", pname, arch, hostname, size, size > 1 ? "es" : "", cupm, PetscDeviceCUPMRuntimeArch, username, date));
1329 else
1330 #endif
1331 PetscCall(PetscViewerASCIIPrintf(viewer, "%s on a %s named %s with %d process%s, by %s on %s\n", pname, arch, hostname, size, size > 1 ? "es" : "", username, date));
1332
1333 #if defined(PETSC_HAVE_OPENMP)
1334 PetscCall(PetscViewerASCIIPrintf(viewer, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads));
1335 #endif
1336 PetscCall(PetscViewerASCIIPrintf(viewer, "Using %s\n", version));
1337
1338 /* Must preserve reduction count before we go on */
1339 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
1340
1341 /* Calculate summary information */
1342 PetscCall(PetscViewerASCIIPrintf(viewer, "\n Max Max/Min Avg Total\n"));
1343 /* Time */
1344 PetscCallMPI(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1345 PetscCallMPI(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1346 PetscCallMPI(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1347 avg = tot / ((PetscLogDouble)size);
1348 if (min != 0.0) ratio = max / min;
1349 else ratio = 0.0;
1350 PetscCall(PetscViewerASCIIPrintf(viewer, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg));
1351 TotalTime = tot;
1352 /* Objects */
1353 {
1354 PetscInt num_objects;
1355
1356 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL));
1357 avg = (PetscLogDouble)num_objects;
1358 }
1359 PetscCallMPI(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1360 PetscCallMPI(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1361 PetscCallMPI(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1362 avg = tot / ((PetscLogDouble)size);
1363 if (min != 0.0) ratio = max / min;
1364 else ratio = 0.0;
1365 PetscCall(PetscViewerASCIIPrintf(viewer, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg));
1366 /* Flops */
1367 PetscCallMPI(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1368 PetscCallMPI(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1369 PetscCallMPI(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1370 avg = tot / ((PetscLogDouble)size);
1371 if (min != 0.0) ratio = max / min;
1372 else ratio = 0.0;
1373 PetscCall(PetscViewerASCIIPrintf(viewer, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot));
1374 TotalFlops = tot;
1375 /* Flops/sec -- Must talk to Barry here */
1376 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime;
1377 else flops = 0.0;
1378 PetscCallMPI(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1379 PetscCallMPI(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1380 PetscCallMPI(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1381 avg = tot / ((PetscLogDouble)size);
1382 if (min != 0.0) ratio = max / min;
1383 else ratio = 0.0;
1384 PetscCall(PetscViewerASCIIPrintf(viewer, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot));
1385 /* Memory */
1386 PetscCall(PetscMallocGetMaximumUsage(&mem));
1387 if (mem > 0.0) {
1388 PetscCallMPI(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1389 PetscCallMPI(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1390 PetscCallMPI(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1391 avg = tot / ((PetscLogDouble)size);
1392 if (min != 0.0) ratio = max / min;
1393 else ratio = 0.0;
1394 PetscCall(PetscViewerASCIIPrintf(viewer, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot));
1395 }
1396 /* Messages */
1397 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
1398 PetscCallMPI(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1399 PetscCallMPI(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1400 PetscCallMPI(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1401 avg = tot / ((PetscLogDouble)size);
1402 if (min != 0.0) ratio = max / min;
1403 else ratio = 0.0;
1404 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot));
1405 numMessages = tot;
1406 /* Message Lengths */
1407 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
1408 PetscCallMPI(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1409 PetscCallMPI(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1410 PetscCallMPI(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1411 if (numMessages != 0) avg = tot / numMessages;
1412 else avg = 0.0;
1413 if (min != 0.0) ratio = max / min;
1414 else ratio = 0.0;
1415 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot));
1416 messageLength = tot;
1417 /* Reductions */
1418 PetscCallMPI(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1419 PetscCallMPI(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1420 PetscCallMPI(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1421 if (min != 0.0) ratio = max / min;
1422 else ratio = 0.0;
1423 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI Reductions: %5.3e %7.3f\n", max, ratio));
1424 numReductions = red; /* wrong because uses count from process zero */
1425 PetscCall(PetscViewerASCIIPrintf(viewer, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n"));
1426 PetscCall(PetscViewerASCIIPrintf(viewer, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n"));
1427 PetscCall(PetscViewerASCIIPrintf(viewer, " and VecAXPY() for complex vectors of length N --> 8N flops\n"));
1428
1429 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages));
1430 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events));
1431 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages));
1432 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents));
1433 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info)));
1434 PetscCall(PetscMalloc1(numStages, &localStageUsed));
1435 PetscCall(PetscMalloc1(numStages, &stageUsed));
1436 PetscCall(PetscMalloc1(numStages, &localStageVisible));
1437 PetscCall(PetscMalloc1(numStages, &stageVisible));
1438 if (numStages > 0) {
1439 for (stage = 0; stage < numStages; stage++) {
1440 PetscInt stage_id;
1441
1442 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1443 if (stage_id >= 0) {
1444 PetscStagePerf *stage_info;
1445
1446 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info));
1447 localStageUsed[stage] = stage_info->used;
1448 localStageVisible[stage] = stage_info->perfInfo.visible;
1449 } else {
1450 localStageUsed[stage] = PETSC_FALSE;
1451 localStageVisible[stage] = PETSC_TRUE;
1452 }
1453 }
1454 PetscCallMPI(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPI_C_BOOL, MPI_LOR, comm));
1455 PetscCallMPI(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPI_C_BOOL, MPI_LAND, comm));
1456 for (stage = 0; stage < numStages; stage++) {
1457 if (stageUsed[stage] && stageVisible[stage]) {
1458 PetscCall(PetscViewerASCIIPrintf(viewer, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n"));
1459 PetscCall(PetscViewerASCIIPrintf(viewer, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n"));
1460 break;
1461 }
1462 }
1463 for (stage = 0; stage < numStages; stage++) {
1464 PetscInt stage_id;
1465 PetscEventPerfInfo *stage_info;
1466 const char *stage_name;
1467
1468 if (!(stageUsed[stage] && stageVisible[stage])) continue;
1469 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1470 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1471 stage_info = &zero_info;
1472 if (localStageUsed[stage]) {
1473 PetscStagePerf *stage_perf_info;
1474
1475 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_perf_info));
1476 stage_info = &stage_perf_info->perfInfo;
1477 }
1478 PetscCallMPI(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1479 PetscCallMPI(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1480 PetscCallMPI(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1481 PetscCallMPI(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1482 PetscCallMPI(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1483 mess *= 0.5;
1484 messLen *= 0.5;
1485 red /= size;
1486 if (TotalTime != 0.0) fracTime = stageTime / TotalTime;
1487 else fracTime = 0.0;
1488 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops;
1489 else fracFlops = 0.0;
1490 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */
1491 if (numMessages != 0.0) fracMessages = mess / numMessages;
1492 else fracMessages = 0.0;
1493 if (mess != 0.0) avgMessLen = messLen / mess;
1494 else avgMessLen = 0.0;
1495 if (messageLength != 0.0) fracLength = messLen / messageLength;
1496 else fracLength = 0.0;
1497 if (numReductions != 0.0) fracReductions = red / numReductions;
1498 else fracReductions = 0.0;
1499 PetscCall(PetscViewerASCIIPrintf(viewer, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, stage_name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions));
1500 }
1501 }
1502
1503 PetscCall(PetscViewerASCIIPrintf(viewer, "\n------------------------------------------------------------------------------------------------------------------------\n"));
1504 PetscCall(PetscViewerASCIIPrintf(viewer, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n"));
1505 PetscCall(PetscViewerASCIIPrintf(viewer, "Phase summary info:\n"));
1506 PetscCall(PetscViewerASCIIPrintf(viewer, " Count: number of times phase was executed\n"));
1507 PetscCall(PetscViewerASCIIPrintf(viewer, " Time and Flop: Max - maximum over all processes\n"));
1508 PetscCall(PetscViewerASCIIPrintf(viewer, " Ratio - ratio of maximum to minimum over all processes\n"));
1509 PetscCall(PetscViewerASCIIPrintf(viewer, " Mess: number of messages sent\n"));
1510 PetscCall(PetscViewerASCIIPrintf(viewer, " AvgLen: average message length (bytes)\n"));
1511 PetscCall(PetscViewerASCIIPrintf(viewer, " Reduct: number of global reductions\n"));
1512 PetscCall(PetscViewerASCIIPrintf(viewer, " Global: entire computation\n"));
1513 PetscCall(PetscViewerASCIIPrintf(viewer, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n"));
1514 PetscCall(PetscViewerASCIIPrintf(viewer, " %%T - percent time in this phase %%F - percent flop in this phase\n"));
1515 PetscCall(PetscViewerASCIIPrintf(viewer, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n"));
1516 PetscCall(PetscViewerASCIIPrintf(viewer, " %%R - percent reductions in this phase\n"));
1517 PetscCall(PetscViewerASCIIPrintf(viewer, " Total Mflop/s: 1e-6 * (sum of flop over all processes)/(max time over all processes)\n"));
1518 if (PetscLogMemory) {
1519 PetscCall(PetscViewerASCIIPrintf(viewer, " Memory usage is summed over all MPI processes, it is given in mega-bytes\n"));
1520 PetscCall(PetscViewerASCIIPrintf(viewer, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n"));
1521 PetscCall(PetscViewerASCIIPrintf(viewer, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n"));
1522 PetscCall(PetscViewerASCIIPrintf(viewer, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n"));
1523 PetscCall(PetscViewerASCIIPrintf(viewer, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n"));
1524 }
1525 #if defined(PETSC_HAVE_DEVICE)
1526 PetscCall(PetscViewerASCIIPrintf(viewer, " GPU Mflop/s: 1e-6 * (sum of flop on GPU over all processes)/(max GPU time over all processes)\n"));
1527 PetscCall(PetscViewerASCIIPrintf(viewer, " CpuToGpu Count: total number of CPU to GPU copies per processor\n"));
1528 PetscCall(PetscViewerASCIIPrintf(viewer, " CpuToGpu Size (Mbytes): 1e-6 * (total size of CPU to GPU copies per processor)\n"));
1529 PetscCall(PetscViewerASCIIPrintf(viewer, " GpuToCpu Count: total number of GPU to CPU copies per processor\n"));
1530 PetscCall(PetscViewerASCIIPrintf(viewer, " GpuToCpu Size (Mbytes): 1e-6 * (total size of GPU to CPU copies per processor)\n"));
1531 PetscCall(PetscViewerASCIIPrintf(viewer, " GPU %%F: percent flops on GPU in this event\n"));
1532 #endif
1533 PetscCall(PetscViewerASCIIPrintf(viewer, "------------------------------------------------------------------------------------------------------------------------\n"));
1534
1535 PetscCall(PetscLogViewWarnDebugging(viewer));
1536
1537 /* Report events */
1538 PetscCall(PetscViewerASCIIPrintf(viewer, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total"));
1539 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, " Malloc EMalloc MMalloc RMI"));
1540 #if defined(PETSC_HAVE_DEVICE)
1541 PetscCall(PetscViewerASCIIPrintf(viewer, " GPU - CpuToGpu - - GpuToCpu - GPU"));
1542 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCall(PetscViewerASCIIPrintf(viewer, " GPU GPU"));
1543 #endif
1544 PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1545 PetscCall(PetscViewerASCIIPrintf(viewer, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s"));
1546 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, " Mbytes Mbytes Mbytes Mbytes"));
1547 #if defined(PETSC_HAVE_DEVICE)
1548 PetscCall(PetscViewerASCIIPrintf(viewer, " Mflop/s Count Size Count Size %%F"));
1549 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCall(PetscViewerASCIIPrintf(viewer, " Joule Mflop/J"));
1550 #endif
1551 PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1552 PetscCall(PetscViewerASCIIPrintf(viewer, "------------------------------------------------------------------------------------------------------------------------"));
1553 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, "-----------------------------"));
1554 #if defined(PETSC_HAVE_DEVICE)
1555 PetscCall(PetscViewerASCIIPrintf(viewer, "---------------------------------------"));
1556 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCall(PetscViewerASCIIPrintf(viewer, "-------------------"));
1557 #endif
1558 PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1559
1560 #if defined(PETSC_HAVE_DEVICE)
1561 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */
1562 PetscCall(PetscLogStateGetEventFromName(state, "TaoSolve", &TAO_Solve));
1563 PetscCall(PetscLogStateGetEventFromName(state, "TSStep", &TS_Step));
1564 PetscCall(PetscLogStateGetEventFromName(state, "SNESSolve", &SNES_Solve));
1565 PetscCall(PetscLogStateGetEventFromName(state, "KSPSolve", &KSP_Solve));
1566 #endif
1567
1568 for (stage = 0; stage < numStages; stage++) {
1569 PetscInt stage_id;
1570 PetscEventPerfInfo *stage_info;
1571 const char *stage_name;
1572
1573 if (!(stageVisible[stage] && stageUsed[stage])) continue;
1574 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id));
1575 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1576 PetscCall(PetscViewerASCIIPrintf(viewer, "\n--- Event Stage %d: %s\n\n", stage, stage_name));
1577 stage_info = &zero_info;
1578 if (localStageUsed[stage]) {
1579 PetscStagePerf *stage_perf_info;
1580
1581 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_perf_info));
1582 stage_info = &stage_perf_info->perfInfo;
1583 }
1584 PetscCallMPI(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1585 PetscCallMPI(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1586 PetscCallMPI(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1587 PetscCallMPI(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1588 PetscCallMPI(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1589 mess *= 0.5;
1590 messLen *= 0.5;
1591 red /= size;
1592
1593 for (event = 0; event < numEvents; event++) {
1594 PetscInt event_id;
1595 PetscEventPerfInfo *event_info = &zero_info;
1596 PetscBool is_zero = PETSC_FALSE;
1597 const char *event_name;
1598
1599 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id));
1600 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name));
1601 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &event_info));
1602 PetscCall(PetscArraycmp(event_info, &zero_info, 1, &is_zero));
1603 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPI_C_BOOL, MPI_LAND, comm));
1604 if (!is_zero) {
1605 flopr = event_info->flops;
1606 PetscCallMPI(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1607 PetscCallMPI(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1608 PetscCallMPI(MPIU_Allreduce(&event_info->flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1609 PetscCallMPI(MPIU_Allreduce(&event_info->time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
1610 PetscCallMPI(MPIU_Allreduce(&event_info->time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1611 PetscCallMPI(MPIU_Allreduce(&event_info->time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1612 PetscCallMPI(MPIU_Allreduce(&event_info->numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1613 PetscCallMPI(MPIU_Allreduce(&event_info->messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1614 PetscCallMPI(MPIU_Allreduce(&event_info->numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1615 PetscCallMPI(MPIU_Allreduce(&event_info->count, &minC, 1, MPI_INT, MPI_MIN, comm));
1616 PetscCallMPI(MPIU_Allreduce(&event_info->count, &maxC, 1, MPI_INT, MPI_MAX, comm));
1617 if (PetscLogMemory) {
1618 PetscCallMPI(MPIU_Allreduce(&event_info->memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1619 PetscCallMPI(MPIU_Allreduce(&event_info->mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1620 PetscCallMPI(MPIU_Allreduce(&event_info->mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1621 PetscCallMPI(MPIU_Allreduce(&event_info->mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1622 }
1623 #if defined(PETSC_HAVE_DEVICE)
1624 PetscCallMPI(MPIU_Allreduce(&event_info->CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1625 PetscCallMPI(MPIU_Allreduce(&event_info->GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1626 PetscCallMPI(MPIU_Allreduce(&event_info->CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1627 PetscCallMPI(MPIU_Allreduce(&event_info->GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1628 PetscCallMPI(MPIU_Allreduce(&event_info->GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1629 PetscCallMPI(MPIU_Allreduce(&event_info->GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
1630 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCallMPI(MPIU_Allreduce(&event_info->GpuEnergy, &gjoules, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
1631 #endif
1632 if (mint < 0.0) {
1633 PetscCall(PetscViewerASCIIPrintf(viewer, "WARNING!!! Minimum time %g over all processes for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, event_name));
1634 mint = 0;
1635 }
1636 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processes for %s is negative! Not possible!", minf, event_name);
1637 #if defined(PETSC_HAVE_DEVICE) && !defined(PETSC_HAVE_KOKKOS_WITHOUT_GPU)
1638 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */
1639 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) {
1640 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble));
1641 if (event_id != SNES_Solve && event_id != KSP_Solve && event_id != TS_Step && event_id != TAO_Solve) {
1642 memcpy(&mint, &nas, sizeof(PetscLogDouble));
1643 memcpy(&maxt, &nas, sizeof(PetscLogDouble));
1644 }
1645 }
1646 #endif
1647 totm *= 0.5;
1648 totml *= 0.5;
1649 totr /= size;
1650
1651 if (maxC != 0) {
1652 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC;
1653 else ratC = 0.0;
1654 if (mint != 0.0) ratt = maxt / mint;
1655 else ratt = 0.0;
1656 if (minf != 0.0) ratf = maxf / minf;
1657 else ratf = 0.0;
1658 if (TotalTime != 0.0) fracTime = tott / TotalTime;
1659 else fracTime = 0.0;
1660 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops;
1661 else fracFlops = 0.0;
1662 if (stageTime != 0.0) fracStageTime = tott / stageTime;
1663 else fracStageTime = 0.0;
1664 if (flops != 0.0) fracStageFlops = totf / flops;
1665 else fracStageFlops = 0.0;
1666 if (numMessages != 0.0) fracMess = totm / numMessages;
1667 else fracMess = 0.0;
1668 if (messageLength != 0.0) fracMessLen = totml / messageLength;
1669 else fracMessLen = 0.0;
1670 if (numReductions != 0.0) fracRed = totr / numReductions;
1671 else fracRed = 0.0;
1672 if (mess != 0.0) fracStageMess = totm / mess;
1673 else fracStageMess = 0.0;
1674 if (messLen != 0.0) fracStageMessLen = totml / messLen;
1675 else fracStageMessLen = 0.0;
1676 if (red != 0.0) fracStageRed = totr / red;
1677 else fracStageRed = 0.0;
1678 if (totm != 0.0) totml /= totm;
1679 else totml = 0.0;
1680 if (maxt != 0.0) flopr = totf / maxt;
1681 else flopr = 0.0;
1682 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) {
1683 if (PetscIsNanReal(maxt))
1684 PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f n/a n/a %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages n/a", event_name, maxC, ratC, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed));
1685 else
1686 PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6));
1687 } else {
1688 if (PetscIsNanReal(maxt)) { // when maxt, ratt, flopr are NaN (i.e., run with GPUs but without -log_view_gpu_time), replace the confusing "nan" with "n/a"
1689 PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f n/a n/a %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f n/a", event_name, maxC, ratC, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed));
1690 } else {
1691 PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6));
1692 }
1693 }
1694 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6));
1695 #if defined(PETSC_HAVE_DEVICE)
1696 if (totf != 0.0) fracgflops = gflops / totf;
1697 else fracgflops = 0.0;
1698 if (gmaxt != 0.0) gflopr = gflops / gmaxt;
1699 else gflopr = 0.0;
1700 if (PetscIsNanReal(gflopr)) {
1701 PetscCall(PetscViewerASCIIPrintf(viewer, " n/a %4.0f %3.2e %4.0f %3.2e %2.0f", cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops));
1702 } else {
1703 PetscCall(PetscViewerASCIIPrintf(viewer, " %5.0f %4.0f %3.2e %4.0f %3.2e %2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops));
1704 }
1705 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) {
1706 if (gjoules != 0.0) geff = gflops / gjoules;
1707 else geff = 0.0;
1708 PetscCall(PetscViewerASCIIPrintf(viewer, " %4.2e %4.2e", gjoules, geff));
1709 }
1710 #endif
1711 PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1712 }
1713 }
1714 }
1715 }
1716
1717 /* Memory usage and object creation */
1718 PetscCall(PetscViewerASCIIPrintf(viewer, "------------------------------------------------------------------------------------------------------------------------"));
1719 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, "-----------------------------"));
1720 #if defined(PETSC_HAVE_DEVICE)
1721 PetscCall(PetscViewerASCIIPrintf(viewer, "---------------------------------------"));
1722 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCall(PetscViewerASCIIPrintf(viewer, "-------------------"));
1723 #endif
1724 PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1725 PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1726
1727 /* Right now, only stages on the first processor are reported here, meaning only objects associated with
1728 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then
1729 stats for stages local to processor sets.
1730 */
1731 /* We should figure out the longest object name here (now 20 characters) */
1732 PetscCall(PetscViewerASCIIPrintf(viewer, "Object Type Creations Destructions. Reports information only for process 0.\n"));
1733 for (stage = 0; stage < numStages; stage++) {
1734 const char *stage_name;
1735
1736 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name));
1737 PetscCall(PetscViewerASCIIPrintf(viewer, "\n--- Event Stage %d: %s\n\n", stage, stage_name));
1738 if (localStageUsed[stage]) {
1739 PetscInt num_classes;
1740
1741 PetscCall(PetscLogStateGetNumClasses(state, &num_classes));
1742 for (oclass = 0; oclass < num_classes; oclass++) {
1743 PetscClassPerf *class_perf_info;
1744
1745 PetscCall(PetscLogHandlerDefaultGetClassPerf(handler, stage, oclass, &class_perf_info));
1746 if (class_perf_info->creations > 0 || class_perf_info->destructions > 0) {
1747 PetscLogClassInfo class_reg_info;
1748 PetscBool flg = PETSC_FALSE;
1749
1750 PetscCall(PetscLogStateClassGetInfo(state, oclass, &class_reg_info));
1751 if (stage == 0 && oclass == num_classes - 1) {
1752 if (PetscGlobalArgc == 0 && PetscGlobalArgs == NULL) {
1753 PetscCall(PetscStrcmp(class_reg_info.name, "Viewer", &flg));
1754 if (flg && class_perf_info->creations == PetscLogNumViewersCreated && class_perf_info->destructions == PetscLogNumViewersDestroyed) continue;
1755 }
1756 }
1757 PetscCall(PetscViewerASCIIPrintf(viewer, "%20s %5d %5d\n", class_reg_info.name, class_perf_info->creations, class_perf_info->destructions));
1758 }
1759 }
1760 }
1761 }
1762
1763 PetscCall(PetscFree(localStageUsed));
1764 PetscCall(PetscFree(stageUsed));
1765 PetscCall(PetscFree(localStageVisible));
1766 PetscCall(PetscFree(stageVisible));
1767 PetscCall(PetscLogGlobalNamesDestroy(&global_stages));
1768 PetscCall(PetscLogGlobalNamesDestroy(&global_events));
1769
1770 /* Information unrelated to this particular run */
1771 PetscCall(PetscViewerASCIIPrintf(viewer, "========================================================================================================================\n"));
1772 PetscCall(PetscTime(&y));
1773 PetscCall(PetscTime(&x));
1774 PetscCall(PetscTime(&y));
1775 PetscCall(PetscTime(&y));
1776 PetscCall(PetscTime(&y));
1777 PetscCall(PetscTime(&y));
1778 PetscCall(PetscTime(&y));
1779 PetscCall(PetscTime(&y));
1780 PetscCall(PetscTime(&y));
1781 PetscCall(PetscTime(&y));
1782 PetscCall(PetscTime(&y));
1783 PetscCall(PetscTime(&y));
1784 PetscCall(PetscViewerASCIIPrintf(viewer, "Average time to get PetscTime(): %g\n", (y - x) / 10.0));
1785 /* MPI information */
1786 if (size > 1) {
1787 MPI_Status status;
1788 PetscMPIInt tag;
1789 MPI_Comm newcomm;
1790
1791 PetscCallMPI(MPI_Barrier(comm));
1792 PetscCall(PetscTime(&x));
1793 PetscCallMPI(MPI_Barrier(comm));
1794 PetscCallMPI(MPI_Barrier(comm));
1795 PetscCallMPI(MPI_Barrier(comm));
1796 PetscCallMPI(MPI_Barrier(comm));
1797 PetscCallMPI(MPI_Barrier(comm));
1798 PetscCall(PetscTime(&y));
1799 PetscCall(PetscViewerASCIIPrintf(viewer, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0));
1800 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag));
1801 PetscCallMPI(MPI_Barrier(comm));
1802 if (rank) {
1803 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status));
1804 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm));
1805 } else {
1806 PetscCall(PetscTime(&x));
1807 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm));
1808 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status));
1809 PetscCall(PetscTime(&y));
1810 PetscCall(PetscViewerASCIIPrintf(viewer, "Average time for zero size MPI_Send(): %g\n", (y - x) / size));
1811 }
1812 PetscCall(PetscCommDestroy(&newcomm));
1813 }
1814 PetscCall(PetscOptionsView(NULL, viewer));
1815
1816 /* Machine and compile information */
1817 if (PetscDefined(USE_FORTRAN_KERNELS)) {
1818 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with FORTRAN kernels\n"));
1819 } else {
1820 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled without FORTRAN kernels\n"));
1821 }
1822 if (PetscDefined(USE_64BIT_INDICES)) {
1823 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with 64-bit PetscInt\n"));
1824 } else if (PetscDefined(USE___FLOAT128)) {
1825 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with 32-bit PetscInt\n"));
1826 }
1827 if (PetscDefined(USE_REAL_SINGLE)) {
1828 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with single precision PetscScalar and PetscReal\n"));
1829 } else if (PetscDefined(USE___FLOAT128)) {
1830 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with 128 bit precision PetscScalar and PetscReal\n"));
1831 }
1832 if (PetscDefined(USE_REAL_MAT_SINGLE)) {
1833 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with single precision matrices\n"));
1834 } else {
1835 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with full precision matrices (default)\n"));
1836 }
1837 PetscCall(PetscViewerASCIIPrintf(viewer, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt)));
1838
1839 PetscCall(PetscViewerASCIIPrintf(viewer, "Configure options: %s", petscconfigureoptions));
1840 PetscCall(PetscViewerASCIIPrintf(viewer, "%s", petscmachineinfo));
1841 PetscCall(PetscViewerASCIIPrintf(viewer, "%s", petsccompilerinfo));
1842 PetscCall(PetscViewerASCIIPrintf(viewer, "%s", petsccompilerflagsinfo));
1843 PetscCall(PetscViewerASCIIPrintf(viewer, "%s", petsclinkerinfo));
1844
1845 /* Cleanup */
1846 PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1847 PetscCall(PetscLogViewWarnNoGpuAwareMpi(viewer));
1848 PetscCall(PetscLogViewWarnDebugging(viewer));
1849 PetscCall(PetscFPTrapPop());
1850 PetscFunctionReturn(PETSC_SUCCESS);
1851 }
PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END()1852 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END()
1853
1854 static PetscErrorCode PetscLogHandlerView_Default(PetscLogHandler handler, PetscViewer viewer)
1855 {
1856 PetscViewerFormat format;
1857
1858 PetscFunctionBegin;
1859 PetscCall(PetscViewerGetFormat(viewer, &format));
1860 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) {
1861 PetscCall(PetscLogHandlerView_Default_Info(handler, viewer));
1862 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1863 PetscCall(PetscLogHandlerView_Default_Detailed(handler, viewer));
1864 } else if (format == PETSC_VIEWER_ASCII_CSV) {
1865 PetscCall(PetscLogHandlerView_Default_CSV(handler, viewer));
1866 }
1867 PetscFunctionReturn(PETSC_SUCCESS);
1868 }
1869
1870 /*MC
1871 PETSCLOGHANDLERDEFAULT - PETSCLOGHANDLERDEFAULT = "default" - A `PetscLogHandler` that collects data for PETSc
1872 default profiling log viewers (`PetscLogView()` and `PetscLogDump()`). A log handler of this type is
1873 created and started (`PetscLogHandlerStart()`) by `PetscLogDefaultBegin()`.
1874
1875 Options Database Keys:
1876 + -log_include_actions - include a growing list of actions (event beginnings and endings, object creations and destructions) in `PetscLogDump()` (`PetscLogActions()`).
1877 - -log_include_objects - include a growing list of object creations and destructions in `PetscLogDump()` (`PetscLogObjects()`).
1878
1879 Level: developer
1880
1881 .seealso: [](ch_profiling), `PetscLogHandler`
1882 M*/
1883
PetscLogHandlerCreate_Default(PetscLogHandler handler)1884 PETSC_INTERN PetscErrorCode PetscLogHandlerCreate_Default(PetscLogHandler handler)
1885 {
1886 PetscFunctionBegin;
1887 PetscCall(PetscLogHandlerContextCreate_Default((PetscLogHandler_Default *)&handler->data));
1888 handler->ops->destroy = PetscLogHandlerDestroy_Default;
1889 handler->ops->eventbegin = PetscLogHandlerEventBegin_Default;
1890 handler->ops->eventend = PetscLogHandlerEventEnd_Default;
1891 handler->ops->eventsync = PetscLogHandlerEventSync_Default;
1892 handler->ops->objectcreate = PetscLogHandlerObjectCreate_Default;
1893 handler->ops->objectdestroy = PetscLogHandlerObjectDestroy_Default;
1894 handler->ops->stagepush = PetscLogHandlerStagePush_Default;
1895 handler->ops->stagepop = PetscLogHandlerStagePop_Default;
1896 handler->ops->view = PetscLogHandlerView_Default;
1897 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerGetEventPerfInfo_C", PetscLogHandlerGetEventPerfInfo_Default));
1898 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerGetStagePerfInfo_C", PetscLogHandlerGetStagePerfInfo_Default));
1899 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerSetLogActions_C", PetscLogHandlerSetLogActions_Default));
1900 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerSetLogObjects_C", PetscLogHandlerSetLogObjects_Default));
1901 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerLogObjectState_C", PetscLogHandlerLogObjectState_Default));
1902 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerGetNumObjects_C", PetscLogHandlerGetNumObjects_Default));
1903 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventDeactivatePush_C", PetscLogHandlerEventDeactivatePush_Default));
1904 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventDeactivatePop_C", PetscLogHandlerEventDeactivatePop_Default));
1905 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventsPause_C", PetscLogHandlerEventsPause_Default));
1906 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventsResume_C", PetscLogHandlerEventsResume_Default));
1907 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerDump_C", PetscLogHandlerDump_Default));
1908 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerStageSetVisible_C", PetscLogHandlerStageSetVisible_Default));
1909 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerStageGetVisible_C", PetscLogHandlerStageGetVisible_Default));
1910 PetscFunctionReturn(PETSC_SUCCESS);
1911 }
1912