xref: /petsc/src/sys/logging/handler/impls/nested/xmlviewer.c (revision 57d508425293f0bb93f59574d14951d8faac9af8)
1 /*************************************************************************************
2  *    M A R I T I M E  R E S E A R C H  I N S T I T U T E  N E T H E R L A N D S     *
3  *************************************************************************************
4  *    authors: Koos Huijssen, Christiaan M. Klaij                                    *
5  *************************************************************************************
6  *    content: Viewer for writing XML output                                         *
7  *************************************************************************************/
8 #include <petscviewer.h>
9 #include <petsc/private/logimpl.h>
10 #include "xmlviewer.h"
11 #include "lognested.h"
12 
13 static PetscErrorCode PetscViewerXMLStartSection(PetscViewer viewer, const char *name, const char *desc)
14 {
15   PetscInt XMLSectionDepthPetsc;
16   int      XMLSectionDepth;
17 
18   PetscFunctionBegin;
19   PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc));
20   PetscCall(PetscCIntCast(XMLSectionDepthPetsc, &XMLSectionDepth));
21   if (!desc) {
22     PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s>\n", 2 * XMLSectionDepth, "", name));
23   } else {
24     PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s desc=\"%s\">\n", 2 * XMLSectionDepth, "", name, desc));
25   }
26   PetscCall(PetscViewerASCIIPushTab(viewer));
27   PetscFunctionReturn(PETSC_SUCCESS);
28 }
29 
/* Initialize a viewer to XML: write the XML declaration and stylesheet reference,
   and open the outermost <root> element (closed by PetscViewerFinalASCII_XML()) */
static PetscErrorCode PetscViewerInitASCII_XML(PetscViewer viewer)
{
  MPI_Comm comm;
  char     PerfScript[PETSC_MAX_PATH_LEN + 40];

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
  PetscCall(PetscViewerASCIIPrintf(viewer, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"));
  /* NOTE(review): PetscStrreplace presumably expands ${VAR} tokens; the literal here
     contains none, so this appears to just copy the stylesheet line into PerfScript —
     confirm whether variable expansion in the stylesheet path is intended */
  PetscCall(PetscStrreplace(comm, "<?xml-stylesheet type=\"text/xsl\" href=\"performance_xml2html.xsl\"?>", PerfScript, sizeof(PerfScript)));
  PetscCall(PetscViewerASCIIPrintf(viewer, "%s\n", PerfScript));
  PetscCall(PetscViewerXMLStartSection(viewer, "root", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
44 
45 static PetscErrorCode PetscViewerXMLEndSection(PetscViewer viewer, const char *name)
46 {
47   PetscInt XMLSectionDepthPetsc;
48   int      XMLSectionDepth;
49 
50   PetscFunctionBegin;
51   PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc));
52   PetscCall(PetscCIntCast(XMLSectionDepthPetsc, &XMLSectionDepth));
53   if (XMLSectionDepth > 0) PetscCall(PetscViewerASCIIPopTab(viewer));
54   PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc));
55   PetscCall(PetscCIntCast(XMLSectionDepthPetsc, &XMLSectionDepth));
56   PetscCall(PetscViewerASCIIPrintf(viewer, "%*s</%s>\n", 2 * XMLSectionDepth, "", name));
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
59 
/* Finalize the XML output by closing the outermost <root> element that was
   opened in PetscViewerInitASCII_XML() */
static PetscErrorCode PetscViewerFinalASCII_XML(PetscViewer viewer)
{
  PetscFunctionBegin;
  PetscCall(PetscViewerXMLEndSection(viewer, "root"));
  PetscFunctionReturn(PETSC_SUCCESS);
}
67 
68 static PetscErrorCode PetscViewerXMLPutString(PetscViewer viewer, const char *name, const char *desc, const char *value)
69 {
70   PetscInt XMLSectionDepthPetsc;
71   int      XMLSectionDepth;
72 
73   PetscFunctionBegin;
74   PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc));
75   PetscCall(PetscCIntCast(XMLSectionDepthPetsc, &XMLSectionDepth));
76   if (!desc) {
77     PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s>%s</%s>\n", 2 * XMLSectionDepth, "", name, value, name));
78   } else {
79     PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s desc=\"%s\">%s</%s>\n", 2 * XMLSectionDepth, "", name, desc, value, name));
80   }
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 static PetscErrorCode PetscViewerXMLPutInt(PetscViewer viewer, const char *name, const char *desc, int value)
85 {
86   PetscInt XMLSectionDepthPetsc;
87   int      XMLSectionDepth;
88 
89   PetscFunctionBegin;
90   PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc));
91   PetscCall(PetscCIntCast(XMLSectionDepthPetsc, &XMLSectionDepth));
92   if (!desc) {
93     PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s>%d</%s>\n", 2 * XMLSectionDepth, "", name, value, name));
94   } else {
95     PetscCall(PetscViewerASCIIPrintf(viewer, "%*s<%s desc=\"%s\">%d</%s>\n", 2 * XMLSectionDepth, "", name, desc, value, name));
96   }
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 static PetscErrorCode PetscViewerXMLPutDouble(PetscViewer viewer, const char *name, PetscLogDouble value, const char *format)
101 {
102   PetscInt XMLSectionDepthPetsc;
103   int      XMLSectionDepth;
104   char     buffer[1024];
105 
106   PetscFunctionBegin;
107   PetscCall(PetscViewerASCIIGetTab(viewer, &XMLSectionDepthPetsc));
108   PetscCall(PetscCIntCast(XMLSectionDepthPetsc, &XMLSectionDepth));
109   PetscCall(PetscSNPrintf(buffer, sizeof(buffer), "%*s<%s>%s</%s>\n", 2 * XMLSectionDepth, "", name, format, name));
110   PetscCall(PetscViewerASCIIPrintf(viewer, buffer, value));
111   PetscFunctionReturn(PETSC_SUCCESS);
112 }
113 
114 static PetscErrorCode PetscPrintExeSpecs(PetscViewer viewer)
115 {
116   char        arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128];
117   char        version[256], buildoptions[128] = "";
118   PetscMPIInt size;
119   size_t      len;
120 
121   PetscFunctionBegin;
122   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)viewer), &size));
123   PetscCall(PetscGetArchType(arch, sizeof(arch)));
124   PetscCall(PetscGetHostName(hostname, sizeof(hostname)));
125   PetscCall(PetscGetUserName(username, sizeof(username)));
126   PetscCall(PetscGetProgramName(pname, sizeof(pname)));
127   PetscCall(PetscGetDate(date, sizeof(date)));
128   PetscCall(PetscGetVersion(version, sizeof(version)));
129 
130   PetscCall(PetscViewerXMLStartSection(viewer, "runspecification", "Run Specification"));
131   PetscCall(PetscViewerXMLPutString(viewer, "executable", "Executable", pname));
132   PetscCall(PetscViewerXMLPutString(viewer, "architecture", "Architecture", arch));
133   PetscCall(PetscViewerXMLPutString(viewer, "hostname", "Host", hostname));
134   PetscCall(PetscViewerXMLPutInt(viewer, "nprocesses", "Number of processes", size));
135   PetscCall(PetscViewerXMLPutString(viewer, "user", "Run by user", username));
136   PetscCall(PetscViewerXMLPutString(viewer, "date", "Started at", date));
137   PetscCall(PetscViewerXMLPutString(viewer, "petscrelease", "PETSc Release", version));
138 
139   if (PetscDefined(USE_DEBUG)) PetscCall(PetscStrlcat(buildoptions, "Debug ", sizeof(buildoptions)));
140   if (PetscDefined(USE_COMPLEX)) PetscCall(PetscStrlcat(buildoptions, "Complex ", sizeof(buildoptions)));
141   if (PetscDefined(USE_REAL_SINGLE)) {
142     PetscCall(PetscStrlcat(buildoptions, "Single ", sizeof(buildoptions)));
143   } else if (PetscDefined(USE_REAL___FLOAT128)) {
144     PetscCall(PetscStrlcat(buildoptions, "Quadruple ", sizeof(buildoptions)));
145   } else if (PetscDefined(USE_REAL___FP16)) {
146     PetscCall(PetscStrlcat(buildoptions, "Half ", sizeof(buildoptions)));
147   }
148   if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscStrlcat(buildoptions, "Int64 ", sizeof(buildoptions)));
149 #if defined(__cplusplus)
150   PetscCall(PetscStrlcat(buildoptions, "C++ ", sizeof(buildoptions)));
151 #endif
152   PetscCall(PetscStrlen(buildoptions, &len));
153   if (len) PetscCall(PetscViewerXMLPutString(viewer, "petscbuildoptions", "PETSc build options", buildoptions));
154   PetscCall(PetscViewerXMLEndSection(viewer, "runspecification"));
155   PetscFunctionReturn(PETSC_SUCCESS);
156 }
157 
158 static PetscErrorCode PetscPrintXMLGlobalPerformanceElement(PetscViewer viewer, const char *name, const char *desc, PetscLogDouble local_val, const PetscBool print_average, const PetscBool print_total)
159 {
160   PetscLogDouble min, tot, ratio, avg;
161   MPI_Comm       comm;
162   PetscMPIInt    rank, size;
163   PetscLogDouble valrank[2], max[2];
164 
165   PetscFunctionBegin;
166   PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
167   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)viewer), &size));
168   PetscCallMPI(MPI_Comm_rank(comm, &rank));
169 
170   valrank[0] = local_val;
171   valrank[1] = (PetscLogDouble)rank;
172   PetscCallMPI(MPIU_Allreduce(&local_val, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm));
173   PetscCallMPI(MPIU_Allreduce(valrank, &max, 1, MPIU_2PETSCLOGDOUBLE, MPI_MAXLOC, comm));
174   PetscCallMPI(MPIU_Allreduce(&local_val, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));
175   avg = tot / ((PetscLogDouble)size);
176   if (min != 0.0) ratio = max[0] / min;
177   else ratio = 0.0;
178 
179   PetscCall(PetscViewerXMLStartSection(viewer, name, desc));
180   PetscCall(PetscViewerXMLPutDouble(viewer, "max", max[0], "%e"));
181   PetscCall(PetscViewerXMLPutInt(viewer, "maxrank", "rank at which max was found", (PetscMPIInt)max[1]));
182   PetscCall(PetscViewerXMLPutDouble(viewer, "ratio", ratio, "%f"));
183   if (print_average) PetscCall(PetscViewerXMLPutDouble(viewer, "average", avg, "%e"));
184   if (print_total) PetscCall(PetscViewerXMLPutDouble(viewer, "total", tot, "%e"));
185   PetscCall(PetscViewerXMLEndSection(viewer, name));
186   PetscFunctionReturn(PETSC_SUCCESS);
187 }
188 
/* Print the global performance: max, max/min, average and total of
 *      time, objects, flops, flops/sec, memory, MPI messages, MPI message lengths, MPI reductions.
 * Collective on the viewer's communicator (each element performs MPI reductions).
 */
static PetscErrorCode PetscPrintGlobalPerformance(PetscViewer viewer, PetscLogDouble locTotalTime, PetscLogHandler default_handler)
{
  PetscLogDouble  flops, mem, red, mess;
  PetscInt        num_objects;
  const PetscBool print_total_yes = PETSC_TRUE, print_total_no = PETSC_FALSE, print_average_no = PETSC_FALSE, print_average_yes = PETSC_TRUE;

  PetscFunctionBegin;
  /* Must preserve reduction count before we go on: printing the elements below performs
     MPI reductions itself, which would otherwise inflate these counters */
  red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;

  /* Calculate summary information */
  PetscCall(PetscViewerXMLStartSection(viewer, "globalperformance", "Global performance"));

  /*   Time */
  PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "time", "Time (sec)", locTotalTime, print_average_yes, print_total_no));

  /*   Objects */
  PetscCall(PetscLogHandlerGetNumObjects(default_handler, &num_objects));
  PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "objects", "Objects", (PetscLogDouble)num_objects, print_average_yes, print_total_no));

  /*   Flop */
  PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "mflop", "MFlop", petsc_TotalFlops / 1.0E6, print_average_yes, print_total_yes));

  /*   Flop/sec -- Must talk to Barry here */
  if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime;
  else flops = 0.0;
  PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "mflops", "MFlop/sec", flops / 1.0E6, print_average_yes, print_total_yes));

  /*   Memory: only printed when malloc-usage tracking reports something */
  PetscCall(PetscMallocGetMaximumUsage(&mem));
  if (mem > 0.0) PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "memory", "Memory (MiB)", mem / 1024.0 / 1024.0, print_average_yes, print_total_yes));
  /*   Messages: sends and receives each count half a transfer, hence the factor 0.5 */
  mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct);
  PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "messagetransfers", "MPI Message Transfers", mess, print_average_yes, print_total_yes));

  /*   Message Volume: same 0.5 convention as the transfer count above */
  mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len);
  PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "messagevolume", "MPI Message Volume (MiB)", mess / 1024.0 / 1024.0, print_average_yes, print_total_yes));

  /*   Reductions: uses the count saved at function entry */
  PetscCall(PetscPrintXMLGlobalPerformanceElement(viewer, "reductions", "MPI Reductions", red, print_average_no, print_total_no));
  PetscCall(PetscViewerXMLEndSection(viewer, "globalperformance"));
  PetscFunctionReturn(PETSC_SUCCESS);
}
236 
/* Print one per-rank performance metric of a timer-tree line as an XML section <name>.
 * The metric is reduced over all ranks: when the spread is large (max > min * minmaxtreshold)
 * the average, min, max and the ranks attaining min/max are printed; otherwise only the
 * average. Nothing is printed when the value lies in [minthreshold, maxthreshold) on all
 * ranks (e.g. exactly one call per parent, or no value at all). Collective.
 * (The previous header comment here was a copy-paste of PetscPrintGlobalPerformance's.)
 */
static PetscErrorCode PetscPrintXMLNestedLinePerfResults(PetscViewer viewer, const char *name, PetscLogDouble value, PetscLogDouble minthreshold, PetscLogDouble maxthreshold, PetscLogDouble minmaxtreshold)
{
  MPI_Comm       comm; /* MPI communicator in reduction */
  PetscMPIInt    rank; /* rank of this process */
  PetscLogDouble val_in[2], max[2], min[2];
  PetscLogDouble minvalue, maxvalue, tot;
  PetscMPIInt    size;
  PetscMPIInt    minLoc, maxLoc;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  val_in[0] = value;
  val_in[1] = (PetscLogDouble)rank; /* MAXLOC/MINLOC companion: the rank owning the value */
  PetscCallMPI(MPIU_Allreduce(val_in, max, 1, MPIU_2PETSCLOGDOUBLE, MPI_MAXLOC, comm));
  PetscCallMPI(MPIU_Allreduce(val_in, min, 1, MPIU_2PETSCLOGDOUBLE, MPI_MINLOC, comm));
  maxvalue = max[0];
  maxLoc   = (PetscMPIInt)max[1];
  minvalue = min[0];
  minLoc   = (PetscMPIInt)min[1];
  PetscCallMPI(MPIU_Allreduce(&value, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm));

  if (maxvalue < maxthreshold && minvalue >= minthreshold) {
    /* One call per parent or NO value: don't print */
  } else {
    PetscCall(PetscViewerXMLStartSection(viewer, name, NULL));
    if (maxvalue > minvalue * minmaxtreshold) {
      /* large spread across ranks: report avg/min/max and where the extremes occurred */
      PetscCall(PetscViewerXMLPutDouble(viewer, "avgvalue", tot / size, "%g"));
      PetscCall(PetscViewerXMLPutDouble(viewer, "minvalue", minvalue, "%g"));
      PetscCall(PetscViewerXMLPutDouble(viewer, "maxvalue", maxvalue, "%g"));
      PetscCall(PetscViewerXMLPutInt(viewer, "minloc", NULL, minLoc));
      PetscCall(PetscViewerXMLPutInt(viewer, "maxloc", NULL, maxLoc));
    } else {
      PetscCall(PetscViewerXMLPutDouble(viewer, "value", tot / size, "%g"));
    }
    PetscCall(PetscViewerXMLEndSection(viewer, name));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
280 
281 static PetscErrorCode PetscLogNestedTreePrintLine(PetscViewer viewer, const PetscEventPerfInfo *perfInfo, int childCount, int parentCount, const char *name, PetscLogDouble totalTime)
282 {
283   PetscLogDouble time = perfInfo->time;
284   PetscLogDouble timeMx;
285   MPI_Comm       comm;
286 
287   PetscFunctionBegin;
288   PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
289   PetscCallMPI(MPIU_Allreduce(&time, &timeMx, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm));
290   PetscCall(PetscViewerXMLPutString(viewer, "name", NULL, name));
291   PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "time", time / totalTime * 100.0, 0, 0, 1.02));
292   PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "ncalls", parentCount > 0 ? ((PetscLogDouble)childCount) / ((PetscLogDouble)parentCount) : 1.0, 0.99, 1.01, 1.02));
293   PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "mflops", time >= timeMx * 0.001 ? 1e-6 * perfInfo->flops / time : 0, 0, 0.01, 1.05));
294   PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "mbps", time >= timeMx * 0.001 ? perfInfo->messageLength / (1024 * 1024 * time) : 0, 0, 0.01, 1.05));
295   PetscCall(PetscPrintXMLNestedLinePerfResults(viewer, "nreductsps", time >= timeMx * 0.001 ? perfInfo->numReductions / time : 0, 0, 0.01, 1.05));
296   PetscFunctionReturn(PETSC_SUCCESS);
297 }
298 
299 static PetscErrorCode PetscNestedNameGetBase(const char name[], const char *base[])
300 {
301   size_t n;
302 
303   PetscFunctionBegin;
304   PetscCall(PetscStrlen(name, &n));
305   while (n > 0 && name[n - 1] != ';') n--;
306   *base = &name[n];
307   PetscFunctionReturn(PETSC_SUCCESS);
308 }
309 
/* Recursively print the performance of parent_node's direct children, which are stored
 * contiguously in tree[]/perf[] (each child is followed by its own descendants).
 * Children are printed in descending order of max-over-ranks time; children below
 * threshold_time are folded into an "other" entry (XML) or back into the parent
 * (flamegraph). The parent's time not accounted for by any child is printed as "self"
 * (sentinel perm index -1; "other" uses sentinel -2).
 * Collective: child times are synced by allreduce before any print/skip decision so
 * every rank takes the same code path.
 */
static PetscErrorCode PetscLogNestedTreePrint(PetscViewer viewer, double total_time, double threshold_time, const PetscNestedEventNode *parent_node, PetscEventPerfInfo *parent_info, const PetscNestedEventNode tree[], PetscEventPerfInfo perf[], PetscLogNestedType type, PetscBool print_events)
{
  PetscInt           num_children = 0, num_printed;
  PetscInt           num_nodes    = parent_node->num_descendants;
  PetscInt          *perm;
  PetscReal         *times; // Not PetscLogDouble, to reuse PetscSortRealWithArrayInt() below
  PetscEventPerfInfo other;

  PetscFunctionBegin;
  /* count direct children: each child node is followed by its num_descendants subtree */
  for (PetscInt node = 0; node < num_nodes; node += 1 + tree[node].num_descendants) {
    PetscAssert(tree[node].parent == parent_node->id, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed tree invariant");
    num_children++;
  }
  num_printed = num_children;
  PetscCall(PetscMemzero(&other, sizeof(other)));
  /* two extra slots: [num_children] = parent's "self" time, [num_children + 1] = "other" */
  PetscCall(PetscMalloc2(num_children + 2, &times, num_children + 2, &perm));
  for (PetscInt i = 0, node = 0; node < num_nodes; i++, node += 1 + tree[node].num_descendants) {
    PetscLogDouble child_time = perf[node].time;

    perm[i] = node;
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &child_time, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)viewer)));
    times[i] = -child_time; // negated so an ascending sort yields descending time

    /* subtract the child's contribution so parent_info ends up holding "self" values */
    parent_info->time -= perf[node].time;
    parent_info->flops -= perf[node].flops;
    parent_info->numMessages -= perf[node].numMessages;
    parent_info->messageLength -= perf[node].messageLength;
    parent_info->numReductions -= perf[node].numReductions;
    if (child_time < threshold_time) {
      /* too small to print: fold into "other" (XML) or back into the parent (flamegraph) */
      PetscEventPerfInfo *add_to = (type == PETSC_LOG_NESTED_XML) ? &other : parent_info;

      add_to->time += perf[node].time;
      add_to->flops += perf[node].flops;
      add_to->numMessages += perf[node].numMessages;
      add_to->messageLength += perf[node].messageLength;
      add_to->numReductions += perf[node].numReductions;
      add_to->count += perf[node].count;
      num_printed--;
    }
  }
  perm[num_children]      = -1; /* sentinel: parent's "self" entry */
  times[num_children]     = -parent_info->time;
  perm[num_children + 1]  = -2; /* sentinel: aggregated "other" entry */
  times[num_children + 1] = -other.time;
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &times[num_children], 2, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)viewer)));
  // Sync the time with allreduce results, otherwise it could result in code path divergence through num_printed and early return.
  other.time = -times[num_children + 1];
  if (type == PETSC_LOG_NESTED_FLAMEGRAPH) {
    /* The output is given as an integer in microseconds because otherwise the file cannot be read
     * by apps such as speedscope (https://speedscope.app/). */
    PetscCall(PetscViewerASCIIPrintf(viewer, "%s %" PetscInt64_FMT "\n", parent_node->name, (PetscInt64)(-times[num_children] * 1e6)));
  }
  if (other.time > 0.0) num_printed++;
  // number of items other than "self" that will be printed
  if (num_printed == 0) {
    PetscCall(PetscFree2(times, perm));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  // sort descending by time
  PetscCall(PetscSortRealWithArrayInt(num_children + 2, times, perm));
  if (type == PETSC_LOG_NESTED_XML && print_events) PetscCall(PetscViewerXMLStartSection(viewer, "events", NULL));
  for (PetscInt i = 0; i < num_children + 2; i++) {
    PetscInt       node       = perm[i];
    PetscLogDouble child_time = -times[i];

    if (child_time >= threshold_time || (node < 0 && child_time > 0.0)) {
      if (type == PETSC_LOG_NESTED_XML) {
        PetscCall(PetscViewerXMLStartSection(viewer, "event", NULL));
        if (node == -1) {
          PetscCall(PetscLogNestedTreePrintLine(viewer, parent_info, 0, 0, "self", total_time));
        } else if (node == -2) {
          PetscCall(PetscLogNestedTreePrintLine(viewer, &other, other.count, parent_info->count, "other", total_time));
        } else {
          const char *base_name = NULL;
          PetscCall(PetscNestedNameGetBase(tree[node].name, &base_name));
          PetscCall(PetscLogNestedTreePrintLine(viewer, &perf[node], perf[node].count, parent_info->count, base_name, total_time));
          /* recurse into the child's subtree, which starts right after the child node */
          PetscCall(PetscLogNestedTreePrint(viewer, total_time, threshold_time, &tree[node], &perf[node], &tree[node + 1], &perf[node + 1], type, PETSC_TRUE));
        }
        PetscCall(PetscViewerXMLEndSection(viewer, "event"));
      } else if (node >= 0) {
        PetscCall(PetscLogNestedTreePrint(viewer, total_time, threshold_time, &tree[node], &perf[node], &tree[node + 1], &perf[node + 1], type, PETSC_FALSE));
      }
    }
  }
  if (type == PETSC_LOG_NESTED_XML && print_events) PetscCall(PetscViewerXMLEndSection(viewer, "events"));
  PetscCall(PetscFree2(times, perm));
  PetscFunctionReturn(PETSC_SUCCESS);
}
398 
399 static PetscErrorCode PetscLogNestedTreePrintTop(PetscViewer viewer, PetscNestedEventTree *tree, PetscLogDouble threshold, PetscLogNestedType type)
400 {
401   PetscNestedEventNode *main_stage;
402   PetscNestedEventNode *tree_rem;
403   PetscEventPerfInfo   *main_stage_perf;
404   PetscEventPerfInfo   *perf_rem;
405   PetscLogDouble        time;
406   PetscLogDouble        threshold_time;
407 
408   PetscFunctionBegin;
409   main_stage      = &tree->nodes[0];
410   tree_rem        = &tree->nodes[1];
411   main_stage_perf = &tree->perf[0];
412   perf_rem        = &tree->perf[1];
413   time            = main_stage_perf->time;
414   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &time, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, tree->comm));
415   /* Print (or ignore) the children in ascending order of total time */
416   if (type == PETSC_LOG_NESTED_XML) {
417     PetscCall(PetscViewerXMLStartSection(viewer, "timertree", "Timings tree"));
418     PetscCall(PetscViewerXMLPutDouble(viewer, "totaltime", time, "%f"));
419     PetscCall(PetscViewerXMLPutDouble(viewer, "timethreshold", threshold, "%f"));
420   }
421   threshold_time = time * (threshold / 100.0 + 1.e-12);
422   PetscCall(PetscLogNestedTreePrint(viewer, time, threshold_time, main_stage, main_stage_perf, tree_rem, perf_rem, type, PETSC_FALSE));
423   if (type == PETSC_LOG_NESTED_XML) PetscCall(PetscViewerXMLEndSection(viewer, "timertree"));
424   PetscFunctionReturn(PETSC_SUCCESS);
425 }
426 
/* Render the nested log handler's event tree as XML: prolog, run specification,
 * global performance summary, and the timer tree (events consuming less than
 * nested->threshold percent of the run are folded into "other"). Collective. */
PETSC_INTERN PetscErrorCode PetscLogHandlerView_Nested_XML(PetscLogHandler_Nested nested, PetscNestedEventTree *tree, PetscViewer viewer)
{
  PetscFunctionBegin;
  PetscCall(PetscViewerInitASCII_XML(viewer));
  PetscCall(PetscViewerASCIIPrintf(viewer, "<!-- PETSc Performance Summary: -->\n"));
  PetscCall(PetscViewerXMLStartSection(viewer, "petscroot", NULL));

  // Print global information about this run
  PetscCall(PetscPrintExeSpecs(viewer));

  if (PetscDefined(USE_LOG)) {
    PetscEventPerfInfo *main_stage_info;
    PetscLogDouble      locTotalTime;

    // (stage 0, event 0) is taken as the main stage; its time is this rank's total run time
    PetscCall(PetscLogHandlerGetEventPerfInfo(nested->handler, 0, 0, &main_stage_info));
    locTotalTime = main_stage_info->time;
    PetscCall(PetscPrintGlobalPerformance(viewer, locTotalTime, nested->handler));
  }
  PetscCall(PetscLogNestedTreePrintTop(viewer, tree, nested->threshold, PETSC_LOG_NESTED_XML));
  PetscCall(PetscViewerXMLEndSection(viewer, "petscroot"));
  PetscCall(PetscViewerFinalASCII_XML(viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
450 
/* Render the nested log handler's event tree in flamegraph format: one
 * "stack;path count" line per event, with counts in integer microseconds
 * (see PetscLogNestedTreePrint). Collective. */
PETSC_INTERN PetscErrorCode PetscLogHandlerView_Nested_Flamegraph(PetscLogHandler_Nested nested, PetscNestedEventTree *tree, PetscViewer viewer)
{
  PetscFunctionBegin;
  PetscCall(PetscLogNestedTreePrintTop(viewer, tree, nested->threshold, PETSC_LOG_NESTED_FLAMEGRAPH));
  PetscFunctionReturn(PETSC_SUCCESS);
}
457