1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 #include <petscdevice.h> 15 #include <petsc/private/deviceimpl.h> 16 17 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 18 19 #if defined(PETSC_USE_LOG) 20 #include <petscmachineinfo.h> 21 #include <petscconfiginfo.h> 22 23 /* used in the MPI_XXX() count macros in petsclog.h */ 24 25 /* Action and object logging variables */ 26 Action *petsc_actions = NULL; 27 Object *petsc_objects = NULL; 28 PetscBool petsc_logActions = PETSC_FALSE; 29 PetscBool petsc_logObjects = PETSC_FALSE; 30 int petsc_numActions = 0, petsc_maxActions = 100; 31 int petsc_numObjects = 0, petsc_maxObjects = 100; 32 int petsc_numObjectsDestroyed = 0; 33 34 /* Global counters */ 35 PetscLogDouble petsc_BaseTime = 0.0; 36 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 37 PetscLogDouble petsc_tmp_flops = 0.0; /* The incremental number of flops */ 38 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 39 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 40 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 41 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 42 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 43 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 44 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 45 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 46 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 47 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 48 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 49 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 50 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 51 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 52 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 53 #if defined(PETSC_HAVE_DEVICE) 54 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 55 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 56 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 57 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 58 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 59 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 60 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 61 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 62 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 63 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 64 #endif 65 66 /* Logging functions */ 67 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 68 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 69 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 70 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 71 72 /* Tracing event logging variables */ 73 FILE *petsc_tracefile = NULL; 74 int petsc_tracelevel = 0; 75 const char *petsc_traceblanks = " "; 76 char petsc_tracespace[128] = " "; 77 PetscLogDouble petsc_tracetime = 0.0; 78 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 79 80 static PetscIntStack current_log_event_stack = NULL; 81 82 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) 83 { 84 int stage; 85 PetscBool opt; 86 87 PetscFunctionBegin; 88 if (PetscLogInitializeCalled) PetscFunctionReturn(0); 89 PetscLogInitializeCalled = PETSC_TRUE; 90 91 PetscCall(PetscIntStackCreate(¤t_log_event_stack)); 92 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt)); 93 if (opt) petsc_logActions = PETSC_FALSE; 94 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt)); 95 if (opt) petsc_logObjects = PETSC_FALSE; 96 if (petsc_logActions) PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 97 if (petsc_logObjects) PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 98 PetscLogPHC = PetscLogObjCreateDefault; 99 PetscLogPHD = PetscLogObjDestroyDefault; 100 /* Setup default logging structures */ 101 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 102 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 103 104 /* All processors sync here for more consistent logging */ 105 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 106 PetscTime(&petsc_BaseTime); 107 PetscCall(PetscLogStagePush(stage)); 108 PetscFunctionReturn(0); 109 } 110 111 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) 112 { 113 PetscStageLog stageLog; 114 115 PetscFunctionBegin; 116 PetscCall(PetscFree(petsc_actions)); 117 PetscCall(PetscFree(petsc_objects)); 118 PetscCall(PetscLogNestedEnd()); 119 PetscCall(PetscLogSet(NULL, NULL)); 120 121 /* Resetting phase */ 122 PetscCall(PetscLogGetStageLog(&stageLog)); 123 PetscCall(PetscStageLogDestroy(stageLog)); 124 PetscCall(PetscIntStackDestroy(current_log_event_stack)); 125 current_log_event_stack = NULL; 126 127 petsc_TotalFlops = 0.0; 128 petsc_numActions = 0; 129 petsc_numObjects = 0; 130 petsc_numObjectsDestroyed = 0; 131 petsc_maxActions = 100; 132 petsc_maxObjects = 100; 133 petsc_actions = NULL; 134 petsc_objects = NULL; 135 petsc_logActions = PETSC_FALSE; 136 petsc_logObjects = PETSC_FALSE; 137 petsc_BaseTime = 0.0; 138 petsc_TotalFlops = 0.0; 139 petsc_tmp_flops = 0.0; 140 petsc_send_ct = 0.0; 141 petsc_recv_ct = 0.0; 142 petsc_send_len = 0.0; 143 petsc_recv_len = 0.0; 144 petsc_isend_ct = 0.0; 145 petsc_irecv_ct = 0.0; 146 petsc_isend_len = 0.0; 147 petsc_irecv_len = 0.0; 148 petsc_wait_ct = 0.0; 149 petsc_wait_any_ct = 0.0; 150 petsc_wait_all_ct = 0.0; 151 petsc_sum_of_waits_ct = 0.0; 152 petsc_allreduce_ct = 0.0; 153 petsc_gather_ct = 0.0; 154 petsc_scatter_ct = 0.0; 155 #if defined(PETSC_HAVE_DEVICE) 156 petsc_ctog_ct = 0.0; 157 petsc_gtoc_ct = 0.0; 158 petsc_ctog_sz = 0.0; 159 petsc_gtoc_sz = 0.0; 160 petsc_gflops = 0.0; 161 petsc_gtime = 0.0; 162 #endif 163 PETSC_LARGEST_EVENT = PETSC_EVENT; 164 PetscLogPHC = NULL; 165 PetscLogPHD = NULL; 166 petsc_tracefile = NULL; 167 petsc_tracelevel = 0; 168 petsc_traceblanks = " "; 169 petsc_tracespace[0] = ' '; 170 petsc_tracespace[1] = 0; 171 petsc_tracetime = 0.0; 172 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 173 PETSC_OBJECT_CLASSID = 0; 174 petsc_stageLog = NULL; 175 PetscLogInitializeCalled = PETSC_FALSE; 176 PetscFunctionReturn(0); 177 } 178 179 /*@C 180 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 181 182 Not Collective 183 184 Input Parameters: 185 + b - The function called at beginning of event 186 - e - The function called at end of event 187 188 Level: developer 189 190 Developer Note: 191 The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. 192 193 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()` 194 @*/ 195 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) 196 { 197 PetscFunctionBegin; 198 PetscLogPLB = b; 199 PetscLogPLE = e; 200 PetscFunctionReturn(0); 201 } 202 203 /*@C 204 PetscLogIsActive - Check if logging is currently in progress. 205 206 Not Collective 207 208 Output Parameter: 209 . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise 210 211 Level: beginner 212 213 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 214 @*/ 215 PetscErrorCode PetscLogIsActive(PetscBool *isActive) 216 { 217 PetscFunctionBegin; 218 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 219 PetscFunctionReturn(0); 220 } 221 222 /*@C 223 PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop 224 rates and object creation and should not slow programs down too much. 225 This routine may be called more than once. 226 227 Logically Collective over `PETSC_COMM_WORLD` 228 229 Options Database Key: 230 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 231 screen (for code configured with --with-log=1 (which is the default)) 232 233 Usage: 234 .vb 235 PetscInitialize(...); 236 PetscLogDefaultBegin(); 237 ... code ... 238 PetscLogView(viewer); or PetscLogDump(); 239 PetscFinalize(); 240 .ve 241 242 Level: advanced 243 244 Note: 245 `PetscLogView()` or `PetscLogDump()` actually cause the printing of 246 the logging information. 247 248 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 249 @*/ 250 PetscErrorCode PetscLogDefaultBegin(void) 251 { 252 PetscFunctionBegin; 253 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 254 PetscFunctionReturn(0); 255 } 256 257 /*@C 258 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 259 all events. This creates large log files and slows the program down. 260 261 Logically Collective on `PETSC_COMM_WORLD` 262 263 Options Database Key: 264 . -log_all - Prints extensive log information 265 266 Usage: 267 .vb 268 PetscInitialize(...); 269 PetscLogAllBegin(); 270 ... code ... 271 PetscLogDump(filename); 272 PetscFinalize(); 273 .ve 274 275 Level: advanced 276 277 Note: 278 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 279 intended for production runs since it logs only flop rates and object 280 creation (and shouldn't significantly slow the programs). 281 282 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 283 @*/ 284 PetscErrorCode PetscLogAllBegin(void) 285 { 286 PetscFunctionBegin; 287 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 288 PetscFunctionReturn(0); 289 } 290 291 /*@C 292 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 293 begins or ends, the event name is printed. 294 295 Logically Collective on `PETSC_COMM_WORLD` 296 297 Input Parameter: 298 . file - The file to print trace in (e.g. stdout) 299 300 Options Database Key: 301 . -log_trace [filename] - Activates `PetscLogTraceBegin()` 302 303 Level: intermediate 304 305 Notes: 306 `PetscLogTraceBegin()` prints the processor number, the execution time (sec), 307 then "Event begin:" or "Event end:" followed by the event name. 308 309 `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful 310 to determine where a program is hanging without running in the 311 debugger. Can be used in conjunction with the -info option. 312 313 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 314 @*/ 315 PetscErrorCode PetscLogTraceBegin(FILE *file) 316 { 317 PetscFunctionBegin; 318 petsc_tracefile = file; 319 320 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 321 PetscFunctionReturn(0); 322 } 323 324 /*@ 325 PetscLogActions - Determines whether actions are logged for the graphical viewer. 326 327 Not Collective 328 329 Input Parameter: 330 . flag - `PETSC_TRUE` if actions are to be logged 331 332 Options Database Key: 333 . -log_exclude_actions - Turns off actions logging 334 335 Level: intermediate 336 337 Note: 338 Logging of actions continues to consume more memory as the program 339 runs. Long running programs should consider turning this feature off. 340 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 341 @*/ 342 PetscErrorCode PetscLogActions(PetscBool flag) 343 { 344 PetscFunctionBegin; 345 petsc_logActions = flag; 346 PetscFunctionReturn(0); 347 } 348 349 /*@ 350 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 351 352 Not Collective 353 354 Input Parameter: 355 . flag - `PETSC_TRUE` if objects are to be logged 356 357 Options Database Key: 358 . -log_exclude_objects - Turns off objects logging 359 360 Level: intermediate 361 362 Note: 363 Logging of objects continues to consume more memory as the program 364 runs. Long running programs should consider turning this feature off. 365 366 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 367 @*/ 368 PetscErrorCode PetscLogObjects(PetscBool flag) 369 { 370 PetscFunctionBegin; 371 petsc_logObjects = flag; 372 PetscFunctionReturn(0); 373 } 374 375 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 376 /*@C 377 PetscLogStageRegister - Attaches a character string name to a logging stage. 378 379 Not Collective 380 381 Input Parameter: 382 . sname - The name to associate with that stage 383 384 Output Parameter: 385 . stage - The stage number 386 387 Level: intermediate 388 389 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 390 @*/ 391 PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage) 392 { 393 PetscStageLog stageLog; 394 PetscLogEvent event; 395 396 PetscFunctionBegin; 397 PetscCall(PetscLogGetStageLog(&stageLog)); 398 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 399 /* Copy events already changed in the main stage, this sucks */ 400 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 401 for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 402 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 403 PetscFunctionReturn(0); 404 } 405 406 /*@C 407 PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage 408 409 Not Collective 410 411 Input Parameter: 412 . stage - The stage on which to log 413 414 Usage: 415 If the option -log_view is used to run the program containing the 416 following code, then 2 sets of summary data will be printed during 417 PetscFinalize(). 418 .vb 419 PetscInitialize(int *argc,char ***args,0,0); 420 [stage 0 of code] 421 PetscLogStagePush(1); 422 [stage 1 of code] 423 PetscLogStagePop(); 424 PetscBarrier(...); 425 [more stage 0 of code] 426 PetscFinalize(); 427 .ve 428 429 Level: intermediate 430 431 Note: 432 Use `PetscLogStageRegister()` to register a stage. 433 434 .seealso: [](ch_profiling), `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 435 @*/ 436 PetscErrorCode PetscLogStagePush(PetscLogStage stage) 437 { 438 PetscStageLog stageLog; 439 440 PetscFunctionBegin; 441 PetscCall(PetscLogGetStageLog(&stageLog)); 442 PetscCall(PetscStageLogPush(stageLog, stage)); 443 PetscFunctionReturn(0); 444 } 445 446 /*@C 447 PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()` 448 449 Not Collective 450 451 Usage: 452 If the option -log_view is used to run the program containing the 453 following code, then 2 sets of summary data will be printed during 454 PetscFinalize(). 455 .vb 456 PetscInitialize(int *argc,char ***args,0,0); 457 [stage 0 of code] 458 PetscLogStagePush(1); 459 [stage 1 of code] 460 PetscLogStagePop(); 461 PetscBarrier(...); 462 [more stage 0 of code] 463 PetscFinalize(); 464 .ve 465 466 Level: intermediate 467 468 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 469 @*/ 470 PetscErrorCode PetscLogStagePop(void) 471 { 472 PetscStageLog stageLog; 473 474 PetscFunctionBegin; 475 PetscCall(PetscLogGetStageLog(&stageLog)); 476 PetscCall(PetscStageLogPop(stageLog)); 477 PetscFunctionReturn(0); 478 } 479 480 /*@ 481 PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 482 483 Not Collective 484 485 Input Parameters: 486 + stage - The stage 487 - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 488 489 Level: intermediate 490 491 Note: 492 If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist 493 494 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 495 @*/ 496 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) 497 { 498 PetscStageLog stageLog; 499 500 PetscFunctionBegin; 501 PetscCall(PetscLogGetStageLog(&stageLog)); 502 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 503 PetscFunctionReturn(0); 504 } 505 506 /*@ 507 PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 508 509 Not Collective 510 511 Input Parameter: 512 . stage - The stage 513 514 Output Parameter: 515 . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 516 517 Level: intermediate 518 519 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 520 @*/ 521 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) 522 { 523 PetscStageLog stageLog; 524 525 PetscFunctionBegin; 526 PetscCall(PetscLogGetStageLog(&stageLog)); 527 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 528 PetscFunctionReturn(0); 529 } 530 531 /*@ 532 PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()` 533 534 Not Collective 535 536 Input Parameters: 537 + stage - The stage 538 - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 539 540 Level: intermediate 541 542 Developer Note: 543 What does visible mean, needs to be documented. 544 545 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 546 @*/ 547 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) 548 { 549 PetscStageLog stageLog; 550 551 PetscFunctionBegin; 552 PetscCall(PetscLogGetStageLog(&stageLog)); 553 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 554 PetscFunctionReturn(0); 555 } 556 557 /*@ 558 PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()` 559 560 Not Collective 561 562 Input Parameter: 563 . stage - The stage 564 565 Output Parameter: 566 . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 567 568 Level: intermediate 569 570 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 571 @*/ 572 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) 573 { 574 PetscStageLog stageLog; 575 576 PetscFunctionBegin; 577 PetscCall(PetscLogGetStageLog(&stageLog)); 578 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 579 PetscFunctionReturn(0); 580 } 581 582 /*@C 583 PetscLogStageGetId - Returns the stage id when given the stage name. 584 585 Not Collective 586 587 Input Parameter: 588 . name - The stage name 589 590 Output Parameter: 591 . stage - The stage, , or -1 if no stage with that name exists 592 593 Level: intermediate 594 595 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 596 @*/ 597 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) 598 { 599 PetscStageLog stageLog; 600 601 PetscFunctionBegin; 602 PetscCall(PetscLogGetStageLog(&stageLog)); 603 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 604 PetscFunctionReturn(0); 605 } 606 607 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 608 609 /*@C 610 PetscLogEventRegister - Registers an event name for logging operations 611 612 Not Collective 613 614 Input Parameters: 615 + name - The name associated with the event 616 - classid - The classid associated to the class for this event, obtain either with 617 `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones 618 are only available in C code 619 620 Output Parameter: 621 . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`. 622 623 Example of Usage: 624 .vb 625 PetscLogEvent USER_EVENT; 626 PetscClassId classid; 627 PetscLogDouble user_event_flops; 628 PetscClassIdRegister("class name",&classid); 629 PetscLogEventRegister("User event name",classid,&USER_EVENT); 630 PetscLogEventBegin(USER_EVENT,0,0,0,0); 631 [code segment to monitor] 632 PetscLogFlops(user_event_flops); 633 PetscLogEventEnd(USER_EVENT,0,0,0,0); 634 .ve 635 636 Level: intermediate 637 638 Notes: 639 PETSc automatically logs library events if the code has been 640 configured with --with-log (which is the default) and 641 -log_view or -log_all is specified. `PetscLogEventRegister()` is 642 intended for logging user events to supplement this PETSc 643 information. 644 645 PETSc can gather data for use with the utilities Jumpshot 646 (part of the MPICH distribution). If PETSc has been compiled 647 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 648 MPICH), the user can employ another command line option, -log_mpe, 649 to create a logfile, "mpe.log", which can be visualized 650 Jumpshot. 651 652 The classid is associated with each event so that classes of events 653 can be disabled simultaneously, such as all matrix events. The user 654 can either use an existing classid, such as `MAT_CLASSID`, or create 655 their own as shown in the example. 656 657 If an existing event with the same name exists, its event handle is 658 returned instead of creating a new event. 659 660 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 661 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 662 @*/ 663 PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event) 664 { 665 PetscStageLog stageLog; 666 int stage; 667 668 PetscFunctionBegin; 669 *event = PETSC_DECIDE; 670 PetscCall(PetscLogGetStageLog(&stageLog)); 671 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 672 if (*event > 0) PetscFunctionReturn(0); 673 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 674 for (stage = 0; stage < stageLog->numStages; stage++) { 675 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 676 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 677 } 678 PetscFunctionReturn(0); 679 } 680 681 /*@ 682 PetscLogEventSetCollective - Indicates that a particular event is collective. 683 684 Not Collective 685 686 Input Parameters: 687 + event - The event id 688 - collective - Boolean flag indicating whether a particular event is collective 689 690 Level: developer 691 692 Notes: 693 New events returned from `PetscLogEventRegister()` are collective by default. 694 695 Collective events are handled specially if the -log_sync is used. In that case the logging saves information about 696 two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication 697 to be performed. This option is useful to debug imbalance within the computations or communications 698 699 .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()` 700 @*/ 701 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective) 702 { 703 PetscStageLog stageLog; 704 PetscEventRegLog eventRegLog; 705 706 PetscFunctionBegin; 707 PetscCall(PetscLogGetStageLog(&stageLog)); 708 PetscCall(PetscStageLogGetEventRegLog(stageLog, &eventRegLog)); 709 PetscCheck(event >= 0 && event <= eventRegLog->numEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid event id"); 710 eventRegLog->eventInfo[event].collective = collective; 711 PetscFunctionReturn(0); 712 } 713 714 /*@ 715 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 716 717 Not Collective 718 719 Input Parameter: 720 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 721 722 Level: developer 723 724 .seealso: [](ch_profiling), `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 725 @*/ 726 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) 727 { 728 PetscStageLog stageLog; 729 int stage; 730 731 PetscFunctionBegin; 732 PetscCall(PetscLogGetStageLog(&stageLog)); 733 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 734 PetscFunctionReturn(0); 735 } 736 737 /*@ 738 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 739 740 Not Collective 741 742 Input Parameter: 743 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 744 745 Level: developer 746 747 Note: 748 If a class is excluded then events associated with that class are not logged. 749 750 .seealso: [](ch_profiling), `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 751 @*/ 752 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) 753 { 754 PetscStageLog stageLog; 755 int stage; 756 757 PetscFunctionBegin; 758 PetscCall(PetscLogGetStageLog(&stageLog)); 759 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 760 PetscFunctionReturn(0); 761 } 762 763 /*@ 764 PetscLogEventActivate - Indicates that a particular event should be logged. 765 766 Not Collective 767 768 Input Parameter: 769 . event - The event id 770 771 Usage: 772 .vb 773 PetscLogEventDeactivate(VEC_SetValues); 774 [code where you do not want to log VecSetValues()] 775 PetscLogEventActivate(VEC_SetValues); 776 [code where you do want to log VecSetValues()] 777 .ve 778 779 Level: advanced 780 781 Note: 782 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 783 or an event number obtained with `PetscLogEventRegister()`. 784 785 .seealso: [](ch_profiling), `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 786 @*/ 787 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) 788 { 789 PetscStageLog stageLog; 790 int stage; 791 792 PetscFunctionBegin; 793 PetscCall(PetscLogGetStageLog(&stageLog)); 794 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 795 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 796 PetscFunctionReturn(0); 797 } 798 799 /*@ 800 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 801 802 Not Collective 803 804 Input Parameter: 805 . event - The event id 806 807 Usage: 808 .vb 809 PetscLogEventDeactivate(VEC_SetValues); 810 [code where you do not want to log VecSetValues()] 811 PetscLogEventActivate(VEC_SetValues); 812 [code where you do want to log VecSetValues()] 813 .ve 814 815 Level: advanced 816 817 Note: 818 The event may be either a pre-defined PETSc event (found in 819 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 820 821 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 822 @*/ 823 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) 824 { 825 PetscStageLog stageLog; 826 int stage; 827 828 PetscFunctionBegin; 829 PetscCall(PetscLogGetStageLog(&stageLog)); 830 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 831 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 832 PetscFunctionReturn(0); 833 } 834 835 /*@ 836 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called 837 838 Not Collective 839 840 Input Parameter: 841 . event - The event id 842 843 Usage: 844 .vb 845 PetscLogEventDeactivatePush(VEC_SetValues); 846 [code where you do not want to log VecSetValues()] 847 PetscLogEventDeactivatePop(VEC_SetValues); 848 [code where you do want to log VecSetValues()] 849 .ve 850 851 Level: advanced 852 853 Note: 854 The event may be either a pre-defined PETSc event (found in 855 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 856 857 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()` 858 @*/ 859 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) 860 { 861 PetscStageLog stageLog; 862 int stage; 863 864 PetscFunctionBegin; 865 PetscCall(PetscLogGetStageLog(&stageLog)); 866 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 867 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 868 PetscFunctionReturn(0); 869 } 870 871 /*@ 872 PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()` 873 874 Not Collective 875 876 Input Parameter: 877 . event - The event id 878 879 Usage: 880 .vb 881 PetscLogEventDeactivatePush(VEC_SetValues); 882 [code where you do not want to log VecSetValues()] 883 PetscLogEventDeactivatePop(VEC_SetValues); 884 [code where you do want to log VecSetValues()] 885 .ve 886 887 Level: advanced 888 889 Note: 890 The event may be either a pre-defined PETSc event (found in 891 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 892 893 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 894 @*/ 895 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) 896 { 897 PetscStageLog stageLog; 898 int stage; 899 900 PetscFunctionBegin; 901 PetscCall(PetscLogGetStageLog(&stageLog)); 902 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 903 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 904 PetscFunctionReturn(0); 905 } 906 907 /*@ 908 PetscLogEventSetActiveAll - Turns on logging of all events 909 910 Not Collective 911 912 Input Parameters: 913 + event - The event id 914 - isActive - The activity flag determining whether the event is logged 915 916 Level: advanced 917 918 .seealso: [](ch_profiling), `PlogEventActivate()`, `PlogEventDeactivate()` 919 @*/ 920 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) 921 { 922 PetscStageLog stageLog; 923 int stage; 924 925 PetscFunctionBegin; 926 PetscCall(PetscLogGetStageLog(&stageLog)); 927 for (stage = 0; stage < stageLog->numStages; stage++) { 928 if (isActive) { 929 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 930 } else { 931 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 932 } 933 } 934 PetscFunctionReturn(0); 935 } 936 937 /*@ 938 PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage 939 940 Not Collective 941 942 Input Parameter: 943 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 944 945 Level: developer 946 947 .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 948 @*/ 949 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) 950 { 951 PetscStageLog stageLog; 952 int stage; 953 954 PetscFunctionBegin; 955 PetscCall(PetscLogGetStageLog(&stageLog)); 956 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 957 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 958 PetscFunctionReturn(0); 959 } 960 961 /*@ 962 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage 963 964 Not Collective 965 966 Input Parameter: 967 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 968 969 Level: developer 970 971 .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 972 @*/ 973 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) 974 { 975 PetscStageLog stageLog; 976 int stage; 977 978 PetscFunctionBegin; 979 PetscCall(PetscLogGetStageLog(&stageLog)); 980 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 981 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 982 PetscFunctionReturn(0); 983 } 984 985 /*MC 986 PetscLogEventSync - Synchronizes the beginning of a user event. 987 988 Synopsis: 989 #include <petsclog.h> 990 PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm) 991 992 Collective 993 994 Input Parameters: 995 + e - integer associated with the event obtained from PetscLogEventRegister() 996 - comm - an MPI communicator 997 998 Usage: 999 .vb 1000 PetscLogEvent USER_EVENT; 1001 PetscLogEventRegister("User event",0,&USER_EVENT); 1002 PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD); 1003 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1004 [code segment to monitor] 1005 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1006 .ve 1007 1008 Level: developer 1009 1010 Note: 1011 This routine should be called only if there is not a 1012 `PetscObject` available to pass to `PetscLogEventBegin()`. 1013 1014 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1015 M*/ 1016 1017 /*MC 1018 PetscLogEventBegin - Logs the beginning of a user event. 1019 1020 Synopsis: 1021 #include <petsclog.h> 1022 PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1023 1024 Not Collective 1025 1026 Input Parameters: 1027 + e - integer associated with the event obtained from PetscLogEventRegister() 1028 - o1,o2,o3,o4 - objects associated with the event, or 0 1029 1030 Fortran Synopsis: 1031 void PetscLogEventBegin(int e,PetscErrorCode ierr) 1032 1033 Usage: 1034 .vb 1035 PetscLogEvent USER_EVENT; 1036 PetscLogDouble user_event_flops; 1037 PetscLogEventRegister("User event",0,&USER_EVENT); 1038 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1039 [code segment to monitor] 1040 PetscLogFlops(user_event_flops); 1041 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1042 .ve 1043 1044 Level: intermediate 1045 1046 Developer Note: 1047 `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the 1048 errors that occur in the macro directly because other packages that use this macros have used them in their 1049 own functions or methods that do not return error codes and it would be disruptive to change the current 1050 behavior. 1051 1052 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1053 M*/ 1054 1055 /*MC 1056 PetscLogEventEnd - Log the end of a user event. 1057 1058 Synopsis: 1059 #include <petsclog.h> 1060 PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1061 1062 Not Collective 1063 1064 Input Parameters: 1065 + e - integer associated with the event obtained with PetscLogEventRegister() 1066 - o1,o2,o3,o4 - objects associated with the event, or 0 1067 1068 Fortran Synopsis: 1069 void PetscLogEventEnd(int e,PetscErrorCode ierr) 1070 1071 Usage: 1072 .vb 1073 PetscLogEvent USER_EVENT; 1074 PetscLogDouble user_event_flops; 1075 PetscLogEventRegister("User event",0,&USER_EVENT,); 1076 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1077 [code segment to monitor] 1078 PetscLogFlops(user_event_flops); 1079 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1080 .ve 1081 1082 Level: intermediate 1083 1084 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1085 M*/ 1086 1087 /*@C 1088 PetscLogEventGetId - Returns the event id when given the event name. 1089 1090 Not Collective 1091 1092 Input Parameter: 1093 . name - The event name 1094 1095 Output Parameter: 1096 . event - The event, or -1 if no event with that name exists 1097 1098 Level: intermediate 1099 1100 .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1101 @*/ 1102 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) 1103 { 1104 PetscStageLog stageLog; 1105 1106 PetscFunctionBegin; 1107 PetscCall(PetscLogGetStageLog(&stageLog)); 1108 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1109 PetscFunctionReturn(0); 1110 } 1111 1112 PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event) 1113 { 1114 PetscFunctionBegin; 1115 PetscCall(PetscIntStackPush(current_log_event_stack, event)); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 PetscErrorCode PetscLogPopCurrentEvent_Internal(void) 1120 { 1121 PetscFunctionBegin; 1122 PetscCall(PetscIntStackPop(current_log_event_stack, NULL)); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event) 1127 { 1128 PetscBool empty; 1129 1130 PetscFunctionBegin; 1131 PetscValidIntPointer(event, 1); 1132 *event = PETSC_DECIDE; 1133 PetscCall(PetscIntStackEmpty(current_log_event_stack, &empty)); 1134 if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack, event)); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event) 1139 { 1140 PetscFunctionBegin; 1141 if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event, NULL, NULL, NULL, NULL)); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event) 1146 { 1147 PetscStageLog stageLog; 1148 PetscEventPerfLog eventLog; 1149 int stage; 1150 1151 PetscFunctionBegin; 1152 if (event == PETSC_DECIDE) PetscFunctionReturn(0); 1153 PetscCall(PetscLogEventBegin(event, NULL, NULL, NULL, NULL)); 1154 PetscCall(PetscLogGetStageLog(&stageLog)); 1155 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1156 PetscCall(PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog)); 1157 eventLog->eventInfo[event].count--; 1158 PetscFunctionReturn(0); 1159 } 1160 1161 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1162 /*@C 1163 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1164 be read by bin/petscview. This program no longer exists. 1165 1166 Collective on `PETSC_COMM_WORLD` 1167 1168 Input Parameter: 1169 . name - an optional file name 1170 1171 Usage: 1172 .vb 1173 PetscInitialize(...); 1174 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1175 ... code ... 1176 PetscLogDump(filename); 1177 PetscFinalize(); 1178 .ve 1179 1180 Level: advanced 1181 1182 Note: 1183 The default file name is 1184 $ Log.<rank> 1185 where <rank> is the processor number. If no name is specified, 1186 this file will be used. 1187 1188 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1189 @*/ 1190 PetscErrorCode PetscLogDump(const char sname[]) 1191 { 1192 PetscStageLog stageLog; 1193 PetscEventPerfInfo *eventInfo; 1194 FILE *fd; 1195 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1196 PetscLogDouble flops, _TotalTime; 1197 PetscMPIInt rank; 1198 int action, object, curStage; 1199 PetscLogEvent event; 1200 1201 PetscFunctionBegin; 1202 /* Calculate the total elapsed time */ 1203 PetscTime(&_TotalTime); 1204 _TotalTime -= petsc_BaseTime; 1205 /* Open log file */ 1206 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1207 if (sname && sname[0]) sprintf(file, "%s.%d", sname, rank); 1208 else sprintf(file, "Log.%d", rank); 1209 PetscCall(PetscFixFilename(file, fname)); 1210 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1211 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1212 /* Output totals */ 1213 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1214 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1215 /* Output actions */ 1216 if (petsc_logActions) { 1217 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1218 for (action = 0; action < petsc_numActions; action++) { 1219 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1220 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1221 } 1222 } 1223 /* Output objects */ 1224 if (petsc_logObjects) { 1225 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1226 for (object = 0; object < petsc_numObjects; object++) { 1227 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem)); 1228 if (!petsc_objects[object].name[0]) { 1229 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n")); 1230 } else { 1231 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1232 } 1233 if (petsc_objects[object].info[0] != 0) { 1234 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1235 } else { 1236 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1237 } 1238 } 1239 } 1240 /* Output events */ 1241 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1242 PetscCall(PetscLogGetStageLog(&stageLog)); 1243 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1244 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1245 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1246 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time; 1247 else flops = 0.0; 1248 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops)); 1249 } 1250 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 /* 1255 PetscLogView_Detailed - Each process prints the times for its own events 1256 1257 */ 1258 PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) 1259 { 1260 PetscStageLog stageLog; 1261 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1262 PetscLogDouble locTotalTime, numRed, maxMem; 1263 int numStages, numEvents, stage, event; 1264 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1265 PetscMPIInt rank, size; 1266 1267 PetscFunctionBegin; 1268 PetscCallMPI(MPI_Comm_size(comm, &size)); 1269 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1270 /* Must preserve reduction count before we go on */ 1271 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1272 /* Get the total elapsed time */ 1273 PetscTime(&locTotalTime); 1274 locTotalTime -= petsc_BaseTime; 1275 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 1276 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 1277 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 1278 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 1279 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 1280 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 1281 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 1282 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 1283 PetscCall(PetscLogGetStageLog(&stageLog)); 1284 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1285 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1286 for (stage = 0; stage < numStages; stage++) { 1287 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name)); 1288 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name)); 1289 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1290 for (event = 0; event < numEvents; event++) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name)); 1291 } 1292 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1293 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1294 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1295 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1296 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1297 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1298 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1299 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects)); 1300 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1301 PetscCall(PetscViewerFlush(viewer)); 1302 for (stage = 0; stage < numStages; stage++) { 1303 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1304 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, 1305 stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1306 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1307 for (event = 0; event < numEvents; event++) { 1308 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1309 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1310 stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, 1311 eventInfo->flops)); 1312 if (eventInfo->dof[0] >= 0.) { 1313 PetscInt d, e; 1314 1315 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1316 for (d = 0; d < 8; ++d) { 1317 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1318 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1319 } 1320 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1321 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1322 for (e = 0; e < 8; ++e) { 1323 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1324 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1325 } 1326 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1327 } 1328 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1329 } 1330 } 1331 PetscCall(PetscViewerFlush(viewer)); 1332 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1333 PetscFunctionReturn(0); 1334 } 1335 1336 /* 1337 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1338 */ 1339 PetscErrorCode PetscLogView_CSV(PetscViewer viewer) 1340 { 1341 PetscStageLog stageLog; 1342 PetscEventPerfInfo *eventInfo = NULL; 1343 PetscLogDouble locTotalTime, maxMem; 1344 int numStages, numEvents, stage, event; 1345 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1346 PetscMPIInt rank, size; 1347 1348 PetscFunctionBegin; 1349 PetscCallMPI(MPI_Comm_size(comm, &size)); 1350 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1351 /* Must preserve reduction count before we go on */ 1352 /* Get the total elapsed time */ 1353 PetscTime(&locTotalTime); 1354 locTotalTime -= petsc_BaseTime; 1355 PetscCall(PetscLogGetStageLog(&stageLog)); 1356 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1357 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1358 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1359 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1360 PetscCall(PetscViewerFlush(viewer)); 1361 for (stage = 0; stage < numStages; stage++) { 1362 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1363 1364 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1365 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1366 for (event = 0; event < numEvents; event++) { 1367 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1368 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, 1369 eventInfo->numReductions, eventInfo->flops)); 1370 if (eventInfo->dof[0] >= 0.) { 1371 PetscInt d, e; 1372 1373 for (d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1374 for (e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1375 } 1376 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1377 } 1378 } 1379 PetscCall(PetscViewerFlush(viewer)); 1380 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1381 PetscFunctionReturn(0); 1382 } 1383 1384 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) 1385 { 1386 PetscFunctionBegin; 1387 if (!PetscLogSyncOn) PetscFunctionReturn(0); 1388 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1389 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1390 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1391 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1392 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1393 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1394 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1395 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1396 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1397 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1398 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1399 PetscFunctionReturn(0); 1400 } 1401 1402 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) 1403 { 1404 PetscFunctionBegin; 1405 if (PetscDefined(USE_DEBUG)) { 1406 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1407 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1408 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1409 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1410 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1411 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1412 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1413 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1414 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1415 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1416 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1417 } 1418 PetscFunctionReturn(0); 1419 } 1420 1421 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) 1422 { 1423 #if defined(PETSC_HAVE_DEVICE) 1424 PetscMPIInt size; 1425 PetscBool deviceInitialized = PETSC_FALSE; 1426 1427 PetscFunctionBegin; 1428 PetscCallMPI(MPI_Comm_size(comm, &size)); 1429 for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) { 1430 const PetscDeviceType dtype = PetscDeviceTypeCast(i); 1431 if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */ 1432 deviceInitialized = PETSC_TRUE; 1433 break; 1434 } 1435 } 1436 /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */ 1437 if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(0); 1438 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1439 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1440 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1441 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1442 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1443 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1444 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1445 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1446 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1447 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1448 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1449 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1450 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1451 PetscFunctionReturn(0); 1452 #else 1453 return 0; 1454 #endif 1455 } 1456 1457 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) 1458 { 1459 #if defined(PETSC_HAVE_DEVICE) 1460 1461 PetscFunctionBegin; 1462 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(0); 1463 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1464 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1465 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1466 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1467 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1468 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1469 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1470 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1471 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1472 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1473 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1474 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1475 PetscFunctionReturn(0); 1476 #else 1477 return 0; 1478 #endif 1479 } 1480 1481 PetscErrorCode PetscLogView_Default(PetscViewer viewer) 1482 { 1483 FILE *fd; 1484 PetscLogDouble zero = 0.0; 1485 PetscStageLog stageLog; 1486 PetscStageInfo *stageInfo = NULL; 1487 PetscEventPerfInfo *eventInfo = NULL; 1488 PetscClassPerfInfo *classInfo; 1489 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1490 const char *name; 1491 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1492 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1493 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1494 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1495 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1496 PetscLogDouble min, max, tot, ratio, avg, x, y; 1497 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1498 #if defined(PETSC_HAVE_DEVICE) 1499 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1500 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1501 #endif 1502 PetscMPIInt minC, maxC; 1503 PetscMPIInt size, rank; 1504 PetscBool *localStageUsed, *stageUsed; 1505 PetscBool *localStageVisible, *stageVisible; 1506 int numStages, localNumEvents, numEvents; 1507 int stage, oclass; 1508 PetscLogEvent event; 1509 PetscErrorCode ierr = 0; 1510 char version[256]; 1511 MPI_Comm comm; 1512 #if defined(PETSC_HAVE_DEVICE) 1513 PetscLogEvent eventid; 1514 PetscInt64 nas = 0x7FF0000000000002; 1515 #endif 1516 1517 PetscFunctionBegin; 1518 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1519 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1520 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1521 PetscCallMPI(MPI_Comm_size(comm, &size)); 1522 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1523 /* Get the total elapsed time */ 1524 PetscTime(&locTotalTime); 1525 locTotalTime -= petsc_BaseTime; 1526 1527 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1528 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1529 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1530 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1531 PetscCall(PetscLogViewWarnSync(comm, fd)); 1532 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1533 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1534 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1535 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1536 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1537 PetscCall(PetscGetUserName(username, sizeof(username))); 1538 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1539 PetscCall(PetscGetDate(date, sizeof(date))); 1540 PetscCall(PetscGetVersion(version, sizeof(version))); 1541 if (size == 1) { 1542 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1543 } else { 1544 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1545 } 1546 #if defined(PETSC_HAVE_OPENMP) 1547 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1548 #endif 1549 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1550 1551 /* Must preserve reduction count before we go on */ 1552 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1553 1554 /* Calculate summary information */ 1555 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1556 /* Time */ 1557 PetscCallMPI(MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1558 PetscCallMPI(MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1559 PetscCallMPI(MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1560 avg = tot / ((PetscLogDouble)size); 1561 if (min != 0.0) ratio = max / min; 1562 else ratio = 0.0; 1563 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1564 TotalTime = tot; 1565 /* Objects */ 1566 avg = (PetscLogDouble)petsc_numObjects; 1567 PetscCallMPI(MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1568 PetscCallMPI(MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1569 PetscCallMPI(MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1570 avg = tot / ((PetscLogDouble)size); 1571 if (min != 0.0) ratio = max / min; 1572 else ratio = 0.0; 1573 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1574 /* Flops */ 1575 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1576 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1577 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1578 avg = tot / ((PetscLogDouble)size); 1579 if (min != 0.0) ratio = max / min; 1580 else ratio = 0.0; 1581 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1582 TotalFlops = tot; 1583 /* Flops/sec -- Must talk to Barry here */ 1584 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1585 else flops = 0.0; 1586 PetscCallMPI(MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1587 PetscCallMPI(MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1588 PetscCallMPI(MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1589 avg = tot / ((PetscLogDouble)size); 1590 if (min != 0.0) ratio = max / min; 1591 else ratio = 0.0; 1592 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1593 /* Memory */ 1594 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1595 if (mem > 0.0) { 1596 PetscCallMPI(MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1597 PetscCallMPI(MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1598 PetscCallMPI(MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1599 avg = tot / ((PetscLogDouble)size); 1600 if (min != 0.0) ratio = max / min; 1601 else ratio = 0.0; 1602 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1603 } 1604 /* Messages */ 1605 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1606 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1607 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1608 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1609 avg = tot / ((PetscLogDouble)size); 1610 if (min != 0.0) ratio = max / min; 1611 else ratio = 0.0; 1612 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1613 numMessages = tot; 1614 /* Message Lengths */ 1615 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1616 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1617 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1618 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1619 if (numMessages != 0) avg = tot / numMessages; 1620 else avg = 0.0; 1621 if (min != 0.0) ratio = max / min; 1622 else ratio = 0.0; 1623 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1624 messageLength = tot; 1625 /* Reductions */ 1626 PetscCallMPI(MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1627 PetscCallMPI(MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1628 PetscCallMPI(MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1629 if (min != 0.0) ratio = max / min; 1630 else ratio = 0.0; 1631 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1632 numReductions = red; /* wrong because uses count from process zero */ 1633 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1634 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1635 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1636 1637 /* Get total number of stages -- 1638 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1639 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1640 This seems best accomplished by assoicating a communicator with each stage. 1641 */ 1642 PetscCall(PetscLogGetStageLog(&stageLog)); 1643 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1644 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1645 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1646 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1647 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1648 if (numStages > 0) { 1649 stageInfo = stageLog->stageInfo; 1650 for (stage = 0; stage < numStages; stage++) { 1651 if (stage < stageLog->numStages) { 1652 localStageUsed[stage] = stageInfo[stage].used; 1653 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1654 } else { 1655 localStageUsed[stage] = PETSC_FALSE; 1656 localStageVisible[stage] = PETSC_TRUE; 1657 } 1658 } 1659 PetscCallMPI(MPI_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1660 PetscCallMPI(MPI_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1661 for (stage = 0; stage < numStages; stage++) { 1662 if (stageUsed[stage]) { 1663 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1664 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1665 break; 1666 } 1667 } 1668 for (stage = 0; stage < numStages; stage++) { 1669 if (!stageUsed[stage]) continue; 1670 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1671 if (localStageUsed[stage]) { 1672 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1673 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1674 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1675 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1676 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1677 name = stageInfo[stage].name; 1678 } else { 1679 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1680 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1681 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1682 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1683 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1684 name = ""; 1685 } 1686 mess *= 0.5; 1687 messLen *= 0.5; 1688 red /= size; 1689 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1690 else fracTime = 0.0; 1691 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1692 else fracFlops = 0.0; 1693 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1694 if (numMessages != 0.0) fracMessages = mess / numMessages; 1695 else fracMessages = 0.0; 1696 if (mess != 0.0) avgMessLen = messLen / mess; 1697 else avgMessLen = 0.0; 1698 if (messageLength != 0.0) fracLength = messLen / messageLength; 1699 else fracLength = 0.0; 1700 if (numReductions != 0.0) fracReductions = red / numReductions; 1701 else fracReductions = 0.0; 1702 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1703 } 1704 } 1705 1706 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1707 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1708 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1709 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1710 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1711 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1712 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1713 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1714 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1715 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1716 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1717 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1718 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1719 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1720 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1721 if (PetscLogMemory) { 1722 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1723 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1724 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1725 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1726 } 1727 #if defined(PETSC_HAVE_DEVICE) 1728 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1729 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1730 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1731 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1732 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1733 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1734 #endif 1735 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1736 1737 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1738 1739 /* Report events */ 1740 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1741 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1742 #if defined(PETSC_HAVE_DEVICE) 1743 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1744 #endif 1745 PetscCall(PetscFPrintf(comm, fd, "\n")); 1746 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1747 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1748 #if defined(PETSC_HAVE_DEVICE) 1749 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1750 #endif 1751 PetscCall(PetscFPrintf(comm, fd, "\n")); 1752 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1753 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1754 #if defined(PETSC_HAVE_DEVICE) 1755 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1756 #endif 1757 PetscCall(PetscFPrintf(comm, fd, "\n")); 1758 1759 #if defined(PETSC_HAVE_DEVICE) 1760 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1761 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1762 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1763 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1764 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1765 #endif 1766 1767 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1768 for (stage = 0; stage < numStages; stage++) { 1769 if (!stageVisible[stage]) continue; 1770 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1771 if (localStageUsed[stage]) { 1772 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1773 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1774 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1775 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1776 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1777 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1778 } else { 1779 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1780 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1781 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1782 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1783 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1784 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1785 } 1786 mess *= 0.5; 1787 messLen *= 0.5; 1788 red /= size; 1789 1790 /* Get total number of events in this stage -- 1791 Currently, a single processor can register more events than another, but events must all be registered in order, 1792 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1793 on the event ID. This seems best accomplished by associating a communicator with each stage. 1794 1795 Problem: If the event did not happen on proc 1, its name will not be available. 1796 Problem: Event visibility is not implemented 1797 */ 1798 if (localStageUsed[stage]) { 1799 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1800 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1801 } else localNumEvents = 0; 1802 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1803 for (event = 0; event < numEvents; event++) { 1804 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1805 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1806 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; 1807 else flopr = 0.0; 1808 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1809 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1810 PetscCallMPI(MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1811 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1812 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1813 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1814 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1815 PetscCallMPI(MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1816 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1817 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1818 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1819 if (PetscLogMemory) { 1820 PetscCallMPI(MPI_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1821 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1822 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1823 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1824 } 1825 #if defined(PETSC_HAVE_DEVICE) 1826 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1827 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1828 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1829 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1830 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1831 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1832 #endif 1833 name = stageLog->eventLog->eventInfo[event].name; 1834 } else { 1835 flopr = 0.0; 1836 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1837 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1838 PetscCallMPI(MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1839 PetscCallMPI(MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1840 PetscCallMPI(MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1841 PetscCallMPI(MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1842 PetscCallMPI(MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1843 PetscCallMPI(MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1844 PetscCallMPI(MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1845 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1846 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1847 if (PetscLogMemory) { 1848 PetscCallMPI(MPI_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1849 PetscCallMPI(MPI_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1850 PetscCallMPI(MPI_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1851 PetscCallMPI(MPI_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1852 } 1853 #if defined(PETSC_HAVE_DEVICE) 1854 PetscCallMPI(MPI_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1855 PetscCallMPI(MPI_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1856 PetscCallMPI(MPI_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1857 PetscCallMPI(MPI_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1858 PetscCallMPI(MPI_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1859 PetscCallMPI(MPI_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1860 #endif 1861 name = ""; 1862 } 1863 if (mint < 0.0) { 1864 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name)); 1865 mint = 0; 1866 } 1867 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, name); 1868 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1869 #if defined(PETSC_HAVE_DEVICE) 1870 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1871 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 1872 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 1873 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 1874 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 1875 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 1876 } 1877 } 1878 #endif 1879 totm *= 0.5; 1880 totml *= 0.5; 1881 totr /= size; 1882 1883 if (maxC != 0) { 1884 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 1885 else ratC = 0.0; 1886 if (mint != 0.0) ratt = maxt / mint; 1887 else ratt = 0.0; 1888 if (minf != 0.0) ratf = maxf / minf; 1889 else ratf = 0.0; 1890 if (TotalTime != 0.0) fracTime = tott / TotalTime; 1891 else fracTime = 0.0; 1892 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 1893 else fracFlops = 0.0; 1894 if (stageTime != 0.0) fracStageTime = tott / stageTime; 1895 else fracStageTime = 0.0; 1896 if (flops != 0.0) fracStageFlops = totf / flops; 1897 else fracStageFlops = 0.0; 1898 if (numMessages != 0.0) fracMess = totm / numMessages; 1899 else fracMess = 0.0; 1900 if (messageLength != 0.0) fracMessLen = totml / messageLength; 1901 else fracMessLen = 0.0; 1902 if (numReductions != 0.0) fracRed = totr / numReductions; 1903 else fracRed = 0.0; 1904 if (mess != 0.0) fracStageMess = totm / mess; 1905 else fracStageMess = 0.0; 1906 if (messLen != 0.0) fracStageMessLen = totml / messLen; 1907 else fracStageMessLen = 0.0; 1908 if (red != 0.0) fracStageRed = totr / red; 1909 else fracStageRed = 0.0; 1910 if (totm != 0.0) totml /= totm; 1911 else totml = 0.0; 1912 if (maxt != 0.0) flopr = totf / maxt; 1913 else flopr = 0.0; 1914 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) 1915 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 1916 else 1917 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 1918 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 1919 #if defined(PETSC_HAVE_DEVICE) 1920 if (totf != 0.0) fracgflops = gflops / totf; 1921 else fracgflops = 0.0; 1922 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 1923 else gflopr = 0.0; 1924 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 1925 #endif 1926 PetscCall(PetscFPrintf(comm, fd, "\n")); 1927 } 1928 } 1929 } 1930 1931 /* Memory usage and object creation */ 1932 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1933 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1934 #if defined(PETSC_HAVE_DEVICE) 1935 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1936 #endif 1937 PetscCall(PetscFPrintf(comm, fd, "\n")); 1938 PetscCall(PetscFPrintf(comm, fd, "\n")); 1939 1940 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1941 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1942 stats for stages local to processor sets. 1943 */ 1944 /* We should figure out the longest object name here (now 20 characters) */ 1945 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 1946 for (stage = 0; stage < numStages; stage++) { 1947 if (localStageUsed[stage]) { 1948 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 1949 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1950 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 1951 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 1952 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions)); 1953 } 1954 } 1955 } else { 1956 if (!localStageVisible[stage]) continue; 1957 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1958 } 1959 } 1960 1961 PetscCall(PetscFree(localStageUsed)); 1962 PetscCall(PetscFree(stageUsed)); 1963 PetscCall(PetscFree(localStageVisible)); 1964 PetscCall(PetscFree(stageVisible)); 1965 1966 /* Information unrelated to this particular run */ 1967 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 1968 PetscTime(&y); 1969 PetscTime(&x); 1970 PetscTime(&y); 1971 PetscTime(&y); 1972 PetscTime(&y); 1973 PetscTime(&y); 1974 PetscTime(&y); 1975 PetscTime(&y); 1976 PetscTime(&y); 1977 PetscTime(&y); 1978 PetscTime(&y); 1979 PetscTime(&y); 1980 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 1981 /* MPI information */ 1982 if (size > 1) { 1983 MPI_Status status; 1984 PetscMPIInt tag; 1985 MPI_Comm newcomm; 1986 1987 PetscCallMPI(MPI_Barrier(comm)); 1988 PetscTime(&x); 1989 PetscCallMPI(MPI_Barrier(comm)); 1990 PetscCallMPI(MPI_Barrier(comm)); 1991 PetscCallMPI(MPI_Barrier(comm)); 1992 PetscCallMPI(MPI_Barrier(comm)); 1993 PetscCallMPI(MPI_Barrier(comm)); 1994 PetscTime(&y); 1995 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 1996 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 1997 PetscCallMPI(MPI_Barrier(comm)); 1998 if (rank) { 1999 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 2000 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 2001 } else { 2002 PetscTime(&x); 2003 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 2004 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 2005 PetscTime(&y); 2006 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 2007 } 2008 PetscCall(PetscCommDestroy(&newcomm)); 2009 } 2010 PetscCall(PetscOptionsView(NULL, viewer)); 2011 2012 /* Machine and compile information */ 2013 #if defined(PETSC_USE_FORTRAN_KERNELS) 2014 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 2015 #else 2016 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 2017 #endif 2018 #if defined(PETSC_USE_64BIT_INDICES) 2019 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64 bit PetscInt\n")); 2020 #elif defined(PETSC_USE___FLOAT128) 2021 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32 bit PetscInt\n")); 2022 #endif 2023 #if defined(PETSC_USE_REAL_SINGLE) 2024 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 2025 #elif defined(PETSC_USE___FLOAT128) 2026 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 2027 #endif 2028 #if defined(PETSC_USE_REAL_MAT_SINGLE) 2029 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 2030 #else 2031 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 2032 #endif 2033 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 2034 2035 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 2036 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 2037 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 2038 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 2039 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 2040 2041 /* Cleanup */ 2042 PetscCall(PetscFPrintf(comm, fd, "\n")); 2043 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 2044 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 2045 PetscCall(PetscFPTrapPop()); 2046 PetscFunctionReturn(0); 2047 } 2048 2049 /*@C 2050 PetscLogView - Prints a summary of the logging. 2051 2052 Collective over MPI_Comm 2053 2054 Input Parameter: 2055 . viewer - an ASCII viewer 2056 2057 Options Database Keys: 2058 + -log_view [:filename] - Prints summary of log information 2059 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2060 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2061 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2062 . -log_view_memory - Also display memory usage in each event 2063 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2064 . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation 2065 - -log_trace [filename] - Displays a trace of what each process is doing 2066 2067 Level: beginner 2068 2069 Notes: 2070 It is possible to control the logging programmatically but we recommend using the options database approach whenever possible 2071 By default the summary is printed to stdout. 2072 2073 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2074 2075 If PETSc is configured with --with-logging=0 then this functionality is not available 2076 2077 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2078 directory then open filename.xml with your browser. Specific notes for certain browsers 2079 $ Firefox and Internet explorer - simply open the file 2080 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2081 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2082 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2083 your browser. 2084 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2085 window and render the XML log file contents. 2086 2087 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2088 2089 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2090 or using speedscope (https://www.speedscope.app). 2091 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2092 2093 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogDump()` 2094 @*/ 2095 PetscErrorCode PetscLogView(PetscViewer viewer) 2096 { 2097 PetscBool isascii; 2098 PetscViewerFormat format; 2099 int stage, lastStage; 2100 PetscStageLog stageLog; 2101 2102 PetscFunctionBegin; 2103 PetscCheck(PetscLogPLB, PETSC_COMM_SELF, PETSC_ERR_SUP, "Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2104 /* Pop off any stages the user forgot to remove */ 2105 lastStage = 0; 2106 PetscCall(PetscLogGetStageLog(&stageLog)); 2107 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2108 while (stage >= 0) { 2109 lastStage = stage; 2110 PetscCall(PetscStageLogPop(stageLog)); 2111 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2112 } 2113 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2114 PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Currently can only view logging to ASCII"); 2115 PetscCall(PetscViewerGetFormat(viewer, &format)); 2116 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2117 PetscCall(PetscLogView_Default(viewer)); 2118 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2119 PetscCall(PetscLogView_Detailed(viewer)); 2120 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2121 PetscCall(PetscLogView_CSV(viewer)); 2122 } else if (format == PETSC_VIEWER_ASCII_XML) { 2123 PetscCall(PetscLogView_Nested(viewer)); 2124 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2125 PetscCall(PetscLogView_Flamegraph(viewer)); 2126 } 2127 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2128 PetscFunctionReturn(0); 2129 } 2130 2131 /*@C 2132 PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed. 2133 2134 Collective on `PETSC_COMM_WORLD` 2135 2136 Level: developer 2137 2138 .seealso: [](ch_profiling), `PetscLogView()` 2139 @*/ 2140 PetscErrorCode PetscLogViewFromOptions(void) 2141 { 2142 PetscViewer viewer; 2143 PetscBool flg; 2144 PetscViewerFormat format; 2145 2146 PetscFunctionBegin; 2147 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg)); 2148 if (flg) { 2149 PetscCall(PetscViewerPushFormat(viewer, format)); 2150 PetscCall(PetscLogView(viewer)); 2151 PetscCall(PetscViewerPopFormat(viewer)); 2152 PetscCall(PetscViewerDestroy(&viewer)); 2153 } 2154 PetscFunctionReturn(0); 2155 } 2156 2157 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2158 /*@C 2159 PetscGetFlops - Returns the number of flops used on this processor 2160 since the program began. 2161 2162 Not Collective 2163 2164 Output Parameter: 2165 flops - number of floating point operations 2166 2167 Level: intermediate 2168 2169 Notes: 2170 A global counter logs all PETSc flop counts. The user can use 2171 `PetscLogFlops()` to increment this counter to include flops for the 2172 application code. 2173 2174 A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank 2175 2176 .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()` 2177 @*/ 2178 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) 2179 { 2180 PetscFunctionBegin; 2181 *flops = petsc_TotalFlops; 2182 PetscFunctionReturn(0); 2183 } 2184 2185 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2186 { 2187 size_t fullLength; 2188 va_list Argp; 2189 2190 PetscFunctionBegin; 2191 if (!petsc_logObjects) PetscFunctionReturn(0); 2192 va_start(Argp, format); 2193 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp)); 2194 va_end(Argp); 2195 PetscFunctionReturn(0); 2196 } 2197 2198 /*MC 2199 PetscLogFlops - Adds floating point operations to the global counter. 2200 2201 Synopsis: 2202 #include <petsclog.h> 2203 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2204 2205 Not Collective 2206 2207 Input Parameter: 2208 . f - flop counter 2209 2210 Usage: 2211 .vb 2212 PetscLogEvent USER_EVENT; 2213 PetscLogEventRegister("User event",0,&USER_EVENT); 2214 PetscLogEventBegin(USER_EVENT,0,0,0,0); 2215 [code segment to monitor] 2216 PetscLogFlops(user_flops) 2217 PetscLogEventEnd(USER_EVENT,0,0,0,0); 2218 .ve 2219 2220 Level: intermediate 2221 2222 Note: 2223 A global counter logs all PETSc flop counts. The user can use 2224 PetscLogFlops() to increment this counter to include flops for the 2225 application code. 2226 2227 .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2228 M*/ 2229 2230 /*MC 2231 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) 2232 to get accurate timings 2233 2234 Synopsis: 2235 #include <petsclog.h> 2236 void PetscPreLoadBegin(PetscBool flag,char *name); 2237 2238 Not Collective 2239 2240 Input Parameters: 2241 + flag - PETSC_TRUE to run twice, `PETSC_FALSE` to run once, may be overridden 2242 with command line option -preload true or -preload false 2243 - name - name of first stage (lines of code timed separately with -log_view) to 2244 be preloaded 2245 2246 Usage: 2247 .vb 2248 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2249 lines of code 2250 PetscPreLoadStage("second stage"); 2251 lines of code 2252 PetscPreLoadEnd(); 2253 .ve 2254 2255 Level: intermediate 2256 2257 Note: 2258 Only works in C/C++, not Fortran 2259 2260 Flags available within the macro. 2261 + PetscPreLoadingUsed - true if we are or have done preloading 2262 . PetscPreLoadingOn - true if it is CURRENTLY doing preload 2263 . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second 2264 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on 2265 The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin() 2266 and PetscPreLoadEnd() 2267 2268 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2269 M*/ 2270 2271 /*MC 2272 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) 2273 to get accurate timings 2274 2275 Synopsis: 2276 #include <petsclog.h> 2277 void PetscPreLoadEnd(void); 2278 2279 Not Collective 2280 2281 Usage: 2282 .vb 2283 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2284 lines of code 2285 PetscPreLoadStage("second stage"); 2286 lines of code 2287 PetscPreLoadEnd(); 2288 .ve 2289 2290 Level: intermediate 2291 2292 Note: 2293 Only works in C/C++ not fortran 2294 2295 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2296 M*/ 2297 2298 /*MC 2299 PetscPreLoadStage - Start a new segment of code to be timed separately. 2300 to get accurate timings 2301 2302 Synopsis: 2303 #include <petsclog.h> 2304 void PetscPreLoadStage(char *name); 2305 2306 Not Collective 2307 2308 Usage: 2309 .vb 2310 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2311 lines of code 2312 PetscPreLoadStage("second stage"); 2313 lines of code 2314 PetscPreLoadEnd(); 2315 .ve 2316 2317 Level: intermediate 2318 2319 Note: 2320 Only works in C/C++ not fortran 2321 2322 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2323 M*/ 2324 2325 #if PetscDefined(HAVE_DEVICE) 2326 #include <petsc/private/deviceimpl.h> 2327 2328 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2329 2330 /* 2331 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2332 because it will result in timing results that cannot be interpreted. 2333 */ 2334 static PetscErrorCode PetscLogGpuTime_Off(void) 2335 { 2336 PetscLogGpuTimeFlag = PETSC_FALSE; 2337 return 0; 2338 } 2339 2340 /*@C 2341 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2342 2343 Options Database Key: 2344 . -log_view_gpu_time - provide the GPU times in the -log_view output 2345 2346 Level: advanced 2347 2348 Notes: 2349 Turning on the timing of the 2350 GPU kernels can slow down the entire computation and should only be used when studying the performance 2351 of operations on GPU such as vector operations and matrix-vector operations. 2352 2353 This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off. 2354 2355 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2356 @*/ 2357 PetscErrorCode PetscLogGpuTime(void) 2358 { 2359 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2360 PetscLogGpuTimeFlag = PETSC_TRUE; 2361 return 0; 2362 } 2363 2364 /*@C 2365 PetscLogGpuTimeBegin - Start timer for device 2366 2367 Level: intermediate 2368 2369 Notes: 2370 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times). 2371 2372 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times). 2373 2374 There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()` 2375 2376 This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space. 2377 2378 The regular logging captures the time for data transfers and any CPU activities during the event 2379 2380 It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel. 2381 2382 Developer Notes: 2383 The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`. 2384 2385 `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the 2386 event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution, 2387 but not continued GPU execution, until the timer event is recorded. 2388 2389 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2390 @*/ 2391 PetscErrorCode PetscLogGpuTimeBegin(void) 2392 { 2393 PetscFunctionBegin; 2394 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2395 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2396 PetscDeviceContext dctx; 2397 2398 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2399 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2400 } else { 2401 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2402 } 2403 PetscFunctionReturn(0); 2404 } 2405 2406 /*@C 2407 PetscLogGpuTimeEnd - Stop timer for device 2408 2409 Level: intermediate 2410 2411 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2412 @*/ 2413 PetscErrorCode PetscLogGpuTimeEnd(void) 2414 { 2415 PetscFunctionBegin; 2416 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2417 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2418 PetscDeviceContext dctx; 2419 PetscLogDouble elapsed; 2420 2421 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2422 PetscCall(PetscDeviceContextEndTimer_Internal(dctx, &elapsed)); 2423 petsc_gtime += (elapsed / 1000.0); 2424 } else { 2425 PetscCall(PetscTimeAdd(&petsc_gtime)); 2426 } 2427 PetscFunctionReturn(0); 2428 } 2429 #endif /* end of PETSC_HAVE_DEVICE */ 2430 2431 #else /* end of -DPETSC_USE_LOG section */ 2432 2433 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2434 { 2435 PetscFunctionBegin; 2436 PetscFunctionReturn(0); 2437 } 2438 2439 #endif /* PETSC_USE_LOG*/ 2440 2441 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2442 PetscClassId PETSC_OBJECT_CLASSID = 0; 2443 2444 /*@C 2445 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2446 2447 Not Collective 2448 2449 Input Parameter: 2450 . name - The class name 2451 2452 Output Parameter: 2453 . oclass - The class id or classid 2454 2455 Level: developer 2456 2457 .seealso: [](ch_profiling), `PetscLogEventRegister()` 2458 @*/ 2459 PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass) 2460 { 2461 #if defined(PETSC_USE_LOG) 2462 PetscStageLog stageLog; 2463 PetscInt stage; 2464 #endif 2465 2466 PetscFunctionBegin; 2467 *oclass = ++PETSC_LARGEST_CLASSID; 2468 #if defined(PETSC_USE_LOG) 2469 PetscCall(PetscLogGetStageLog(&stageLog)); 2470 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2471 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2472 #endif 2473 PetscFunctionReturn(0); 2474 } 2475 2476 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2477 #include <mpe.h> 2478 2479 PetscBool PetscBeganMPE = PETSC_FALSE; 2480 2481 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2482 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2483 2484 /*@C 2485 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files 2486 and slows the program down. 2487 2488 Collective over `PETSC_COMM_WORLD` 2489 2490 Options Database Key: 2491 . -log_mpe - Prints extensive log information 2492 2493 Level: advanced 2494 2495 Note: 2496 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 2497 intended for production runs since it logs only flop rates and object 2498 creation (and should not significantly slow the programs). 2499 2500 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2501 `PetscLogEventDeactivate()` 2502 @*/ 2503 PetscErrorCode PetscLogMPEBegin(void) 2504 { 2505 PetscFunctionBegin; 2506 /* Do MPE initialization */ 2507 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2508 PetscCall(PetscInfo(0, "Initializing MPE.\n")); 2509 PetscCall(MPE_Init_log()); 2510 2511 PetscBeganMPE = PETSC_TRUE; 2512 } else { 2513 PetscCall(PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n")); 2514 } 2515 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2516 PetscFunctionReturn(0); 2517 } 2518 2519 /*@C 2520 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2521 2522 Collective over `PETSC_COMM_WORLD` 2523 2524 Level: advanced 2525 2526 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2527 @*/ 2528 PetscErrorCode PetscLogMPEDump(const char sname[]) 2529 { 2530 char name[PETSC_MAX_PATH_LEN]; 2531 2532 PetscFunctionBegin; 2533 if (PetscBeganMPE) { 2534 PetscCall(PetscInfo(0, "Finalizing MPE.\n")); 2535 if (sname) { 2536 PetscCall(PetscStrcpy(name, sname)); 2537 } else { 2538 PetscCall(PetscGetProgramName(name, sizeof(name))); 2539 } 2540 PetscCall(MPE_Finish_log(name)); 2541 } else { 2542 PetscCall(PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n")); 2543 } 2544 PetscFunctionReturn(0); 2545 } 2546 2547 #define PETSC_RGB_COLORS_MAX 39 2548 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ", 2549 "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ", 2550 "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ", 2551 "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ", 2552 "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "}; 2553 2554 /*@C 2555 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()` 2556 2557 Not collective. Maybe it should be? 2558 2559 Output Parameter: 2560 . str - character string representing the color 2561 2562 Level: developer 2563 2564 .seealso: [](ch_profiling), `PetscLogEventRegister()` 2565 @*/ 2566 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) 2567 { 2568 static int idx = 0; 2569 2570 PetscFunctionBegin; 2571 *str = PetscLogMPERGBColors[idx]; 2572 idx = (idx + 1) % PETSC_RGB_COLORS_MAX; 2573 PetscFunctionReturn(0); 2574 } 2575 2576 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2577