1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 #include <petscdevice.h> 15 #include <petsc/private/deviceimpl.h> 16 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 17 #include <../src/sys/perfstubs/timer.h> 18 #endif 19 20 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 21 22 #if defined(PETSC_USE_LOG) 23 #include <petscmachineinfo.h> 24 #include <petscconfiginfo.h> 25 26 /* used in the MPI_XXX() count macros in petsclog.h */ 27 28 /* Action and object logging variables */ 29 Action *petsc_actions = NULL; 30 Object *petsc_objects = NULL; 31 PetscBool petsc_logActions = PETSC_FALSE; 32 PetscBool petsc_logObjects = PETSC_FALSE; 33 int petsc_numActions = 0, petsc_maxActions = 100; 34 int petsc_numObjects = 0, petsc_maxObjects = 100; 35 int petsc_numObjectsDestroyed = 0; 36 37 /* Global counters */ 38 PetscLogDouble petsc_BaseTime = 0.0; 39 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 40 PetscLogDouble petsc_tmp_flops = 0.0; /* The incremental number of flops */ 41 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 42 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 43 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 44 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 45 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 46 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 47 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 48 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 49 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 50 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 51 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 52 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 53 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 54 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 55 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 56 #if defined(PETSC_HAVE_DEVICE) 57 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 58 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 59 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 60 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 61 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 62 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 63 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 64 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 65 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 66 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 67 #endif 68 69 /* Logging functions */ 70 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 71 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 72 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 73 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 74 75 /* Tracing event logging variables */ 76 FILE *petsc_tracefile = NULL; 77 int petsc_tracelevel = 0; 78 const char *petsc_traceblanks = " "; 79 char petsc_tracespace[128] = " "; 80 PetscLogDouble petsc_tracetime = 0.0; 81 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 82 83 static PetscIntStack current_log_event_stack = NULL; 84 85 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) 86 { 87 int stage; 88 PetscBool opt; 89 90 PetscFunctionBegin; 91 if (PetscLogInitializeCalled) PetscFunctionReturn(0); 92 PetscLogInitializeCalled = PETSC_TRUE; 93 94 PetscCall(PetscIntStackCreate(¤t_log_event_stack)); 95 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt)); 96 if (opt) petsc_logActions = PETSC_FALSE; 97 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt)); 98 if (opt) petsc_logObjects = PETSC_FALSE; 99 if (petsc_logActions) PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 100 if (petsc_logObjects) PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 101 PetscLogPHC = PetscLogObjCreateDefault; 102 PetscLogPHD = PetscLogObjDestroyDefault; 103 /* Setup default logging structures */ 104 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 105 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 106 107 /* All processors sync here for more consistent logging */ 108 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 109 PetscTime(&petsc_BaseTime); 110 PetscCall(PetscLogStagePush(stage)); 111 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 112 PetscStackCallExternalVoid("ps_initialize_", ps_initialize_()); 113 #endif 114 PetscFunctionReturn(0); 115 } 116 117 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) 118 { 119 PetscStageLog stageLog; 120 121 PetscFunctionBegin; 122 PetscCall(PetscFree(petsc_actions)); 123 PetscCall(PetscFree(petsc_objects)); 124 PetscCall(PetscLogNestedEnd()); 125 PetscCall(PetscLogSet(NULL, NULL)); 126 127 /* Resetting phase */ 128 PetscCall(PetscLogGetStageLog(&stageLog)); 129 PetscCall(PetscStageLogDestroy(stageLog)); 130 PetscCall(PetscIntStackDestroy(current_log_event_stack)); 131 current_log_event_stack = NULL; 132 133 petsc_TotalFlops = 0.0; 134 petsc_numActions = 0; 135 petsc_numObjects = 0; 136 petsc_numObjectsDestroyed = 0; 137 petsc_maxActions = 100; 138 petsc_maxObjects = 100; 139 petsc_actions = NULL; 140 petsc_objects = NULL; 141 petsc_logActions = PETSC_FALSE; 142 petsc_logObjects = PETSC_FALSE; 143 petsc_BaseTime = 0.0; 144 petsc_TotalFlops = 0.0; 145 petsc_tmp_flops = 0.0; 146 petsc_send_ct = 0.0; 147 petsc_recv_ct = 0.0; 148 petsc_send_len = 0.0; 149 petsc_recv_len = 0.0; 150 petsc_isend_ct = 0.0; 151 petsc_irecv_ct = 0.0; 152 petsc_isend_len = 0.0; 153 petsc_irecv_len = 0.0; 154 petsc_wait_ct = 0.0; 155 petsc_wait_any_ct = 0.0; 156 petsc_wait_all_ct = 0.0; 157 petsc_sum_of_waits_ct = 0.0; 158 petsc_allreduce_ct = 0.0; 159 petsc_gather_ct = 0.0; 160 petsc_scatter_ct = 0.0; 161 #if defined(PETSC_HAVE_DEVICE) 162 petsc_ctog_ct = 0.0; 163 petsc_gtoc_ct = 0.0; 164 petsc_ctog_sz = 0.0; 165 petsc_gtoc_sz = 0.0; 166 petsc_gflops = 0.0; 167 petsc_gtime = 0.0; 168 #endif 169 PETSC_LARGEST_EVENT = PETSC_EVENT; 170 PetscLogPHC = NULL; 171 PetscLogPHD = NULL; 172 petsc_tracefile = NULL; 173 petsc_tracelevel = 0; 174 petsc_traceblanks = " "; 175 petsc_tracespace[0] = ' '; 176 petsc_tracespace[1] = 0; 177 petsc_tracetime = 0.0; 178 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 179 PETSC_OBJECT_CLASSID = 0; 180 petsc_stageLog = NULL; 181 PetscLogInitializeCalled = PETSC_FALSE; 182 PetscFunctionReturn(0); 183 } 184 185 /*@C 186 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 187 188 Not Collective 189 190 Input Parameters: 191 + b - The function called at beginning of event 192 - e - The function called at end of event 193 194 Level: developer 195 196 Developer Note: 197 The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. 198 199 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()` 200 @*/ 201 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) 202 { 203 PetscFunctionBegin; 204 PetscLogPLB = b; 205 PetscLogPLE = e; 206 PetscFunctionReturn(0); 207 } 208 209 /*@C 210 PetscLogIsActive - Check if logging is currently in progress. 211 212 Not Collective 213 214 Output Parameter: 215 . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise 216 217 Level: beginner 218 219 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 220 @*/ 221 PetscErrorCode PetscLogIsActive(PetscBool *isActive) 222 { 223 PetscFunctionBegin; 224 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 225 PetscFunctionReturn(0); 226 } 227 228 /*@C 229 PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop 230 rates and object creation and should not slow programs down too much. 231 This routine may be called more than once. 232 233 Logically Collective over `PETSC_COMM_WORLD` 234 235 Options Database Key: 236 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 237 screen (for code configured with --with-log=1 (which is the default)) 238 239 Usage: 240 .vb 241 PetscInitialize(...); 242 PetscLogDefaultBegin(); 243 ... code ... 244 PetscLogView(viewer); or PetscLogDump(); 245 PetscFinalize(); 246 .ve 247 248 Note: 249 `PetscLogView()` or `PetscLogDump()` actually cause the printing of 250 the logging information. 251 252 Level: advanced 253 254 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 255 @*/ 256 PetscErrorCode PetscLogDefaultBegin(void) 257 { 258 PetscFunctionBegin; 259 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 260 PetscFunctionReturn(0); 261 } 262 263 /*@C 264 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 265 all events. This creates large log files and slows the program down. 266 267 Logically Collective on `PETSC_COMM_WORLD` 268 269 Options Database Key: 270 . -log_all - Prints extensive log information 271 272 Usage: 273 .vb 274 PetscInitialize(...); 275 PetscLogAllBegin(); 276 ... code ... 277 PetscLogDump(filename); 278 PetscFinalize(); 279 .ve 280 281 Note: 282 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 283 intended for production runs since it logs only flop rates and object 284 creation (and shouldn't significantly slow the programs). 285 286 Level: advanced 287 288 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 289 @*/ 290 PetscErrorCode PetscLogAllBegin(void) 291 { 292 PetscFunctionBegin; 293 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 294 PetscFunctionReturn(0); 295 } 296 297 /*@C 298 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 299 begins or ends, the event name is printed. 300 301 Logically Collective on `PETSC_COMM_WORLD` 302 303 Input Parameter: 304 . file - The file to print trace in (e.g. stdout) 305 306 Options Database Key: 307 . -log_trace [filename] - Activates `PetscLogTraceBegin()` 308 309 Notes: 310 `PetscLogTraceBegin()` prints the processor number, the execution time (sec), 311 then "Event begin:" or "Event end:" followed by the event name. 312 313 `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful 314 to determine where a program is hanging without running in the 315 debugger. Can be used in conjunction with the -info option. 316 317 Level: intermediate 318 319 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 320 @*/ 321 PetscErrorCode PetscLogTraceBegin(FILE *file) 322 { 323 PetscFunctionBegin; 324 petsc_tracefile = file; 325 326 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 327 PetscFunctionReturn(0); 328 } 329 330 /*@ 331 PetscLogActions - Determines whether actions are logged for the graphical viewer. 332 333 Not Collective 334 335 Input Parameter: 336 . flag - `PETSC_TRUE` if actions are to be logged 337 338 Options Database Key: 339 . -log_exclude_actions - Turns off actions logging 340 341 Level: intermediate 342 343 Note: 344 Logging of actions continues to consume more memory as the program 345 runs. Long running programs should consider turning this feature off. 346 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 347 @*/ 348 PetscErrorCode PetscLogActions(PetscBool flag) 349 { 350 PetscFunctionBegin; 351 petsc_logActions = flag; 352 PetscFunctionReturn(0); 353 } 354 355 /*@ 356 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 357 358 Not Collective 359 360 Input Parameter: 361 . flag - `PETSC_TRUE` if objects are to be logged 362 363 Options Database Key: 364 . -log_exclude_objects - Turns off objects logging 365 366 Level: intermediate 367 368 Note: 369 Logging of objects continues to consume more memory as the program 370 runs. Long running programs should consider turning this feature off. 371 372 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 373 @*/ 374 PetscErrorCode PetscLogObjects(PetscBool flag) 375 { 376 PetscFunctionBegin; 377 petsc_logObjects = flag; 378 PetscFunctionReturn(0); 379 } 380 381 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 382 /*@C 383 PetscLogStageRegister - Attaches a character string name to a logging stage. 384 385 Not Collective 386 387 Input Parameter: 388 . sname - The name to associate with that stage 389 390 Output Parameter: 391 . stage - The stage number 392 393 Level: intermediate 394 395 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 396 @*/ 397 PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage) 398 { 399 PetscStageLog stageLog; 400 PetscLogEvent event; 401 402 PetscFunctionBegin; 403 PetscCall(PetscLogGetStageLog(&stageLog)); 404 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 405 /* Copy events already changed in the main stage, this sucks */ 406 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 407 for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 408 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 409 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 410 if (perfstubs_initialized == PERFSTUBS_SUCCESS) PetscStackCallExternalVoid("ps_timer_create_", stageLog->stageInfo[*stage].timer = ps_timer_create_(sname)); 411 #endif 412 PetscFunctionReturn(0); 413 } 414 415 /*@C 416 PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage 417 418 Not Collective 419 420 Input Parameter: 421 . stage - The stage on which to log 422 423 Usage: 424 If the option -log_view is used to run the program containing the 425 following code, then 2 sets of summary data will be printed during 426 PetscFinalize(). 427 .vb 428 PetscInitialize(int *argc,char ***args,0,0); 429 [stage 0 of code] 430 PetscLogStagePush(1); 431 [stage 1 of code] 432 PetscLogStagePop(); 433 PetscBarrier(...); 434 [more stage 0 of code] 435 PetscFinalize(); 436 .ve 437 438 Note: 439 Use `PetscLogStageRegister()` to register a stage. 440 441 Level: intermediate 442 443 .seealso: `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 444 @*/ 445 PetscErrorCode PetscLogStagePush(PetscLogStage stage) 446 { 447 PetscStageLog stageLog; 448 449 PetscFunctionBegin; 450 PetscCall(PetscLogGetStageLog(&stageLog)); 451 PetscCall(PetscStageLogPush(stageLog, stage)); 452 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 453 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stage].timer != NULL) PetscStackCallExternalVoid("ps_timer_start_", ps_timer_start_(stageLog->stageInfo[stage].timer)); 454 #endif 455 PetscFunctionReturn(0); 456 } 457 458 /*@C 459 PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()` 460 461 Not Collective 462 463 Usage: 464 If the option -log_view is used to run the program containing the 465 following code, then 2 sets of summary data will be printed during 466 PetscFinalize(). 467 .vb 468 PetscInitialize(int *argc,char ***args,0,0); 469 [stage 0 of code] 470 PetscLogStagePush(1); 471 [stage 1 of code] 472 PetscLogStagePop(); 473 PetscBarrier(...); 474 [more stage 0 of code] 475 PetscFinalize(); 476 .ve 477 478 Level: intermediate 479 480 .seealso: `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 481 @*/ 482 PetscErrorCode PetscLogStagePop(void) 483 { 484 PetscStageLog stageLog; 485 486 PetscFunctionBegin; 487 PetscCall(PetscLogGetStageLog(&stageLog)); 488 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 489 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stageLog->curStage].timer != NULL) PetscStackCallExternalVoid("ps_timer_stop_", ps_timer_stop_(stageLog->stageInfo[stageLog->curStage].timer)); 490 #endif 491 PetscCall(PetscStageLogPop(stageLog)); 492 PetscFunctionReturn(0); 493 } 494 495 /*@ 496 PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 497 498 Not Collective 499 500 Input Parameters: 501 + stage - The stage 502 - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 503 504 Level: intermediate 505 506 Note: 507 If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist 508 509 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 510 @*/ 511 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) 512 { 513 PetscStageLog stageLog; 514 515 PetscFunctionBegin; 516 PetscCall(PetscLogGetStageLog(&stageLog)); 517 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 518 PetscFunctionReturn(0); 519 } 520 521 /*@ 522 PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 523 524 Not Collective 525 526 Input Parameter: 527 . stage - The stage 528 529 Output Parameter: 530 . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 531 532 Level: intermediate 533 534 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 535 @*/ 536 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) 537 { 538 PetscStageLog stageLog; 539 540 PetscFunctionBegin; 541 PetscCall(PetscLogGetStageLog(&stageLog)); 542 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 543 PetscFunctionReturn(0); 544 } 545 546 /*@ 547 PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()` 548 549 Not Collective 550 551 Input Parameters: 552 + stage - The stage 553 - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 554 555 Level: intermediate 556 557 Developer Note: 558 What does visible mean, needs to be documented. 559 560 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 561 @*/ 562 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) 563 { 564 PetscStageLog stageLog; 565 566 PetscFunctionBegin; 567 PetscCall(PetscLogGetStageLog(&stageLog)); 568 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 569 PetscFunctionReturn(0); 570 } 571 572 /*@ 573 PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()` 574 575 Not Collective 576 577 Input Parameter: 578 . stage - The stage 579 580 Output Parameter: 581 . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 582 583 Level: intermediate 584 585 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 586 @*/ 587 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) 588 { 589 PetscStageLog stageLog; 590 591 PetscFunctionBegin; 592 PetscCall(PetscLogGetStageLog(&stageLog)); 593 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 594 PetscFunctionReturn(0); 595 } 596 597 /*@C 598 PetscLogStageGetId - Returns the stage id when given the stage name. 599 600 Not Collective 601 602 Input Parameter: 603 . name - The stage name 604 605 Output Parameter: 606 . stage - The stage, , or -1 if no stage with that name exists 607 608 Level: intermediate 609 610 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 611 @*/ 612 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) 613 { 614 PetscStageLog stageLog; 615 616 PetscFunctionBegin; 617 PetscCall(PetscLogGetStageLog(&stageLog)); 618 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 619 PetscFunctionReturn(0); 620 } 621 622 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 623 624 /*@C 625 PetscLogEventRegister - Registers an event name for logging operations 626 627 Not Collective 628 629 Input Parameters: 630 + name - The name associated with the event 631 - classid - The classid associated to the class for this event, obtain either with 632 `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones 633 are only available in C code 634 635 Output Parameter: 636 . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`. 637 638 Example of Usage: 639 .vb 640 PetscLogEvent USER_EVENT; 641 PetscClassId classid; 642 PetscLogDouble user_event_flops; 643 PetscClassIdRegister("class name",&classid); 644 PetscLogEventRegister("User event name",classid,&USER_EVENT); 645 PetscLogEventBegin(USER_EVENT,0,0,0,0); 646 [code segment to monitor] 647 PetscLogFlops(user_event_flops); 648 PetscLogEventEnd(USER_EVENT,0,0,0,0); 649 .ve 650 651 Notes: 652 PETSc automatically logs library events if the code has been 653 configured with --with-log (which is the default) and 654 -log_view or -log_all is specified. `PetscLogEventRegister()` is 655 intended for logging user events to supplement this PETSc 656 information. 657 658 PETSc can gather data for use with the utilities Jumpshot 659 (part of the MPICH distribution). If PETSc has been compiled 660 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 661 MPICH), the user can employ another command line option, -log_mpe, 662 to create a logfile, "mpe.log", which can be visualized 663 Jumpshot. 664 665 The classid is associated with each event so that classes of events 666 can be disabled simultaneously, such as all matrix events. The user 667 can either use an existing classid, such as `MAT_CLASSID`, or create 668 their own as shown in the example. 669 670 If an existing event with the same name exists, its event handle is 671 returned instead of creating a new event. 672 673 Level: intermediate 674 675 .seealso: `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 676 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 677 @*/ 678 PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event) 679 { 680 PetscStageLog stageLog; 681 int stage; 682 683 PetscFunctionBegin; 684 *event = PETSC_DECIDE; 685 PetscCall(PetscLogGetStageLog(&stageLog)); 686 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 687 if (*event > 0) PetscFunctionReturn(0); 688 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 689 for (stage = 0; stage < stageLog->numStages; stage++) { 690 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 691 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 692 } 693 PetscFunctionReturn(0); 694 } 695 696 /*@ 697 PetscLogEventSetCollective - Indicates that a particular event is collective. 698 699 Not Collective 700 701 Input Parameters: 702 + event - The event id 703 - collective - Boolean flag indicating whether a particular event is collective 704 705 Notes: 706 New events returned from `PetscLogEventRegister()` are collective by default. 707 708 Collective events are handled specially if the -log_sync is used. In that case the logging saves information about 709 two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication 710 to be performed. This option is useful to debug imbalance within the computations or communications 711 712 Level: developer 713 714 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()` 715 @*/ 716 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective) 717 { 718 PetscStageLog stageLog; 719 PetscEventRegLog eventRegLog; 720 721 PetscFunctionBegin; 722 PetscCall(PetscLogGetStageLog(&stageLog)); 723 PetscCall(PetscStageLogGetEventRegLog(stageLog, &eventRegLog)); 724 PetscCheck(event >= 0 && event <= eventRegLog->numEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid event id"); 725 eventRegLog->eventInfo[event].collective = collective; 726 PetscFunctionReturn(0); 727 } 728 729 /*@ 730 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 731 732 Not Collective 733 734 Input Parameter: 735 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 736 737 Level: developer 738 739 .seealso: `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 740 @*/ 741 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) 742 { 743 PetscStageLog stageLog; 744 int stage; 745 746 PetscFunctionBegin; 747 PetscCall(PetscLogGetStageLog(&stageLog)); 748 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 749 PetscFunctionReturn(0); 750 } 751 752 /*@ 753 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 754 755 Not Collective 756 757 Input Parameter: 758 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 759 760 Level: developer 761 762 Note: 763 If a class is excluded then events associated with that class are not logged. 764 765 .seealso: `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 766 @*/ 767 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) 768 { 769 PetscStageLog stageLog; 770 int stage; 771 772 PetscFunctionBegin; 773 PetscCall(PetscLogGetStageLog(&stageLog)); 774 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 775 PetscFunctionReturn(0); 776 } 777 778 /*@ 779 PetscLogEventActivate - Indicates that a particular event should be logged. 780 781 Not Collective 782 783 Input Parameter: 784 . event - The event id 785 786 Usage: 787 .vb 788 PetscLogEventDeactivate(VEC_SetValues); 789 [code where you do not want to log VecSetValues()] 790 PetscLogEventActivate(VEC_SetValues); 791 [code where you do want to log VecSetValues()] 792 .ve 793 794 Note: 795 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 796 or an event number obtained with `PetscLogEventRegister()`. 797 798 Level: advanced 799 800 .seealso: `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 801 @*/ 802 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) 803 { 804 PetscStageLog stageLog; 805 int stage; 806 807 PetscFunctionBegin; 808 PetscCall(PetscLogGetStageLog(&stageLog)); 809 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 810 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 811 PetscFunctionReturn(0); 812 } 813 814 /*@ 815 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 816 817 Not Collective 818 819 Input Parameter: 820 . event - The event id 821 822 Usage: 823 .vb 824 PetscLogEventDeactivate(VEC_SetValues); 825 [code where you do not want to log VecSetValues()] 826 PetscLogEventActivate(VEC_SetValues); 827 [code where you do want to log VecSetValues()] 828 .ve 829 830 Note: 831 The event may be either a pre-defined PETSc event (found in 832 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 833 834 Level: advanced 835 836 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 837 @*/ 838 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) 839 { 840 PetscStageLog stageLog; 841 int stage; 842 843 PetscFunctionBegin; 844 PetscCall(PetscLogGetStageLog(&stageLog)); 845 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 846 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 847 PetscFunctionReturn(0); 848 } 849 850 /*@ 851 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called 852 853 Not Collective 854 855 Input Parameter: 856 . event - The event id 857 858 Usage: 859 .vb 860 PetscLogEventDeactivatePush(VEC_SetValues); 861 [code where you do not want to log VecSetValues()] 862 PetscLogEventDeactivatePop(VEC_SetValues); 863 [code where you do want to log VecSetValues()] 864 .ve 865 866 Note: 867 The event may be either a pre-defined PETSc event (found in 868 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 869 870 Level: advanced 871 872 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()` 873 @*/ 874 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) 875 { 876 PetscStageLog stageLog; 877 int stage; 878 879 PetscFunctionBegin; 880 PetscCall(PetscLogGetStageLog(&stageLog)); 881 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 882 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 883 PetscFunctionReturn(0); 884 } 885 886 /*@ 887 PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()` 888 889 Not Collective 890 891 Input Parameter: 892 . event - The event id 893 894 Usage: 895 .vb 896 PetscLogEventDeactivatePush(VEC_SetValues); 897 [code where you do not want to log VecSetValues()] 898 PetscLogEventDeactivatePop(VEC_SetValues); 899 [code where you do want to log VecSetValues()] 900 .ve 901 902 Note: 903 The event may be either a pre-defined PETSc event (found in 904 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 905 906 Level: advanced 907 908 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 909 @*/ 910 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) 911 { 912 PetscStageLog stageLog; 913 int stage; 914 915 PetscFunctionBegin; 916 PetscCall(PetscLogGetStageLog(&stageLog)); 917 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 918 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 919 PetscFunctionReturn(0); 920 } 921 922 /*@ 923 PetscLogEventSetActiveAll - Turns on logging of all events 924 925 Not Collective 926 927 Input Parameters: 928 + event - The event id 929 - isActive - The activity flag determining whether the event is logged 930 931 Level: advanced 932 933 .seealso: `PlogEventActivate()`, `PlogEventDeactivate()` 934 @*/ 935 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) 936 { 937 PetscStageLog stageLog; 938 int stage; 939 940 PetscFunctionBegin; 941 PetscCall(PetscLogGetStageLog(&stageLog)); 942 for (stage = 0; stage < stageLog->numStages; stage++) { 943 if (isActive) { 944 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 945 } else { 946 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 947 } 948 } 949 PetscFunctionReturn(0); 950 } 951 952 /*@ 953 PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage 954 955 Not Collective 956 957 Input Parameter: 958 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 959 960 Level: developer 961 962 .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 963 @*/ 964 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) 965 { 966 PetscStageLog stageLog; 967 int stage; 968 969 PetscFunctionBegin; 970 PetscCall(PetscLogGetStageLog(&stageLog)); 971 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 972 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 973 PetscFunctionReturn(0); 974 } 975 976 /*@ 977 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage 978 979 Not Collective 980 981 Input Parameter: 982 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 983 984 Level: developer 985 986 .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 987 @*/ 988 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) 989 { 990 PetscStageLog stageLog; 991 int stage; 992 993 PetscFunctionBegin; 994 PetscCall(PetscLogGetStageLog(&stageLog)); 995 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 996 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 997 PetscFunctionReturn(0); 998 } 999 1000 /*MC 1001 PetscLogEventSync - Synchronizes the beginning of a user event. 1002 1003 Synopsis: 1004 #include <petsclog.h> 1005 PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm) 1006 1007 Collective 1008 1009 Input Parameters: 1010 + e - integer associated with the event obtained from PetscLogEventRegister() 1011 - comm - an MPI communicator 1012 1013 Usage: 1014 .vb 1015 PetscLogEvent USER_EVENT; 1016 PetscLogEventRegister("User event",0,&USER_EVENT); 1017 PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD); 1018 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1019 [code segment to monitor] 1020 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1021 .ve 1022 1023 Note: 1024 This routine should be called only if there is not a 1025 `PetscObject` available to pass to `PetscLogEventBegin()`. 1026 1027 Level: developer 1028 1029 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1030 M*/ 1031 1032 /*MC 1033 PetscLogEventBegin - Logs the beginning of a user event. 1034 1035 Synopsis: 1036 #include <petsclog.h> 1037 PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1038 1039 Not Collective 1040 1041 Input Parameters: 1042 + e - integer associated with the event obtained from PetscLogEventRegister() 1043 - o1,o2,o3,o4 - objects associated with the event, or 0 1044 1045 Fortran Synopsis: 1046 void PetscLogEventBegin(int e,PetscErrorCode ierr) 1047 1048 Usage: 1049 .vb 1050 PetscLogEvent USER_EVENT; 1051 PetscLogDouble user_event_flops; 1052 PetscLogEventRegister("User event",0,&USER_EVENT); 1053 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1054 [code segment to monitor] 1055 PetscLogFlops(user_event_flops); 1056 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1057 .ve 1058 1059 Developer Note: 1060 `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the 1061 errors that occur in the macro directly because other packages that use this macros have used them in their 1062 own functions or methods that do not return error codes and it would be disruptive to change the current 1063 behavior. 1064 1065 Level: intermediate 1066 1067 .seealso: `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1068 M*/ 1069 1070 /*MC 1071 PetscLogEventEnd - Log the end of a user event. 1072 1073 Synopsis: 1074 #include <petsclog.h> 1075 PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1076 1077 Not Collective 1078 1079 Input Parameters: 1080 + e - integer associated with the event obtained with PetscLogEventRegister() 1081 - o1,o2,o3,o4 - objects associated with the event, or 0 1082 1083 Fortran Synopsis: 1084 void PetscLogEventEnd(int e,PetscErrorCode ierr) 1085 1086 Usage: 1087 .vb 1088 PetscLogEvent USER_EVENT; 1089 PetscLogDouble user_event_flops; 1090 PetscLogEventRegister("User event",0,&USER_EVENT,); 1091 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1092 [code segment to monitor] 1093 PetscLogFlops(user_event_flops); 1094 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1095 .ve 1096 1097 Level: intermediate 1098 1099 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1100 M*/ 1101 1102 /*@C 1103 PetscLogEventGetId - Returns the event id when given the event name. 1104 1105 Not Collective 1106 1107 Input Parameter: 1108 . name - The event name 1109 1110 Output Parameter: 1111 . event - The event, or -1 if no event with that name exists 1112 1113 Level: intermediate 1114 1115 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1116 @*/ 1117 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) 1118 { 1119 PetscStageLog stageLog; 1120 1121 PetscFunctionBegin; 1122 PetscCall(PetscLogGetStageLog(&stageLog)); 1123 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1124 PetscFunctionReturn(0); 1125 } 1126 1127 PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event) 1128 { 1129 PetscFunctionBegin; 1130 PetscCall(PetscIntStackPush(current_log_event_stack, event)); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 PetscErrorCode PetscLogPopCurrentEvent_Internal(void) 1135 { 1136 PetscFunctionBegin; 1137 PetscCall(PetscIntStackPop(current_log_event_stack, NULL)); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event) 1142 { 1143 PetscBool empty; 1144 1145 PetscFunctionBegin; 1146 PetscValidIntPointer(event, 1); 1147 *event = PETSC_DECIDE; 1148 PetscCall(PetscIntStackEmpty(current_log_event_stack, &empty)); 1149 if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack, event)); 1150 PetscFunctionReturn(0); 1151 } 1152 1153 PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event) 1154 { 1155 PetscFunctionBegin; 1156 if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event, NULL, NULL, NULL, NULL)); 1157 PetscFunctionReturn(0); 1158 } 1159 1160 PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event) 1161 { 1162 PetscStageLog stageLog; 1163 PetscEventPerfLog eventLog; 1164 int stage; 1165 1166 PetscFunctionBegin; 1167 if (event == PETSC_DECIDE) PetscFunctionReturn(0); 1168 PetscCall(PetscLogEventBegin(event, NULL, NULL, NULL, NULL)); 1169 PetscCall(PetscLogGetStageLog(&stageLog)); 1170 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1171 PetscCall(PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog)); 1172 eventLog->eventInfo[event].count--; 1173 PetscFunctionReturn(0); 1174 } 1175 1176 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1177 /*@C 1178 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1179 be read by bin/petscview. This program no longer exists. 1180 1181 Collective on `PETSC_COMM_WORLD` 1182 1183 Input Parameter: 1184 . name - an optional file name 1185 1186 Usage: 1187 .vb 1188 PetscInitialize(...); 1189 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1190 ... code ... 1191 PetscLogDump(filename); 1192 PetscFinalize(); 1193 .ve 1194 1195 Note: 1196 The default file name is 1197 $ Log.<rank> 1198 where <rank> is the processor number. If no name is specified, 1199 this file will be used. 1200 1201 Level: advanced 1202 1203 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1204 @*/ 1205 PetscErrorCode PetscLogDump(const char sname[]) 1206 { 1207 PetscStageLog stageLog; 1208 PetscEventPerfInfo *eventInfo; 1209 FILE *fd; 1210 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1211 PetscLogDouble flops, _TotalTime; 1212 PetscMPIInt rank; 1213 int action, object, curStage; 1214 PetscLogEvent event; 1215 1216 PetscFunctionBegin; 1217 /* Calculate the total elapsed time */ 1218 PetscTime(&_TotalTime); 1219 _TotalTime -= petsc_BaseTime; 1220 /* Open log file */ 1221 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1222 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank)); 1223 PetscCall(PetscFixFilename(file, fname)); 1224 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1225 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1226 /* Output totals */ 1227 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1228 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1229 /* Output actions */ 1230 if (petsc_logActions) { 1231 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1232 for (action = 0; action < petsc_numActions; action++) { 1233 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1234 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1235 } 1236 } 1237 /* Output objects */ 1238 if (petsc_logObjects) { 1239 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1240 for (object = 0; object < petsc_numObjects; object++) { 1241 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem)); 1242 if (!petsc_objects[object].name[0]) { 1243 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n")); 1244 } else { 1245 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1246 } 1247 if (petsc_objects[object].info[0] != 0) { 1248 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1249 } else { 1250 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1251 } 1252 } 1253 } 1254 /* Output events */ 1255 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1256 PetscCall(PetscLogGetStageLog(&stageLog)); 1257 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1258 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1259 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1260 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time; 1261 else flops = 0.0; 1262 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops)); 1263 } 1264 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 /* 1269 PetscLogView_Detailed - Each process prints the times for its own events 1270 1271 */ 1272 PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) 1273 { 1274 PetscStageLog stageLog; 1275 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1276 PetscLogDouble locTotalTime, numRed, maxMem; 1277 int numStages, numEvents, stage, event; 1278 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1279 PetscMPIInt rank, size; 1280 1281 PetscFunctionBegin; 1282 PetscCallMPI(MPI_Comm_size(comm, &size)); 1283 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1284 /* Must preserve reduction count before we go on */ 1285 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1286 /* Get the total elapsed time */ 1287 PetscTime(&locTotalTime); 1288 locTotalTime -= petsc_BaseTime; 1289 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 1290 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 1291 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 1292 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 1293 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 1294 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 1295 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 1296 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 1297 PetscCall(PetscLogGetStageLog(&stageLog)); 1298 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1299 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1300 for (stage = 0; stage < numStages; stage++) { 1301 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name)); 1302 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name)); 1303 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1304 for (event = 0; event < numEvents; event++) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name)); 1305 } 1306 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1307 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1308 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1309 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1310 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1311 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1312 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1313 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects)); 1314 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1315 PetscCall(PetscViewerFlush(viewer)); 1316 for (stage = 0; stage < numStages; stage++) { 1317 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1318 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, 1319 stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1320 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1321 for (event = 0; event < numEvents; event++) { 1322 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1323 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1324 stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, 1325 eventInfo->flops)); 1326 if (eventInfo->dof[0] >= 0.) { 1327 PetscInt d, e; 1328 1329 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1330 for (d = 0; d < 8; ++d) { 1331 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1332 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1333 } 1334 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1335 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1336 for (e = 0; e < 8; ++e) { 1337 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1338 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1339 } 1340 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1341 } 1342 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1343 } 1344 } 1345 PetscCall(PetscViewerFlush(viewer)); 1346 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1347 PetscFunctionReturn(0); 1348 } 1349 1350 /* 1351 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1352 */ 1353 PetscErrorCode PetscLogView_CSV(PetscViewer viewer) 1354 { 1355 PetscStageLog stageLog; 1356 PetscEventPerfInfo *eventInfo = NULL; 1357 PetscLogDouble locTotalTime, maxMem; 1358 int numStages, numEvents, stage, event; 1359 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1360 PetscMPIInt rank, size; 1361 1362 PetscFunctionBegin; 1363 PetscCallMPI(MPI_Comm_size(comm, &size)); 1364 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1365 /* Must preserve reduction count before we go on */ 1366 /* Get the total elapsed time */ 1367 PetscTime(&locTotalTime); 1368 locTotalTime -= petsc_BaseTime; 1369 PetscCall(PetscLogGetStageLog(&stageLog)); 1370 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1371 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1372 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1373 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1374 PetscCall(PetscViewerFlush(viewer)); 1375 for (stage = 0; stage < numStages; stage++) { 1376 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1377 1378 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1379 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1380 for (event = 0; event < numEvents; event++) { 1381 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1382 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, 1383 eventInfo->numReductions, eventInfo->flops)); 1384 if (eventInfo->dof[0] >= 0.) { 1385 PetscInt d, e; 1386 1387 for (d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1388 for (e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1389 } 1390 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1391 } 1392 } 1393 PetscCall(PetscViewerFlush(viewer)); 1394 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1395 PetscFunctionReturn(0); 1396 } 1397 1398 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) 1399 { 1400 PetscFunctionBegin; 1401 if (!PetscLogSyncOn) PetscFunctionReturn(0); 1402 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1403 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1404 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1405 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1406 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1407 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1408 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1409 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1410 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1411 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1412 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) 1417 { 1418 PetscFunctionBegin; 1419 if (PetscDefined(USE_DEBUG)) { 1420 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1421 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1422 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1423 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1424 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1425 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1426 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1427 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1428 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1429 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1430 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1431 } 1432 PetscFunctionReturn(0); 1433 } 1434 1435 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) 1436 { 1437 #if defined(PETSC_HAVE_DEVICE) 1438 PetscMPIInt size; 1439 PetscBool deviceInitialized = PETSC_FALSE; 1440 1441 PetscFunctionBegin; 1442 PetscCallMPI(MPI_Comm_size(comm, &size)); 1443 for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) { 1444 const PetscDeviceType dtype = PetscDeviceTypeCast(i); 1445 if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */ 1446 deviceInitialized = PETSC_TRUE; 1447 break; 1448 } 1449 } 1450 /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */ 1451 if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(0); 1452 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1453 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1454 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1455 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1456 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1457 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1458 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1459 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1460 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1461 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1462 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1463 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1464 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1465 PetscFunctionReturn(0); 1466 #else 1467 return 0; 1468 #endif 1469 } 1470 1471 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) 1472 { 1473 #if defined(PETSC_HAVE_DEVICE) 1474 1475 PetscFunctionBegin; 1476 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(0); 1477 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1478 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1479 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1480 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1481 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1482 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1483 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1484 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1485 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1486 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1487 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1488 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1489 PetscFunctionReturn(0); 1490 #else 1491 return 0; 1492 #endif 1493 } 1494 1495 PetscErrorCode PetscLogView_Default(PetscViewer viewer) 1496 { 1497 FILE *fd; 1498 PetscLogDouble zero = 0.0; 1499 PetscStageLog stageLog; 1500 PetscStageInfo *stageInfo = NULL; 1501 PetscEventPerfInfo *eventInfo = NULL; 1502 PetscClassPerfInfo *classInfo; 1503 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1504 const char *name; 1505 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1506 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1507 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1508 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1509 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1510 PetscLogDouble min, max, tot, ratio, avg, x, y; 1511 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1512 #if defined(PETSC_HAVE_DEVICE) 1513 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1514 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1515 #endif 1516 PetscMPIInt minC, maxC; 1517 PetscMPIInt size, rank; 1518 PetscBool *localStageUsed, *stageUsed; 1519 PetscBool *localStageVisible, *stageVisible; 1520 int numStages, localNumEvents, numEvents; 1521 int stage, oclass; 1522 PetscLogEvent event; 1523 PetscErrorCode ierr = 0; 1524 char version[256]; 1525 MPI_Comm comm; 1526 #if defined(PETSC_HAVE_DEVICE) 1527 PetscLogEvent eventid; 1528 PetscInt64 nas = 0x7FF0000000000002; 1529 #endif 1530 1531 PetscFunctionBegin; 1532 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1533 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1534 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1535 PetscCallMPI(MPI_Comm_size(comm, &size)); 1536 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1537 /* Get the total elapsed time */ 1538 PetscTime(&locTotalTime); 1539 locTotalTime -= petsc_BaseTime; 1540 1541 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1542 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1543 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1544 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1545 PetscCall(PetscLogViewWarnSync(comm, fd)); 1546 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1547 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1548 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1549 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1550 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1551 PetscCall(PetscGetUserName(username, sizeof(username))); 1552 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1553 PetscCall(PetscGetDate(date, sizeof(date))); 1554 PetscCall(PetscGetVersion(version, sizeof(version))); 1555 if (size == 1) { 1556 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1557 } else { 1558 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1559 } 1560 #if defined(PETSC_HAVE_OPENMP) 1561 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1562 #endif 1563 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1564 1565 /* Must preserve reduction count before we go on */ 1566 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1567 1568 /* Calculate summary information */ 1569 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1570 /* Time */ 1571 PetscCallMPI(MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1572 PetscCallMPI(MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1573 PetscCallMPI(MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1574 avg = tot / ((PetscLogDouble)size); 1575 if (min != 0.0) ratio = max / min; 1576 else ratio = 0.0; 1577 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1578 TotalTime = tot; 1579 /* Objects */ 1580 avg = (PetscLogDouble)petsc_numObjects; 1581 PetscCallMPI(MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1582 PetscCallMPI(MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1583 PetscCallMPI(MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1584 avg = tot / ((PetscLogDouble)size); 1585 if (min != 0.0) ratio = max / min; 1586 else ratio = 0.0; 1587 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1588 /* Flops */ 1589 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1590 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1591 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1592 avg = tot / ((PetscLogDouble)size); 1593 if (min != 0.0) ratio = max / min; 1594 else ratio = 0.0; 1595 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1596 TotalFlops = tot; 1597 /* Flops/sec -- Must talk to Barry here */ 1598 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1599 else flops = 0.0; 1600 PetscCallMPI(MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1601 PetscCallMPI(MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1602 PetscCallMPI(MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1603 avg = tot / ((PetscLogDouble)size); 1604 if (min != 0.0) ratio = max / min; 1605 else ratio = 0.0; 1606 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1607 /* Memory */ 1608 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1609 if (mem > 0.0) { 1610 PetscCallMPI(MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1611 PetscCallMPI(MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1612 PetscCallMPI(MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1613 avg = tot / ((PetscLogDouble)size); 1614 if (min != 0.0) ratio = max / min; 1615 else ratio = 0.0; 1616 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1617 } 1618 /* Messages */ 1619 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1620 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1621 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1622 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1623 avg = tot / ((PetscLogDouble)size); 1624 if (min != 0.0) ratio = max / min; 1625 else ratio = 0.0; 1626 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1627 numMessages = tot; 1628 /* Message Lengths */ 1629 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1630 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1631 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1632 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1633 if (numMessages != 0) avg = tot / numMessages; 1634 else avg = 0.0; 1635 if (min != 0.0) ratio = max / min; 1636 else ratio = 0.0; 1637 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1638 messageLength = tot; 1639 /* Reductions */ 1640 PetscCallMPI(MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1641 PetscCallMPI(MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1642 PetscCallMPI(MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1643 if (min != 0.0) ratio = max / min; 1644 else ratio = 0.0; 1645 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1646 numReductions = red; /* wrong because uses count from process zero */ 1647 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1648 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1649 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1650 1651 /* Get total number of stages -- 1652 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1653 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1654 This seems best accomplished by assoicating a communicator with each stage. 1655 */ 1656 PetscCall(PetscLogGetStageLog(&stageLog)); 1657 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1658 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1659 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1660 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1661 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1662 if (numStages > 0) { 1663 stageInfo = stageLog->stageInfo; 1664 for (stage = 0; stage < numStages; stage++) { 1665 if (stage < stageLog->numStages) { 1666 localStageUsed[stage] = stageInfo[stage].used; 1667 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1668 } else { 1669 localStageUsed[stage] = PETSC_FALSE; 1670 localStageVisible[stage] = PETSC_TRUE; 1671 } 1672 } 1673 PetscCallMPI(MPI_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1674 PetscCallMPI(MPI_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1675 for (stage = 0; stage < numStages; stage++) { 1676 if (stageUsed[stage]) { 1677 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1678 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1679 break; 1680 } 1681 } 1682 for (stage = 0; stage < numStages; stage++) { 1683 if (!stageUsed[stage]) continue; 1684 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1685 if (localStageUsed[stage]) { 1686 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1687 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1688 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1689 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1690 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1691 name = stageInfo[stage].name; 1692 } else { 1693 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1694 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1695 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1696 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1697 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1698 name = ""; 1699 } 1700 mess *= 0.5; 1701 messLen *= 0.5; 1702 red /= size; 1703 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1704 else fracTime = 0.0; 1705 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1706 else fracFlops = 0.0; 1707 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1708 if (numMessages != 0.0) fracMessages = mess / numMessages; 1709 else fracMessages = 0.0; 1710 if (mess != 0.0) avgMessLen = messLen / mess; 1711 else avgMessLen = 0.0; 1712 if (messageLength != 0.0) fracLength = messLen / messageLength; 1713 else fracLength = 0.0; 1714 if (numReductions != 0.0) fracReductions = red / numReductions; 1715 else fracReductions = 0.0; 1716 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1717 } 1718 } 1719 1720 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1721 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1722 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1723 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1724 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1725 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1726 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1727 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1728 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1729 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1730 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1731 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1732 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1733 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1734 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1735 if (PetscLogMemory) { 1736 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1737 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1738 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1739 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1740 } 1741 #if defined(PETSC_HAVE_DEVICE) 1742 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1743 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1744 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1745 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1746 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1747 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1748 #endif 1749 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1750 1751 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1752 1753 /* Report events */ 1754 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1755 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1756 #if defined(PETSC_HAVE_DEVICE) 1757 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1758 #endif 1759 PetscCall(PetscFPrintf(comm, fd, "\n")); 1760 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1761 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1762 #if defined(PETSC_HAVE_DEVICE) 1763 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1764 #endif 1765 PetscCall(PetscFPrintf(comm, fd, "\n")); 1766 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1767 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1768 #if defined(PETSC_HAVE_DEVICE) 1769 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1770 #endif 1771 PetscCall(PetscFPrintf(comm, fd, "\n")); 1772 1773 #if defined(PETSC_HAVE_DEVICE) 1774 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1775 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1776 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1777 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1778 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1779 #endif 1780 1781 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1782 for (stage = 0; stage < numStages; stage++) { 1783 if (!stageVisible[stage]) continue; 1784 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1785 if (localStageUsed[stage]) { 1786 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1787 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1788 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1789 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1790 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1791 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1792 } else { 1793 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1794 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1795 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1796 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1797 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1798 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1799 } 1800 mess *= 0.5; 1801 messLen *= 0.5; 1802 red /= size; 1803 1804 /* Get total number of events in this stage -- 1805 Currently, a single processor can register more events than another, but events must all be registered in order, 1806 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1807 on the event ID. This seems best accomplished by associating a communicator with each stage. 1808 1809 Problem: If the event did not happen on proc 1, its name will not be available. 1810 Problem: Event visibility is not implemented 1811 */ 1812 if (localStageUsed[stage]) { 1813 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1814 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1815 } else localNumEvents = 0; 1816 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1817 for (event = 0; event < numEvents; event++) { 1818 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1819 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1820 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; 1821 else flopr = 0.0; 1822 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1823 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1824 PetscCallMPI(MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1825 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1826 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1827 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1828 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1829 PetscCallMPI(MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1830 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1831 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1832 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1833 if (PetscLogMemory) { 1834 PetscCallMPI(MPI_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1835 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1836 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1837 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1838 } 1839 #if defined(PETSC_HAVE_DEVICE) 1840 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1841 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1842 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1843 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1844 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1845 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1846 #endif 1847 name = stageLog->eventLog->eventInfo[event].name; 1848 } else { 1849 flopr = 0.0; 1850 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1851 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1852 PetscCallMPI(MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1853 PetscCallMPI(MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1854 PetscCallMPI(MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1855 PetscCallMPI(MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1856 PetscCallMPI(MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1857 PetscCallMPI(MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1858 PetscCallMPI(MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1859 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1860 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1861 if (PetscLogMemory) { 1862 PetscCallMPI(MPI_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1863 PetscCallMPI(MPI_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1864 PetscCallMPI(MPI_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1865 PetscCallMPI(MPI_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1866 } 1867 #if defined(PETSC_HAVE_DEVICE) 1868 PetscCallMPI(MPI_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1869 PetscCallMPI(MPI_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1870 PetscCallMPI(MPI_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1871 PetscCallMPI(MPI_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1872 PetscCallMPI(MPI_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1873 PetscCallMPI(MPI_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1874 #endif 1875 name = ""; 1876 } 1877 if (mint < 0.0) { 1878 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name)); 1879 mint = 0; 1880 } 1881 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, name); 1882 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1883 #if defined(PETSC_HAVE_DEVICE) 1884 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1885 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 1886 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 1887 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 1888 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 1889 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 1890 } 1891 } 1892 #endif 1893 totm *= 0.5; 1894 totml *= 0.5; 1895 totr /= size; 1896 1897 if (maxC != 0) { 1898 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 1899 else ratC = 0.0; 1900 if (mint != 0.0) ratt = maxt / mint; 1901 else ratt = 0.0; 1902 if (minf != 0.0) ratf = maxf / minf; 1903 else ratf = 0.0; 1904 if (TotalTime != 0.0) fracTime = tott / TotalTime; 1905 else fracTime = 0.0; 1906 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 1907 else fracFlops = 0.0; 1908 if (stageTime != 0.0) fracStageTime = tott / stageTime; 1909 else fracStageTime = 0.0; 1910 if (flops != 0.0) fracStageFlops = totf / flops; 1911 else fracStageFlops = 0.0; 1912 if (numMessages != 0.0) fracMess = totm / numMessages; 1913 else fracMess = 0.0; 1914 if (messageLength != 0.0) fracMessLen = totml / messageLength; 1915 else fracMessLen = 0.0; 1916 if (numReductions != 0.0) fracRed = totr / numReductions; 1917 else fracRed = 0.0; 1918 if (mess != 0.0) fracStageMess = totm / mess; 1919 else fracStageMess = 0.0; 1920 if (messLen != 0.0) fracStageMessLen = totml / messLen; 1921 else fracStageMessLen = 0.0; 1922 if (red != 0.0) fracStageRed = totr / red; 1923 else fracStageRed = 0.0; 1924 if (totm != 0.0) totml /= totm; 1925 else totml = 0.0; 1926 if (maxt != 0.0) flopr = totf / maxt; 1927 else flopr = 0.0; 1928 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) 1929 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 1930 else 1931 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 1932 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 1933 #if defined(PETSC_HAVE_DEVICE) 1934 if (totf != 0.0) fracgflops = gflops / totf; 1935 else fracgflops = 0.0; 1936 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 1937 else gflopr = 0.0; 1938 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 1939 #endif 1940 PetscCall(PetscFPrintf(comm, fd, "\n")); 1941 } 1942 } 1943 } 1944 1945 /* Memory usage and object creation */ 1946 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1947 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1948 #if defined(PETSC_HAVE_DEVICE) 1949 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1950 #endif 1951 PetscCall(PetscFPrintf(comm, fd, "\n")); 1952 PetscCall(PetscFPrintf(comm, fd, "\n")); 1953 1954 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1955 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1956 stats for stages local to processor sets. 1957 */ 1958 /* We should figure out the longest object name here (now 20 characters) */ 1959 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 1960 for (stage = 0; stage < numStages; stage++) { 1961 if (localStageUsed[stage]) { 1962 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 1963 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1964 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 1965 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 1966 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions)); 1967 } 1968 } 1969 } else { 1970 if (!localStageVisible[stage]) continue; 1971 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1972 } 1973 } 1974 1975 PetscCall(PetscFree(localStageUsed)); 1976 PetscCall(PetscFree(stageUsed)); 1977 PetscCall(PetscFree(localStageVisible)); 1978 PetscCall(PetscFree(stageVisible)); 1979 1980 /* Information unrelated to this particular run */ 1981 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 1982 PetscTime(&y); 1983 PetscTime(&x); 1984 PetscTime(&y); 1985 PetscTime(&y); 1986 PetscTime(&y); 1987 PetscTime(&y); 1988 PetscTime(&y); 1989 PetscTime(&y); 1990 PetscTime(&y); 1991 PetscTime(&y); 1992 PetscTime(&y); 1993 PetscTime(&y); 1994 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 1995 /* MPI information */ 1996 if (size > 1) { 1997 MPI_Status status; 1998 PetscMPIInt tag; 1999 MPI_Comm newcomm; 2000 2001 PetscCallMPI(MPI_Barrier(comm)); 2002 PetscTime(&x); 2003 PetscCallMPI(MPI_Barrier(comm)); 2004 PetscCallMPI(MPI_Barrier(comm)); 2005 PetscCallMPI(MPI_Barrier(comm)); 2006 PetscCallMPI(MPI_Barrier(comm)); 2007 PetscCallMPI(MPI_Barrier(comm)); 2008 PetscTime(&y); 2009 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 2010 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 2011 PetscCallMPI(MPI_Barrier(comm)); 2012 if (rank) { 2013 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 2014 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 2015 } else { 2016 PetscTime(&x); 2017 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 2018 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 2019 PetscTime(&y); 2020 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 2021 } 2022 PetscCall(PetscCommDestroy(&newcomm)); 2023 } 2024 PetscCall(PetscOptionsView(NULL, viewer)); 2025 2026 /* Machine and compile information */ 2027 #if defined(PETSC_USE_FORTRAN_KERNELS) 2028 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 2029 #else 2030 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 2031 #endif 2032 #if defined(PETSC_USE_64BIT_INDICES) 2033 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64 bit PetscInt\n")); 2034 #elif defined(PETSC_USE___FLOAT128) 2035 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32 bit PetscInt\n")); 2036 #endif 2037 #if defined(PETSC_USE_REAL_SINGLE) 2038 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 2039 #elif defined(PETSC_USE___FLOAT128) 2040 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 2041 #endif 2042 #if defined(PETSC_USE_REAL_MAT_SINGLE) 2043 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 2044 #else 2045 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 2046 #endif 2047 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 2048 2049 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 2050 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 2051 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 2052 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 2053 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 2054 2055 /* Cleanup */ 2056 PetscCall(PetscFPrintf(comm, fd, "\n")); 2057 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 2058 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 2059 PetscCall(PetscFPTrapPop()); 2060 PetscFunctionReturn(0); 2061 } 2062 2063 /*@C 2064 PetscLogView - Prints a summary of the logging. 2065 2066 Collective over MPI_Comm 2067 2068 Input Parameter: 2069 . viewer - an ASCII viewer 2070 2071 Options Database Keys: 2072 + -log_view [:filename] - Prints summary of log information 2073 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2074 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2075 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2076 . -log_view_memory - Also display memory usage in each event 2077 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2078 . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation 2079 - -log_trace [filename] - Displays a trace of what each process is doing 2080 2081 Notes: 2082 It is possible to control the logging programatically but we recommend using the options database approach whenever possible 2083 By default the summary is printed to stdout. 2084 2085 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2086 2087 If PETSc is configured with --with-logging=0 then this functionality is not available 2088 2089 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2090 directory then open filename.xml with your browser. Specific notes for certain browsers 2091 $ Firefox and Internet explorer - simply open the file 2092 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2093 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2094 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2095 your browser. 2096 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2097 window and render the XML log file contents. 2098 2099 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2100 2101 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2102 or using speedscope (https://www.speedscope.app). 2103 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2104 2105 Level: beginner 2106 2107 .seealso: `PetscLogDefaultBegin()`, `PetscLogDump()` 2108 @*/ 2109 PetscErrorCode PetscLogView(PetscViewer viewer) 2110 { 2111 PetscBool isascii; 2112 PetscViewerFormat format; 2113 int stage, lastStage; 2114 PetscStageLog stageLog; 2115 2116 PetscFunctionBegin; 2117 PetscCheck(PetscLogPLB, PETSC_COMM_SELF, PETSC_ERR_SUP, "Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2118 /* Pop off any stages the user forgot to remove */ 2119 lastStage = 0; 2120 PetscCall(PetscLogGetStageLog(&stageLog)); 2121 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2122 while (stage >= 0) { 2123 lastStage = stage; 2124 PetscCall(PetscStageLogPop(stageLog)); 2125 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2126 } 2127 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2128 PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Currently can only view logging to ASCII"); 2129 PetscCall(PetscViewerGetFormat(viewer, &format)); 2130 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2131 PetscCall(PetscLogView_Default(viewer)); 2132 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2133 PetscCall(PetscLogView_Detailed(viewer)); 2134 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2135 PetscCall(PetscLogView_CSV(viewer)); 2136 } else if (format == PETSC_VIEWER_ASCII_XML) { 2137 PetscCall(PetscLogView_Nested(viewer)); 2138 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2139 PetscCall(PetscLogView_Flamegraph(viewer)); 2140 } 2141 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 /*@C 2146 PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed. 2147 2148 Collective on `PETSC_COMM_WORLD` 2149 2150 Level: developer 2151 2152 .seealso: `PetscLogView()` 2153 @*/ 2154 PetscErrorCode PetscLogViewFromOptions(void) 2155 { 2156 PetscViewer viewer; 2157 PetscBool flg; 2158 PetscViewerFormat format; 2159 2160 PetscFunctionBegin; 2161 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg)); 2162 if (flg) { 2163 PetscCall(PetscViewerPushFormat(viewer, format)); 2164 PetscCall(PetscLogView(viewer)); 2165 PetscCall(PetscViewerPopFormat(viewer)); 2166 PetscCall(PetscViewerDestroy(&viewer)); 2167 } 2168 PetscFunctionReturn(0); 2169 } 2170 2171 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2172 /*@C 2173 PetscGetFlops - Returns the number of flops used on this processor 2174 since the program began. 2175 2176 Not Collective 2177 2178 Output Parameter: 2179 flops - number of floating point operations 2180 2181 Notes: 2182 A global counter logs all PETSc flop counts. The user can use 2183 `PetscLogFlops()` to increment this counter to include flops for the 2184 application code. 2185 2186 A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank 2187 2188 Level: intermediate 2189 2190 .seealso: `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()` 2191 @*/ 2192 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) 2193 { 2194 PetscFunctionBegin; 2195 *flops = petsc_TotalFlops; 2196 PetscFunctionReturn(0); 2197 } 2198 2199 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2200 { 2201 size_t fullLength; 2202 va_list Argp; 2203 2204 PetscFunctionBegin; 2205 if (!petsc_logObjects) PetscFunctionReturn(0); 2206 va_start(Argp, format); 2207 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp)); 2208 va_end(Argp); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 /*MC 2213 PetscLogFlops - Adds floating point operations to the global counter. 2214 2215 Synopsis: 2216 #include <petsclog.h> 2217 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2218 2219 Not Collective 2220 2221 Input Parameter: 2222 . f - flop counter 2223 2224 Usage: 2225 .vb 2226 PetscLogEvent USER_EVENT; 2227 PetscLogEventRegister("User event",0,&USER_EVENT); 2228 PetscLogEventBegin(USER_EVENT,0,0,0,0); 2229 [code segment to monitor] 2230 PetscLogFlops(user_flops) 2231 PetscLogEventEnd(USER_EVENT,0,0,0,0); 2232 .ve 2233 2234 Note: 2235 A global counter logs all PETSc flop counts. The user can use 2236 PetscLogFlops() to increment this counter to include flops for the 2237 application code. 2238 2239 Level: intermediate 2240 2241 .seealso: `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2242 M*/ 2243 2244 /*MC 2245 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) 2246 to get accurate timings 2247 2248 Synopsis: 2249 #include <petsclog.h> 2250 void PetscPreLoadBegin(PetscBool flag,char *name); 2251 2252 Not Collective 2253 2254 Input Parameters: 2255 + flag - PETSC_TRUE to run twice, `PETSC_FALSE` to run once, may be overridden 2256 with command line option -preload true or -preload false 2257 - name - name of first stage (lines of code timed separately with -log_view) to 2258 be preloaded 2259 2260 Usage: 2261 .vb 2262 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2263 lines of code 2264 PetscPreLoadStage("second stage"); 2265 lines of code 2266 PetscPreLoadEnd(); 2267 .ve 2268 2269 Note: 2270 Only works in C/C++, not Fortran 2271 2272 Flags available within the macro. 2273 + PetscPreLoadingUsed - true if we are or have done preloading 2274 . PetscPreLoadingOn - true if it is CURRENTLY doing preload 2275 . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second 2276 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on 2277 The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin() 2278 and PetscPreLoadEnd() 2279 2280 Level: intermediate 2281 2282 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2283 M*/ 2284 2285 /*MC 2286 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) 2287 to get accurate timings 2288 2289 Synopsis: 2290 #include <petsclog.h> 2291 void PetscPreLoadEnd(void); 2292 2293 Not Collective 2294 2295 Usage: 2296 .vb 2297 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2298 lines of code 2299 PetscPreLoadStage("second stage"); 2300 lines of code 2301 PetscPreLoadEnd(); 2302 .ve 2303 2304 Note: 2305 Only works in C/C++ not fortran 2306 2307 Level: intermediate 2308 2309 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2310 M*/ 2311 2312 /*MC 2313 PetscPreLoadStage - Start a new segment of code to be timed separately. 2314 to get accurate timings 2315 2316 Synopsis: 2317 #include <petsclog.h> 2318 void PetscPreLoadStage(char *name); 2319 2320 Not Collective 2321 2322 Usage: 2323 .vb 2324 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2325 lines of code 2326 PetscPreLoadStage("second stage"); 2327 lines of code 2328 PetscPreLoadEnd(); 2329 .ve 2330 2331 Note: 2332 Only works in C/C++ not fortran 2333 2334 Level: intermediate 2335 2336 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2337 M*/ 2338 2339 #if PetscDefined(HAVE_DEVICE) 2340 #include <petsc/private/deviceimpl.h> 2341 2342 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2343 2344 /* 2345 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2346 because it will result in timing results that cannot be interpreted. 2347 */ 2348 static PetscErrorCode PetscLogGpuTime_Off(void) 2349 { 2350 PetscLogGpuTimeFlag = PETSC_FALSE; 2351 return 0; 2352 } 2353 2354 /*@C 2355 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2356 2357 Options Database Key: 2358 . -log_view_gpu_time - provide the GPU times in the -log_view output 2359 2360 Notes: 2361 Turning on the timing of the 2362 GPU kernels can slow down the entire computation and should only be used when studying the performance 2363 of operations on GPU such as vector operations and matrix-vector operations. 2364 2365 This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off. 2366 2367 Level: advanced 2368 2369 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2370 @*/ 2371 PetscErrorCode PetscLogGpuTime(void) 2372 { 2373 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2374 PetscLogGpuTimeFlag = PETSC_TRUE; 2375 return 0; 2376 } 2377 2378 /*@C 2379 PetscLogGpuTimeBegin - Start timer for device 2380 2381 Notes: 2382 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times). 2383 2384 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times). 2385 2386 There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()` 2387 2388 This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space. 2389 2390 The regular logging captures the time for data transfers and any CPU activites during the event 2391 2392 It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel. 2393 2394 Developer Notes: 2395 The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`. 2396 2397 `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the 2398 event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution, 2399 but not continued GPU excution, until the timer event is recorded. 2400 2401 Level: intermediate 2402 2403 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2404 @*/ 2405 PetscErrorCode PetscLogGpuTimeBegin(void) 2406 { 2407 PetscFunctionBegin; 2408 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2409 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2410 PetscDeviceContext dctx; 2411 2412 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2413 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2414 } else { 2415 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2416 } 2417 PetscFunctionReturn(0); 2418 } 2419 2420 /*@C 2421 PetscLogGpuTimeEnd - Stop timer for device 2422 2423 Level: intermediate 2424 2425 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2426 @*/ 2427 PetscErrorCode PetscLogGpuTimeEnd(void) 2428 { 2429 PetscFunctionBegin; 2430 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2431 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2432 PetscDeviceContext dctx; 2433 PetscLogDouble elapsed; 2434 2435 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2436 PetscCall(PetscDeviceContextEndTimer_Internal(dctx, &elapsed)); 2437 petsc_gtime += (elapsed / 1000.0); 2438 } else { 2439 PetscCall(PetscTimeAdd(&petsc_gtime)); 2440 } 2441 PetscFunctionReturn(0); 2442 } 2443 #endif /* end of PETSC_HAVE_DEVICE */ 2444 2445 #else /* end of -DPETSC_USE_LOG section */ 2446 2447 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2448 { 2449 PetscFunctionBegin; 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #endif /* PETSC_USE_LOG*/ 2454 2455 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2456 PetscClassId PETSC_OBJECT_CLASSID = 0; 2457 2458 /*@C 2459 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2460 2461 Not Collective 2462 2463 Input Parameter: 2464 . name - The class name 2465 2466 Output Parameter: 2467 . oclass - The class id or classid 2468 2469 Level: developer 2470 2471 .seealso: `PetscLogEventRegister()` 2472 @*/ 2473 PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass) 2474 { 2475 #if defined(PETSC_USE_LOG) 2476 PetscStageLog stageLog; 2477 PetscInt stage; 2478 #endif 2479 2480 PetscFunctionBegin; 2481 *oclass = ++PETSC_LARGEST_CLASSID; 2482 #if defined(PETSC_USE_LOG) 2483 PetscCall(PetscLogGetStageLog(&stageLog)); 2484 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2485 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2486 #endif 2487 PetscFunctionReturn(0); 2488 } 2489 2490 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2491 #include <mpe.h> 2492 2493 PetscBool PetscBeganMPE = PETSC_FALSE; 2494 2495 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2496 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2497 2498 /*@C 2499 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files 2500 and slows the program down. 2501 2502 Collective over `PETSC_COMM_WORLD` 2503 2504 Options Database Key: 2505 . -log_mpe - Prints extensive log information 2506 2507 Note: 2508 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 2509 intended for production runs since it logs only flop rates and object 2510 creation (and should not significantly slow the programs). 2511 2512 Level: advanced 2513 2514 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2515 `PetscLogEventDeactivate()` 2516 @*/ 2517 PetscErrorCode PetscLogMPEBegin(void) 2518 { 2519 PetscFunctionBegin; 2520 /* Do MPE initialization */ 2521 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2522 PetscCall(PetscInfo(0, "Initializing MPE.\n")); 2523 PetscCall(MPE_Init_log()); 2524 2525 PetscBeganMPE = PETSC_TRUE; 2526 } else { 2527 PetscCall(PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n")); 2528 } 2529 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2530 PetscFunctionReturn(0); 2531 } 2532 2533 /*@C 2534 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2535 2536 Collective over `PETSC_COMM_WORLD` 2537 2538 Level: advanced 2539 2540 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2541 @*/ 2542 PetscErrorCode PetscLogMPEDump(const char sname[]) 2543 { 2544 char name[PETSC_MAX_PATH_LEN]; 2545 2546 PetscFunctionBegin; 2547 if (PetscBeganMPE) { 2548 PetscCall(PetscInfo(0, "Finalizing MPE.\n")); 2549 if (sname) { 2550 PetscCall(PetscStrcpy(name, sname)); 2551 } else { 2552 PetscCall(PetscGetProgramName(name, sizeof(name))); 2553 } 2554 PetscCall(MPE_Finish_log(name)); 2555 } else { 2556 PetscCall(PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n")); 2557 } 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #define PETSC_RGB_COLORS_MAX 39 2562 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ", 2563 "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ", 2564 "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ", 2565 "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ", 2566 "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "}; 2567 2568 /*@C 2569 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()` 2570 2571 Not collective. Maybe it should be? 2572 2573 Output Parameter: 2574 . str - character string representing the color 2575 2576 Level: developer 2577 2578 .seealso: `PetscLogEventRegister()` 2579 @*/ 2580 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) 2581 { 2582 static int idx = 0; 2583 2584 PetscFunctionBegin; 2585 *str = PetscLogMPERGBColors[idx]; 2586 idx = (idx + 1) % PETSC_RGB_COLORS_MAX; 2587 PetscFunctionReturn(0); 2588 } 2589 2590 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2591