1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 15 #include <../src/sys/perfstubs/timer.h> 16 #endif 17 18 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 19 20 #if defined(PETSC_USE_LOG) 21 #include <petscmachineinfo.h> 22 #include <petscconfiginfo.h> 23 24 /* used in the MPI_XXX() count macros in petsclog.h */ 25 26 /* Action and object logging variables */ 27 Action *petsc_actions = NULL; 28 Object *petsc_objects = NULL; 29 PetscBool petsc_logActions = PETSC_FALSE; 30 PetscBool petsc_logObjects = PETSC_FALSE; 31 int petsc_numActions = 0, petsc_maxActions = 100; 32 int petsc_numObjects = 0, petsc_maxObjects = 100; 33 int petsc_numObjectsDestroyed = 0; 34 35 /* Global counters */ 36 PetscLogDouble petsc_BaseTime = 0.0; 37 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 38 PetscLogDouble petsc_tmp_flops = 0.0; /* The incremental number of flops */ 39 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 40 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 41 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 42 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 43 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 44 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 45 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 46 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 47 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 48 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 49 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 50 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 51 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 52 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 53 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 54 #if defined(PETSC_HAVE_DEVICE) 55 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 56 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 57 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 58 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 59 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 60 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 61 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 62 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 63 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 64 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 65 #endif 66 67 /* Logging functions */ 68 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 69 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 70 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 71 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 72 73 /* Tracing event logging variables */ 74 FILE *petsc_tracefile = NULL; 75 int petsc_tracelevel = 0; 76 const char *petsc_traceblanks = " "; 77 char petsc_tracespace[128] = " "; 78 PetscLogDouble petsc_tracetime = 0.0; 79 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 80 81 static PetscIntStack current_log_event_stack = NULL; 82 83 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) 84 { 85 int stage; 86 PetscBool opt; 87 88 PetscFunctionBegin; 89 if (PetscLogInitializeCalled) PetscFunctionReturn(0); 90 PetscLogInitializeCalled = PETSC_TRUE; 91 92 PetscCall(PetscIntStackCreate(¤t_log_event_stack)); 93 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt)); 94 if (opt) petsc_logActions = PETSC_FALSE; 95 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt)); 96 if (opt) petsc_logObjects = PETSC_FALSE; 97 if (petsc_logActions) PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 98 if (petsc_logObjects) PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 99 PetscLogPHC = PetscLogObjCreateDefault; 100 PetscLogPHD = PetscLogObjDestroyDefault; 101 /* Setup default logging structures */ 102 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 103 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 104 105 /* All processors sync here for more consistent logging */ 106 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 107 PetscTime(&petsc_BaseTime); 108 PetscCall(PetscLogStagePush(stage)); 109 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 110 PetscStackCallExternalVoid("ps_initialize_", ps_initialize_()); 111 #endif 112 PetscFunctionReturn(0); 113 } 114 115 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) 116 { 117 PetscStageLog stageLog; 118 119 PetscFunctionBegin; 120 PetscCall(PetscFree(petsc_actions)); 121 PetscCall(PetscFree(petsc_objects)); 122 PetscCall(PetscLogNestedEnd()); 123 PetscCall(PetscLogSet(NULL, NULL)); 124 125 /* Resetting phase */ 126 PetscCall(PetscLogGetStageLog(&stageLog)); 127 PetscCall(PetscStageLogDestroy(stageLog)); 128 PetscCall(PetscIntStackDestroy(current_log_event_stack)); 129 current_log_event_stack = NULL; 130 131 petsc_TotalFlops = 0.0; 132 petsc_numActions = 0; 133 petsc_numObjects = 0; 134 petsc_numObjectsDestroyed = 0; 135 petsc_maxActions = 100; 136 petsc_maxObjects = 100; 137 petsc_actions = NULL; 138 petsc_objects = NULL; 139 petsc_logActions = PETSC_FALSE; 140 petsc_logObjects = PETSC_FALSE; 141 petsc_BaseTime = 0.0; 142 petsc_TotalFlops = 0.0; 143 petsc_tmp_flops = 0.0; 144 petsc_send_ct = 0.0; 145 petsc_recv_ct = 0.0; 146 petsc_send_len = 0.0; 147 petsc_recv_len = 0.0; 148 petsc_isend_ct = 0.0; 149 petsc_irecv_ct = 0.0; 150 petsc_isend_len = 0.0; 151 petsc_irecv_len = 0.0; 152 petsc_wait_ct = 0.0; 153 petsc_wait_any_ct = 0.0; 154 petsc_wait_all_ct = 0.0; 155 petsc_sum_of_waits_ct = 0.0; 156 petsc_allreduce_ct = 0.0; 157 petsc_gather_ct = 0.0; 158 petsc_scatter_ct = 0.0; 159 #if defined(PETSC_HAVE_DEVICE) 160 petsc_ctog_ct = 0.0; 161 petsc_gtoc_ct = 0.0; 162 petsc_ctog_sz = 0.0; 163 petsc_gtoc_sz = 0.0; 164 petsc_gflops = 0.0; 165 petsc_gtime = 0.0; 166 #endif 167 PETSC_LARGEST_EVENT = PETSC_EVENT; 168 PetscLogPHC = NULL; 169 PetscLogPHD = NULL; 170 petsc_tracefile = NULL; 171 petsc_tracelevel = 0; 172 petsc_traceblanks = " "; 173 petsc_tracespace[0] = ' '; 174 petsc_tracespace[1] = 0; 175 petsc_tracetime = 0.0; 176 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 177 PETSC_OBJECT_CLASSID = 0; 178 petsc_stageLog = NULL; 179 PetscLogInitializeCalled = PETSC_FALSE; 180 PetscFunctionReturn(0); 181 } 182 183 /*@C 184 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 185 186 Not Collective 187 188 Input Parameters: 189 + b - The function called at beginning of event 190 - e - The function called at end of event 191 192 Level: developer 193 194 Developer Note: 195 The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. 196 197 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()` 198 @*/ 199 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) 200 { 201 PetscFunctionBegin; 202 PetscLogPLB = b; 203 PetscLogPLE = e; 204 PetscFunctionReturn(0); 205 } 206 207 /*@C 208 PetscLogIsActive - Check if logging is currently in progress. 209 210 Not Collective 211 212 Output Parameter: 213 . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise 214 215 Level: beginner 216 217 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 218 @*/ 219 PetscErrorCode PetscLogIsActive(PetscBool *isActive) 220 { 221 PetscFunctionBegin; 222 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 223 PetscFunctionReturn(0); 224 } 225 226 /*@C 227 PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop 228 rates and object creation and should not slow programs down too much. 229 This routine may be called more than once. 230 231 Logically Collective over `PETSC_COMM_WORLD` 232 233 Options Database Key: 234 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 235 screen (for code configured with --with-log=1 (which is the default)) 236 237 Usage: 238 .vb 239 PetscInitialize(...); 240 PetscLogDefaultBegin(); 241 ... code ... 242 PetscLogView(viewer); or PetscLogDump(); 243 PetscFinalize(); 244 .ve 245 246 Note: 247 `PetscLogView()` or `PetscLogDump()` actually cause the printing of 248 the logging information. 249 250 Level: advanced 251 252 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 253 @*/ 254 PetscErrorCode PetscLogDefaultBegin(void) 255 { 256 PetscFunctionBegin; 257 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 258 PetscFunctionReturn(0); 259 } 260 261 /*@C 262 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 263 all events. This creates large log files and slows the program down. 264 265 Logically Collective on `PETSC_COMM_WORLD` 266 267 Options Database Key: 268 . -log_all - Prints extensive log information 269 270 Usage: 271 .vb 272 PetscInitialize(...); 273 PetscLogAllBegin(); 274 ... code ... 275 PetscLogDump(filename); 276 PetscFinalize(); 277 .ve 278 279 Note: 280 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 281 intended for production runs since it logs only flop rates and object 282 creation (and shouldn't significantly slow the programs). 283 284 Level: advanced 285 286 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 287 @*/ 288 PetscErrorCode PetscLogAllBegin(void) 289 { 290 PetscFunctionBegin; 291 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 292 PetscFunctionReturn(0); 293 } 294 295 /*@C 296 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 297 begins or ends, the event name is printed. 298 299 Logically Collective on `PETSC_COMM_WORLD` 300 301 Input Parameter: 302 . file - The file to print trace in (e.g. stdout) 303 304 Options Database Key: 305 . -log_trace [filename] - Activates `PetscLogTraceBegin()` 306 307 Notes: 308 `PetscLogTraceBegin()` prints the processor number, the execution time (sec), 309 then "Event begin:" or "Event end:" followed by the event name. 310 311 `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful 312 to determine where a program is hanging without running in the 313 debugger. Can be used in conjunction with the -info option. 314 315 Level: intermediate 316 317 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 318 @*/ 319 PetscErrorCode PetscLogTraceBegin(FILE *file) 320 { 321 PetscFunctionBegin; 322 petsc_tracefile = file; 323 324 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 325 PetscFunctionReturn(0); 326 } 327 328 /*@ 329 PetscLogActions - Determines whether actions are logged for the graphical viewer. 330 331 Not Collective 332 333 Input Parameter: 334 . flag - `PETSC_TRUE` if actions are to be logged 335 336 Options Database Key: 337 . -log_exclude_actions - Turns off actions logging 338 339 Level: intermediate 340 341 Note: 342 Logging of actions continues to consume more memory as the program 343 runs. Long running programs should consider turning this feature off. 344 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 345 @*/ 346 PetscErrorCode PetscLogActions(PetscBool flag) 347 { 348 PetscFunctionBegin; 349 petsc_logActions = flag; 350 PetscFunctionReturn(0); 351 } 352 353 /*@ 354 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 355 356 Not Collective 357 358 Input Parameter: 359 . flag - `PETSC_TRUE` if objects are to be logged 360 361 Options Database Key: 362 . -log_exclude_objects - Turns off objects logging 363 364 Level: intermediate 365 366 Note: 367 Logging of objects continues to consume more memory as the program 368 runs. Long running programs should consider turning this feature off. 369 370 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 371 @*/ 372 PetscErrorCode PetscLogObjects(PetscBool flag) 373 { 374 PetscFunctionBegin; 375 petsc_logObjects = flag; 376 PetscFunctionReturn(0); 377 } 378 379 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 380 /*@C 381 PetscLogStageRegister - Attaches a character string name to a logging stage. 382 383 Not Collective 384 385 Input Parameter: 386 . sname - The name to associate with that stage 387 388 Output Parameter: 389 . stage - The stage number 390 391 Level: intermediate 392 393 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 394 @*/ 395 PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage) 396 { 397 PetscStageLog stageLog; 398 PetscLogEvent event; 399 400 PetscFunctionBegin; 401 PetscCall(PetscLogGetStageLog(&stageLog)); 402 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 403 /* Copy events already changed in the main stage, this sucks */ 404 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 405 for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 406 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 407 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 408 if (perfstubs_initialized == PERFSTUBS_SUCCESS) PetscStackCallExternalVoid("ps_timer_create_", stageLog->stageInfo[*stage].timer = ps_timer_create_(sname)); 409 #endif 410 PetscFunctionReturn(0); 411 } 412 413 /*@C 414 PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage 415 416 Not Collective 417 418 Input Parameter: 419 . stage - The stage on which to log 420 421 Usage: 422 If the option -log_view is used to run the program containing the 423 following code, then 2 sets of summary data will be printed during 424 PetscFinalize(). 425 .vb 426 PetscInitialize(int *argc,char ***args,0,0); 427 [stage 0 of code] 428 PetscLogStagePush(1); 429 [stage 1 of code] 430 PetscLogStagePop(); 431 PetscBarrier(...); 432 [more stage 0 of code] 433 PetscFinalize(); 434 .ve 435 436 Note: 437 Use `PetscLogStageRegister()` to register a stage. 438 439 Level: intermediate 440 441 .seealso: `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 442 @*/ 443 PetscErrorCode PetscLogStagePush(PetscLogStage stage) 444 { 445 PetscStageLog stageLog; 446 447 PetscFunctionBegin; 448 PetscCall(PetscLogGetStageLog(&stageLog)); 449 PetscCall(PetscStageLogPush(stageLog, stage)); 450 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 451 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stage].timer != NULL) PetscStackCallExternalVoid("ps_timer_start_", ps_timer_start_(stageLog->stageInfo[stage].timer)); 452 #endif 453 PetscFunctionReturn(0); 454 } 455 456 /*@C 457 PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()` 458 459 Not Collective 460 461 Usage: 462 If the option -log_view is used to run the program containing the 463 following code, then 2 sets of summary data will be printed during 464 PetscFinalize(). 465 .vb 466 PetscInitialize(int *argc,char ***args,0,0); 467 [stage 0 of code] 468 PetscLogStagePush(1); 469 [stage 1 of code] 470 PetscLogStagePop(); 471 PetscBarrier(...); 472 [more stage 0 of code] 473 PetscFinalize(); 474 .ve 475 476 Level: intermediate 477 478 .seealso: `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 479 @*/ 480 PetscErrorCode PetscLogStagePop(void) 481 { 482 PetscStageLog stageLog; 483 484 PetscFunctionBegin; 485 PetscCall(PetscLogGetStageLog(&stageLog)); 486 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 487 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stageLog->curStage].timer != NULL) PetscStackCallExternalVoid("ps_timer_stop_", ps_timer_stop_(stageLog->stageInfo[stageLog->curStage].timer)); 488 #endif 489 PetscCall(PetscStageLogPop(stageLog)); 490 PetscFunctionReturn(0); 491 } 492 493 /*@ 494 PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 495 496 Not Collective 497 498 Input Parameters: 499 + stage - The stage 500 - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 501 502 Level: intermediate 503 504 Note: 505 If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist 506 507 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 508 @*/ 509 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) 510 { 511 PetscStageLog stageLog; 512 513 PetscFunctionBegin; 514 PetscCall(PetscLogGetStageLog(&stageLog)); 515 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 516 PetscFunctionReturn(0); 517 } 518 519 /*@ 520 PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 521 522 Not Collective 523 524 Input Parameter: 525 . stage - The stage 526 527 Output Parameter: 528 . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 529 530 Level: intermediate 531 532 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 533 @*/ 534 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) 535 { 536 PetscStageLog stageLog; 537 538 PetscFunctionBegin; 539 PetscCall(PetscLogGetStageLog(&stageLog)); 540 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 541 PetscFunctionReturn(0); 542 } 543 544 /*@ 545 PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()` 546 547 Not Collective 548 549 Input Parameters: 550 + stage - The stage 551 - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 552 553 Level: intermediate 554 555 Developer Note: 556 What does visible mean, needs to be documented. 557 558 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 559 @*/ 560 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) 561 { 562 PetscStageLog stageLog; 563 564 PetscFunctionBegin; 565 PetscCall(PetscLogGetStageLog(&stageLog)); 566 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 567 PetscFunctionReturn(0); 568 } 569 570 /*@ 571 PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()` 572 573 Not Collective 574 575 Input Parameter: 576 . stage - The stage 577 578 Output Parameter: 579 . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 580 581 Level: intermediate 582 583 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 584 @*/ 585 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) 586 { 587 PetscStageLog stageLog; 588 589 PetscFunctionBegin; 590 PetscCall(PetscLogGetStageLog(&stageLog)); 591 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 592 PetscFunctionReturn(0); 593 } 594 595 /*@C 596 PetscLogStageGetId - Returns the stage id when given the stage name. 597 598 Not Collective 599 600 Input Parameter: 601 . name - The stage name 602 603 Output Parameter: 604 . stage - The stage, , or -1 if no stage with that name exists 605 606 Level: intermediate 607 608 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 609 @*/ 610 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) 611 { 612 PetscStageLog stageLog; 613 614 PetscFunctionBegin; 615 PetscCall(PetscLogGetStageLog(&stageLog)); 616 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 617 PetscFunctionReturn(0); 618 } 619 620 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 621 622 /*@C 623 PetscLogEventRegister - Registers an event name for logging operations 624 625 Not Collective 626 627 Input Parameters: 628 + name - The name associated with the event 629 - classid - The classid associated to the class for this event, obtain either with 630 `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones 631 are only available in C code 632 633 Output Parameter: 634 . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`. 635 636 Example of Usage: 637 .vb 638 PetscLogEvent USER_EVENT; 639 PetscClassId classid; 640 PetscLogDouble user_event_flops; 641 PetscClassIdRegister("class name",&classid); 642 PetscLogEventRegister("User event name",classid,&USER_EVENT); 643 PetscLogEventBegin(USER_EVENT,0,0,0,0); 644 [code segment to monitor] 645 PetscLogFlops(user_event_flops); 646 PetscLogEventEnd(USER_EVENT,0,0,0,0); 647 .ve 648 649 Notes: 650 PETSc automatically logs library events if the code has been 651 configured with --with-log (which is the default) and 652 -log_view or -log_all is specified. `PetscLogEventRegister()` is 653 intended for logging user events to supplement this PETSc 654 information. 655 656 PETSc can gather data for use with the utilities Jumpshot 657 (part of the MPICH distribution). If PETSc has been compiled 658 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 659 MPICH), the user can employ another command line option, -log_mpe, 660 to create a logfile, "mpe.log", which can be visualized 661 Jumpshot. 662 663 The classid is associated with each event so that classes of events 664 can be disabled simultaneously, such as all matrix events. The user 665 can either use an existing classid, such as `MAT_CLASSID`, or create 666 their own as shown in the example. 667 668 If an existing event with the same name exists, its event handle is 669 returned instead of creating a new event. 670 671 Level: intermediate 672 673 .seealso: `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 674 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 675 @*/ 676 PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event) 677 { 678 PetscStageLog stageLog; 679 int stage; 680 681 PetscFunctionBegin; 682 *event = PETSC_DECIDE; 683 PetscCall(PetscLogGetStageLog(&stageLog)); 684 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 685 if (*event > 0) PetscFunctionReturn(0); 686 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 687 for (stage = 0; stage < stageLog->numStages; stage++) { 688 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 689 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 690 } 691 PetscFunctionReturn(0); 692 } 693 694 /*@ 695 PetscLogEventSetCollective - Indicates that a particular event is collective. 696 697 Not Collective 698 699 Input Parameters: 700 + event - The event id 701 - collective - Boolean flag indicating whether a particular event is collective 702 703 Notes: 704 New events returned from `PetscLogEventRegister()` are collective by default. 705 706 Collective events are handled specially if the -log_sync is used. In that case the logging saves information about 707 two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication 708 to be performed. This option is useful to debug imbalance within the computations or communications 709 710 Level: developer 711 712 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()` 713 @*/ 714 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective) 715 { 716 PetscStageLog stageLog; 717 PetscEventRegLog eventRegLog; 718 719 PetscFunctionBegin; 720 PetscCall(PetscLogGetStageLog(&stageLog)); 721 PetscCall(PetscStageLogGetEventRegLog(stageLog, &eventRegLog)); 722 PetscCheck(event >= 0 && event <= eventRegLog->numEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid event id"); 723 eventRegLog->eventInfo[event].collective = collective; 724 PetscFunctionReturn(0); 725 } 726 727 /*@ 728 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 729 730 Not Collective 731 732 Input Parameter: 733 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 734 735 Level: developer 736 737 .seealso: `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 738 @*/ 739 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) 740 { 741 PetscStageLog stageLog; 742 int stage; 743 744 PetscFunctionBegin; 745 PetscCall(PetscLogGetStageLog(&stageLog)); 746 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 747 PetscFunctionReturn(0); 748 } 749 750 /*@ 751 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 752 753 Not Collective 754 755 Input Parameter: 756 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 757 758 Level: developer 759 760 Note: 761 If a class is excluded then events associated with that class are not logged. 762 763 .seealso: `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 764 @*/ 765 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) 766 { 767 PetscStageLog stageLog; 768 int stage; 769 770 PetscFunctionBegin; 771 PetscCall(PetscLogGetStageLog(&stageLog)); 772 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 773 PetscFunctionReturn(0); 774 } 775 776 /*@ 777 PetscLogEventActivate - Indicates that a particular event should be logged. 778 779 Not Collective 780 781 Input Parameter: 782 . event - The event id 783 784 Usage: 785 .vb 786 PetscLogEventDeactivate(VEC_SetValues); 787 [code where you do not want to log VecSetValues()] 788 PetscLogEventActivate(VEC_SetValues); 789 [code where you do want to log VecSetValues()] 790 .ve 791 792 Note: 793 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 794 or an event number obtained with `PetscLogEventRegister()`. 795 796 Level: advanced 797 798 .seealso: `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 799 @*/ 800 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) 801 { 802 PetscStageLog stageLog; 803 int stage; 804 805 PetscFunctionBegin; 806 PetscCall(PetscLogGetStageLog(&stageLog)); 807 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 808 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 809 PetscFunctionReturn(0); 810 } 811 812 /*@ 813 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 814 815 Not Collective 816 817 Input Parameter: 818 . event - The event id 819 820 Usage: 821 .vb 822 PetscLogEventDeactivate(VEC_SetValues); 823 [code where you do not want to log VecSetValues()] 824 PetscLogEventActivate(VEC_SetValues); 825 [code where you do want to log VecSetValues()] 826 .ve 827 828 Note: 829 The event may be either a pre-defined PETSc event (found in 830 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 831 832 Level: advanced 833 834 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 835 @*/ 836 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) 837 { 838 PetscStageLog stageLog; 839 int stage; 840 841 PetscFunctionBegin; 842 PetscCall(PetscLogGetStageLog(&stageLog)); 843 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 844 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 845 PetscFunctionReturn(0); 846 } 847 848 /*@ 849 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called 850 851 Not Collective 852 853 Input Parameter: 854 . event - The event id 855 856 Usage: 857 .vb 858 PetscLogEventDeactivatePush(VEC_SetValues); 859 [code where you do not want to log VecSetValues()] 860 PetscLogEventDeactivatePop(VEC_SetValues); 861 [code where you do want to log VecSetValues()] 862 .ve 863 864 Note: 865 The event may be either a pre-defined PETSc event (found in 866 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 867 868 Level: advanced 869 870 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()` 871 @*/ 872 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) 873 { 874 PetscStageLog stageLog; 875 int stage; 876 877 PetscFunctionBegin; 878 PetscCall(PetscLogGetStageLog(&stageLog)); 879 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 880 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 881 PetscFunctionReturn(0); 882 } 883 884 /*@ 885 PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()` 886 887 Not Collective 888 889 Input Parameter: 890 . event - The event id 891 892 Usage: 893 .vb 894 PetscLogEventDeactivatePush(VEC_SetValues); 895 [code where you do not want to log VecSetValues()] 896 PetscLogEventDeactivatePop(VEC_SetValues); 897 [code where you do want to log VecSetValues()] 898 .ve 899 900 Note: 901 The event may be either a pre-defined PETSc event (found in 902 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 903 904 Level: advanced 905 906 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 907 @*/ 908 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) 909 { 910 PetscStageLog stageLog; 911 int stage; 912 913 PetscFunctionBegin; 914 PetscCall(PetscLogGetStageLog(&stageLog)); 915 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 916 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 917 PetscFunctionReturn(0); 918 } 919 920 /*@ 921 PetscLogEventSetActiveAll - Turns on logging of all events 922 923 Not Collective 924 925 Input Parameters: 926 + event - The event id 927 - isActive - The activity flag determining whether the event is logged 928 929 Level: advanced 930 931 .seealso: `PlogEventActivate()`, `PlogEventDeactivate()` 932 @*/ 933 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) 934 { 935 PetscStageLog stageLog; 936 int stage; 937 938 PetscFunctionBegin; 939 PetscCall(PetscLogGetStageLog(&stageLog)); 940 for (stage = 0; stage < stageLog->numStages; stage++) { 941 if (isActive) { 942 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 943 } else { 944 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 945 } 946 } 947 PetscFunctionReturn(0); 948 } 949 950 /*@ 951 PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage 952 953 Not Collective 954 955 Input Parameter: 956 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 957 958 Level: developer 959 960 .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 961 @*/ 962 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) 963 { 964 PetscStageLog stageLog; 965 int stage; 966 967 PetscFunctionBegin; 968 PetscCall(PetscLogGetStageLog(&stageLog)); 969 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 970 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 971 PetscFunctionReturn(0); 972 } 973 974 /*@ 975 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage 976 977 Not Collective 978 979 Input Parameter: 980 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 981 982 Level: developer 983 984 .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 985 @*/ 986 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) 987 { 988 PetscStageLog stageLog; 989 int stage; 990 991 PetscFunctionBegin; 992 PetscCall(PetscLogGetStageLog(&stageLog)); 993 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 994 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 995 PetscFunctionReturn(0); 996 } 997 998 /*MC 999 PetscLogEventSync - Synchronizes the beginning of a user event. 1000 1001 Synopsis: 1002 #include <petsclog.h> 1003 PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm) 1004 1005 Collective 1006 1007 Input Parameters: 1008 + e - integer associated with the event obtained from PetscLogEventRegister() 1009 - comm - an MPI communicator 1010 1011 Usage: 1012 .vb 1013 PetscLogEvent USER_EVENT; 1014 PetscLogEventRegister("User event",0,&USER_EVENT); 1015 PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD); 1016 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1017 [code segment to monitor] 1018 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1019 .ve 1020 1021 Note: 1022 This routine should be called only if there is not a 1023 `PetscObject` available to pass to `PetscLogEventBegin()`. 1024 1025 Level: developer 1026 1027 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1028 M*/ 1029 1030 /*MC 1031 PetscLogEventBegin - Logs the beginning of a user event. 1032 1033 Synopsis: 1034 #include <petsclog.h> 1035 PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1036 1037 Not Collective 1038 1039 Input Parameters: 1040 + e - integer associated with the event obtained from PetscLogEventRegister() 1041 - o1,o2,o3,o4 - objects associated with the event, or 0 1042 1043 Fortran Synopsis: 1044 void PetscLogEventBegin(int e,PetscErrorCode ierr) 1045 1046 Usage: 1047 .vb 1048 PetscLogEvent USER_EVENT; 1049 PetscLogDouble user_event_flops; 1050 PetscLogEventRegister("User event",0,&USER_EVENT); 1051 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1052 [code segment to monitor] 1053 PetscLogFlops(user_event_flops); 1054 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1055 .ve 1056 1057 Developer Note: 1058 `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the 1059 errors that occur in the macro directly because other packages that use this macros have used them in their 1060 own functions or methods that do not return error codes and it would be disruptive to change the current 1061 behavior. 1062 1063 Level: intermediate 1064 1065 .seealso: `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1066 M*/ 1067 1068 /*MC 1069 PetscLogEventEnd - Log the end of a user event. 1070 1071 Synopsis: 1072 #include <petsclog.h> 1073 PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1074 1075 Not Collective 1076 1077 Input Parameters: 1078 + e - integer associated with the event obtained with PetscLogEventRegister() 1079 - o1,o2,o3,o4 - objects associated with the event, or 0 1080 1081 Fortran Synopsis: 1082 void PetscLogEventEnd(int e,PetscErrorCode ierr) 1083 1084 Usage: 1085 .vb 1086 PetscLogEvent USER_EVENT; 1087 PetscLogDouble user_event_flops; 1088 PetscLogEventRegister("User event",0,&USER_EVENT,); 1089 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1090 [code segment to monitor] 1091 PetscLogFlops(user_event_flops); 1092 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1093 .ve 1094 1095 Level: intermediate 1096 1097 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1098 M*/ 1099 1100 /*@C 1101 PetscLogEventGetId - Returns the event id when given the event name. 1102 1103 Not Collective 1104 1105 Input Parameter: 1106 . name - The event name 1107 1108 Output Parameter: 1109 . event - The event, or -1 if no event with that name exists 1110 1111 Level: intermediate 1112 1113 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1114 @*/ 1115 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) 1116 { 1117 PetscStageLog stageLog; 1118 1119 PetscFunctionBegin; 1120 PetscCall(PetscLogGetStageLog(&stageLog)); 1121 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event) 1126 { 1127 PetscFunctionBegin; 1128 PetscCall(PetscIntStackPush(current_log_event_stack, event)); 1129 PetscFunctionReturn(0); 1130 } 1131 1132 PetscErrorCode PetscLogPopCurrentEvent_Internal(void) 1133 { 1134 PetscFunctionBegin; 1135 PetscCall(PetscIntStackPop(current_log_event_stack, NULL)); 1136 PetscFunctionReturn(0); 1137 } 1138 1139 PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event) 1140 { 1141 PetscBool empty; 1142 1143 PetscFunctionBegin; 1144 PetscValidIntPointer(event, 1); 1145 *event = PETSC_DECIDE; 1146 PetscCall(PetscIntStackEmpty(current_log_event_stack, &empty)); 1147 if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack, event)); 1148 PetscFunctionReturn(0); 1149 } 1150 1151 PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event) 1152 { 1153 PetscFunctionBegin; 1154 if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event, NULL, NULL, NULL, NULL)); 1155 PetscFunctionReturn(0); 1156 } 1157 1158 PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event) 1159 { 1160 PetscStageLog stageLog; 1161 PetscEventPerfLog eventLog; 1162 int stage; 1163 1164 PetscFunctionBegin; 1165 if (event == PETSC_DECIDE) PetscFunctionReturn(0); 1166 PetscCall(PetscLogEventBegin(event, NULL, NULL, NULL, NULL)); 1167 PetscCall(PetscLogGetStageLog(&stageLog)); 1168 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1169 PetscCall(PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog)); 1170 eventLog->eventInfo[event].count--; 1171 PetscFunctionReturn(0); 1172 } 1173 1174 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1175 /*@C 1176 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1177 be read by bin/petscview. This program no longer exists. 1178 1179 Collective on `PETSC_COMM_WORLD` 1180 1181 Input Parameter: 1182 . name - an optional file name 1183 1184 Usage: 1185 .vb 1186 PetscInitialize(...); 1187 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1188 ... code ... 1189 PetscLogDump(filename); 1190 PetscFinalize(); 1191 .ve 1192 1193 Note: 1194 The default file name is 1195 $ Log.<rank> 1196 where <rank> is the processor number. If no name is specified, 1197 this file will be used. 1198 1199 Level: advanced 1200 1201 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1202 @*/ 1203 PetscErrorCode PetscLogDump(const char sname[]) 1204 { 1205 PetscStageLog stageLog; 1206 PetscEventPerfInfo *eventInfo; 1207 FILE *fd; 1208 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1209 PetscLogDouble flops, _TotalTime; 1210 PetscMPIInt rank; 1211 int action, object, curStage; 1212 PetscLogEvent event; 1213 1214 PetscFunctionBegin; 1215 /* Calculate the total elapsed time */ 1216 PetscTime(&_TotalTime); 1217 _TotalTime -= petsc_BaseTime; 1218 /* Open log file */ 1219 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1220 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank)); 1221 PetscCall(PetscFixFilename(file, fname)); 1222 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1223 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1224 /* Output totals */ 1225 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1226 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1227 /* Output actions */ 1228 if (petsc_logActions) { 1229 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1230 for (action = 0; action < petsc_numActions; action++) { 1231 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1232 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1233 } 1234 } 1235 /* Output objects */ 1236 if (petsc_logObjects) { 1237 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1238 for (object = 0; object < petsc_numObjects; object++) { 1239 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem)); 1240 if (!petsc_objects[object].name[0]) { 1241 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n")); 1242 } else { 1243 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1244 } 1245 if (petsc_objects[object].info[0] != 0) { 1246 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1247 } else { 1248 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1249 } 1250 } 1251 } 1252 /* Output events */ 1253 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1254 PetscCall(PetscLogGetStageLog(&stageLog)); 1255 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1256 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1257 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1258 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time; 1259 else flops = 0.0; 1260 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops)); 1261 } 1262 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1263 PetscFunctionReturn(0); 1264 } 1265 1266 /* 1267 PetscLogView_Detailed - Each process prints the times for its own events 1268 1269 */ 1270 PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) 1271 { 1272 PetscStageLog stageLog; 1273 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1274 PetscLogDouble locTotalTime, numRed, maxMem; 1275 int numStages, numEvents, stage, event; 1276 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1277 PetscMPIInt rank, size; 1278 1279 PetscFunctionBegin; 1280 PetscCallMPI(MPI_Comm_size(comm, &size)); 1281 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1282 /* Must preserve reduction count before we go on */ 1283 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1284 /* Get the total elapsed time */ 1285 PetscTime(&locTotalTime); 1286 locTotalTime -= petsc_BaseTime; 1287 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 1288 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 1289 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 1290 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 1291 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 1292 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 1293 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 1294 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 1295 PetscCall(PetscLogGetStageLog(&stageLog)); 1296 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1297 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1298 for (stage = 0; stage < numStages; stage++) { 1299 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name)); 1300 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name)); 1301 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1302 for (event = 0; event < numEvents; event++) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name)); 1303 } 1304 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1305 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1306 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1307 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1308 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1309 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1310 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1311 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects)); 1312 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1313 PetscCall(PetscViewerFlush(viewer)); 1314 for (stage = 0; stage < numStages; stage++) { 1315 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1316 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, 1317 stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1318 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1319 for (event = 0; event < numEvents; event++) { 1320 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1321 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1322 stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, 1323 eventInfo->flops)); 1324 if (eventInfo->dof[0] >= 0.) { 1325 PetscInt d, e; 1326 1327 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1328 for (d = 0; d < 8; ++d) { 1329 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1330 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1331 } 1332 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1333 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1334 for (e = 0; e < 8; ++e) { 1335 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1336 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1337 } 1338 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1339 } 1340 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1341 } 1342 } 1343 PetscCall(PetscViewerFlush(viewer)); 1344 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1345 PetscFunctionReturn(0); 1346 } 1347 1348 /* 1349 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1350 */ 1351 PetscErrorCode PetscLogView_CSV(PetscViewer viewer) 1352 { 1353 PetscStageLog stageLog; 1354 PetscEventPerfInfo *eventInfo = NULL; 1355 PetscLogDouble locTotalTime, maxMem; 1356 int numStages, numEvents, stage, event; 1357 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1358 PetscMPIInt rank, size; 1359 1360 PetscFunctionBegin; 1361 PetscCallMPI(MPI_Comm_size(comm, &size)); 1362 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1363 /* Must preserve reduction count before we go on */ 1364 /* Get the total elapsed time */ 1365 PetscTime(&locTotalTime); 1366 locTotalTime -= petsc_BaseTime; 1367 PetscCall(PetscLogGetStageLog(&stageLog)); 1368 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1369 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1370 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1371 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1372 PetscCall(PetscViewerFlush(viewer)); 1373 for (stage = 0; stage < numStages; stage++) { 1374 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1375 1376 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1377 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1378 for (event = 0; event < numEvents; event++) { 1379 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1380 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, 1381 eventInfo->numReductions, eventInfo->flops)); 1382 if (eventInfo->dof[0] >= 0.) { 1383 PetscInt d, e; 1384 1385 for (d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1386 for (e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1387 } 1388 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1389 } 1390 } 1391 PetscCall(PetscViewerFlush(viewer)); 1392 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1393 PetscFunctionReturn(0); 1394 } 1395 1396 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) 1397 { 1398 PetscFunctionBegin; 1399 if (!PetscLogSyncOn) PetscFunctionReturn(0); 1400 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1401 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1402 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1403 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1404 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1405 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1406 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1407 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1408 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1409 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1410 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1411 PetscFunctionReturn(0); 1412 } 1413 1414 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) 1415 { 1416 PetscFunctionBegin; 1417 if (PetscDefined(USE_DEBUG)) { 1418 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1419 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1420 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1421 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1422 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1423 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1424 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1425 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1426 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1427 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1428 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1429 } 1430 PetscFunctionReturn(0); 1431 } 1432 1433 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) 1434 { 1435 #if defined(PETSC_HAVE_DEVICE) 1436 PetscMPIInt size; 1437 1438 PetscFunctionBegin; 1439 PetscCallMPI(MPI_Comm_size(comm, &size)); 1440 if (use_gpu_aware_mpi || size == 1) PetscFunctionReturn(0); 1441 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1442 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1443 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1444 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1445 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1446 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1447 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1448 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1449 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1450 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1451 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1452 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1453 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1454 PetscFunctionReturn(0); 1455 #else 1456 return 0; 1457 #endif 1458 } 1459 1460 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) 1461 { 1462 #if defined(PETSC_HAVE_DEVICE) 1463 1464 PetscFunctionBegin; 1465 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(0); 1466 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1467 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1468 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1469 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1470 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1471 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1472 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1473 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1474 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1475 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1476 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1477 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1478 PetscFunctionReturn(0); 1479 #else 1480 return 0; 1481 #endif 1482 } 1483 1484 PetscErrorCode PetscLogView_Default(PetscViewer viewer) 1485 { 1486 FILE *fd; 1487 PetscLogDouble zero = 0.0; 1488 PetscStageLog stageLog; 1489 PetscStageInfo *stageInfo = NULL; 1490 PetscEventPerfInfo *eventInfo = NULL; 1491 PetscClassPerfInfo *classInfo; 1492 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1493 const char *name; 1494 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1495 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1496 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1497 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1498 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1499 PetscLogDouble min, max, tot, ratio, avg, x, y; 1500 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1501 #if defined(PETSC_HAVE_DEVICE) 1502 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1503 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1504 #endif 1505 PetscMPIInt minC, maxC; 1506 PetscMPIInt size, rank; 1507 PetscBool *localStageUsed, *stageUsed; 1508 PetscBool *localStageVisible, *stageVisible; 1509 int numStages, localNumEvents, numEvents; 1510 int stage, oclass; 1511 PetscLogEvent event; 1512 PetscErrorCode ierr = 0; 1513 char version[256]; 1514 MPI_Comm comm; 1515 #if defined(PETSC_HAVE_DEVICE) 1516 PetscLogEvent eventid; 1517 PetscInt64 nas = 0x7FF0000000000002; 1518 #endif 1519 1520 PetscFunctionBegin; 1521 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1522 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1523 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1524 PetscCallMPI(MPI_Comm_size(comm, &size)); 1525 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1526 /* Get the total elapsed time */ 1527 PetscTime(&locTotalTime); 1528 locTotalTime -= petsc_BaseTime; 1529 1530 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1531 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1532 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1533 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1534 PetscCall(PetscLogViewWarnSync(comm, fd)); 1535 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1536 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1537 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1538 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1539 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1540 PetscCall(PetscGetUserName(username, sizeof(username))); 1541 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1542 PetscCall(PetscGetDate(date, sizeof(date))); 1543 PetscCall(PetscGetVersion(version, sizeof(version))); 1544 if (size == 1) { 1545 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1546 } else { 1547 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1548 } 1549 #if defined(PETSC_HAVE_OPENMP) 1550 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1551 #endif 1552 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1553 1554 /* Must preserve reduction count before we go on */ 1555 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1556 1557 /* Calculate summary information */ 1558 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1559 /* Time */ 1560 PetscCallMPI(MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1561 PetscCallMPI(MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1562 PetscCallMPI(MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1563 avg = tot / ((PetscLogDouble)size); 1564 if (min != 0.0) ratio = max / min; 1565 else ratio = 0.0; 1566 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1567 TotalTime = tot; 1568 /* Objects */ 1569 avg = (PetscLogDouble)petsc_numObjects; 1570 PetscCallMPI(MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1571 PetscCallMPI(MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1572 PetscCallMPI(MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1573 avg = tot / ((PetscLogDouble)size); 1574 if (min != 0.0) ratio = max / min; 1575 else ratio = 0.0; 1576 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1577 /* Flops */ 1578 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1579 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1580 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1581 avg = tot / ((PetscLogDouble)size); 1582 if (min != 0.0) ratio = max / min; 1583 else ratio = 0.0; 1584 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1585 TotalFlops = tot; 1586 /* Flops/sec -- Must talk to Barry here */ 1587 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1588 else flops = 0.0; 1589 PetscCallMPI(MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1590 PetscCallMPI(MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1591 PetscCallMPI(MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1592 avg = tot / ((PetscLogDouble)size); 1593 if (min != 0.0) ratio = max / min; 1594 else ratio = 0.0; 1595 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1596 /* Memory */ 1597 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1598 if (mem > 0.0) { 1599 PetscCallMPI(MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1600 PetscCallMPI(MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1601 PetscCallMPI(MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1602 avg = tot / ((PetscLogDouble)size); 1603 if (min != 0.0) ratio = max / min; 1604 else ratio = 0.0; 1605 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1606 } 1607 /* Messages */ 1608 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1609 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1610 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1611 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1612 avg = tot / ((PetscLogDouble)size); 1613 if (min != 0.0) ratio = max / min; 1614 else ratio = 0.0; 1615 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1616 numMessages = tot; 1617 /* Message Lengths */ 1618 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1619 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1620 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1621 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1622 if (numMessages != 0) avg = tot / numMessages; 1623 else avg = 0.0; 1624 if (min != 0.0) ratio = max / min; 1625 else ratio = 0.0; 1626 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1627 messageLength = tot; 1628 /* Reductions */ 1629 PetscCallMPI(MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1630 PetscCallMPI(MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1631 PetscCallMPI(MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1632 if (min != 0.0) ratio = max / min; 1633 else ratio = 0.0; 1634 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1635 numReductions = red; /* wrong because uses count from process zero */ 1636 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1637 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1638 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1639 1640 /* Get total number of stages -- 1641 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1642 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1643 This seems best accomplished by assoicating a communicator with each stage. 1644 */ 1645 PetscCall(PetscLogGetStageLog(&stageLog)); 1646 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1647 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1648 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1649 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1650 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1651 if (numStages > 0) { 1652 stageInfo = stageLog->stageInfo; 1653 for (stage = 0; stage < numStages; stage++) { 1654 if (stage < stageLog->numStages) { 1655 localStageUsed[stage] = stageInfo[stage].used; 1656 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1657 } else { 1658 localStageUsed[stage] = PETSC_FALSE; 1659 localStageVisible[stage] = PETSC_TRUE; 1660 } 1661 } 1662 PetscCallMPI(MPI_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1663 PetscCallMPI(MPI_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1664 for (stage = 0; stage < numStages; stage++) { 1665 if (stageUsed[stage]) { 1666 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1667 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1668 break; 1669 } 1670 } 1671 for (stage = 0; stage < numStages; stage++) { 1672 if (!stageUsed[stage]) continue; 1673 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1674 if (localStageUsed[stage]) { 1675 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1676 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1677 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1678 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1679 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1680 name = stageInfo[stage].name; 1681 } else { 1682 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1683 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1684 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1685 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1686 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1687 name = ""; 1688 } 1689 mess *= 0.5; 1690 messLen *= 0.5; 1691 red /= size; 1692 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1693 else fracTime = 0.0; 1694 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1695 else fracFlops = 0.0; 1696 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1697 if (numMessages != 0.0) fracMessages = mess / numMessages; 1698 else fracMessages = 0.0; 1699 if (mess != 0.0) avgMessLen = messLen / mess; 1700 else avgMessLen = 0.0; 1701 if (messageLength != 0.0) fracLength = messLen / messageLength; 1702 else fracLength = 0.0; 1703 if (numReductions != 0.0) fracReductions = red / numReductions; 1704 else fracReductions = 0.0; 1705 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1706 } 1707 } 1708 1709 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1710 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1711 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1712 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1713 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1714 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1715 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1716 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1717 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1718 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1719 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1720 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1721 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1722 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1723 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1724 if (PetscLogMemory) { 1725 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1726 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1727 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1728 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1729 } 1730 #if defined(PETSC_HAVE_DEVICE) 1731 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1732 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1733 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1734 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1735 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1736 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1737 #endif 1738 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1739 1740 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1741 1742 /* Report events */ 1743 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1744 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1745 #if defined(PETSC_HAVE_DEVICE) 1746 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1747 #endif 1748 PetscCall(PetscFPrintf(comm, fd, "\n")); 1749 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1750 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1751 #if defined(PETSC_HAVE_DEVICE) 1752 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1753 #endif 1754 PetscCall(PetscFPrintf(comm, fd, "\n")); 1755 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1756 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1757 #if defined(PETSC_HAVE_DEVICE) 1758 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1759 #endif 1760 PetscCall(PetscFPrintf(comm, fd, "\n")); 1761 1762 #if defined(PETSC_HAVE_DEVICE) 1763 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1764 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1765 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1766 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1767 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1768 #endif 1769 1770 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1771 for (stage = 0; stage < numStages; stage++) { 1772 if (!stageVisible[stage]) continue; 1773 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1774 if (localStageUsed[stage]) { 1775 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1776 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1777 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1778 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1779 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1780 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1781 } else { 1782 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1783 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1784 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1785 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1786 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1787 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1788 } 1789 mess *= 0.5; 1790 messLen *= 0.5; 1791 red /= size; 1792 1793 /* Get total number of events in this stage -- 1794 Currently, a single processor can register more events than another, but events must all be registered in order, 1795 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1796 on the event ID. This seems best accomplished by associating a communicator with each stage. 1797 1798 Problem: If the event did not happen on proc 1, its name will not be available. 1799 Problem: Event visibility is not implemented 1800 */ 1801 if (localStageUsed[stage]) { 1802 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1803 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1804 } else localNumEvents = 0; 1805 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1806 for (event = 0; event < numEvents; event++) { 1807 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1808 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1809 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; 1810 else flopr = 0.0; 1811 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1812 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1813 PetscCallMPI(MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1814 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1815 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1816 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1817 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1818 PetscCallMPI(MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1819 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1820 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1821 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1822 if (PetscLogMemory) { 1823 PetscCallMPI(MPI_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1824 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1825 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1826 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1827 } 1828 #if defined(PETSC_HAVE_DEVICE) 1829 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1830 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1831 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1832 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1833 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1834 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1835 #endif 1836 name = stageLog->eventLog->eventInfo[event].name; 1837 } else { 1838 flopr = 0.0; 1839 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1840 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1841 PetscCallMPI(MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1842 PetscCallMPI(MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1843 PetscCallMPI(MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1844 PetscCallMPI(MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1845 PetscCallMPI(MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1846 PetscCallMPI(MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1847 PetscCallMPI(MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1848 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1849 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1850 if (PetscLogMemory) { 1851 PetscCallMPI(MPI_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1852 PetscCallMPI(MPI_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1853 PetscCallMPI(MPI_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1854 PetscCallMPI(MPI_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1855 } 1856 #if defined(PETSC_HAVE_DEVICE) 1857 PetscCallMPI(MPI_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1858 PetscCallMPI(MPI_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1859 PetscCallMPI(MPI_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1860 PetscCallMPI(MPI_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1861 PetscCallMPI(MPI_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1862 PetscCallMPI(MPI_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1863 #endif 1864 name = ""; 1865 } 1866 if (mint < 0.0) { 1867 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name)); 1868 mint = 0; 1869 } 1870 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, name); 1871 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1872 #if defined(PETSC_HAVE_DEVICE) 1873 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1874 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 1875 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 1876 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 1877 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 1878 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 1879 } 1880 } 1881 #endif 1882 totm *= 0.5; 1883 totml *= 0.5; 1884 totr /= size; 1885 1886 if (maxC != 0) { 1887 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 1888 else ratC = 0.0; 1889 if (mint != 0.0) ratt = maxt / mint; 1890 else ratt = 0.0; 1891 if (minf != 0.0) ratf = maxf / minf; 1892 else ratf = 0.0; 1893 if (TotalTime != 0.0) fracTime = tott / TotalTime; 1894 else fracTime = 0.0; 1895 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 1896 else fracFlops = 0.0; 1897 if (stageTime != 0.0) fracStageTime = tott / stageTime; 1898 else fracStageTime = 0.0; 1899 if (flops != 0.0) fracStageFlops = totf / flops; 1900 else fracStageFlops = 0.0; 1901 if (numMessages != 0.0) fracMess = totm / numMessages; 1902 else fracMess = 0.0; 1903 if (messageLength != 0.0) fracMessLen = totml / messageLength; 1904 else fracMessLen = 0.0; 1905 if (numReductions != 0.0) fracRed = totr / numReductions; 1906 else fracRed = 0.0; 1907 if (mess != 0.0) fracStageMess = totm / mess; 1908 else fracStageMess = 0.0; 1909 if (messLen != 0.0) fracStageMessLen = totml / messLen; 1910 else fracStageMessLen = 0.0; 1911 if (red != 0.0) fracStageRed = totr / red; 1912 else fracStageRed = 0.0; 1913 if (totm != 0.0) totml /= totm; 1914 else totml = 0.0; 1915 if (maxt != 0.0) flopr = totf / maxt; 1916 else flopr = 0.0; 1917 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) 1918 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 1919 else 1920 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 1921 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 1922 #if defined(PETSC_HAVE_DEVICE) 1923 if (totf != 0.0) fracgflops = gflops / totf; 1924 else fracgflops = 0.0; 1925 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 1926 else gflopr = 0.0; 1927 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 1928 #endif 1929 PetscCall(PetscFPrintf(comm, fd, "\n")); 1930 } 1931 } 1932 } 1933 1934 /* Memory usage and object creation */ 1935 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1936 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1937 #if defined(PETSC_HAVE_DEVICE) 1938 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1939 #endif 1940 PetscCall(PetscFPrintf(comm, fd, "\n")); 1941 PetscCall(PetscFPrintf(comm, fd, "\n")); 1942 1943 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1944 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1945 stats for stages local to processor sets. 1946 */ 1947 /* We should figure out the longest object name here (now 20 characters) */ 1948 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 1949 for (stage = 0; stage < numStages; stage++) { 1950 if (localStageUsed[stage]) { 1951 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 1952 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1953 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 1954 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 1955 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions)); 1956 } 1957 } 1958 } else { 1959 if (!localStageVisible[stage]) continue; 1960 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1961 } 1962 } 1963 1964 PetscCall(PetscFree(localStageUsed)); 1965 PetscCall(PetscFree(stageUsed)); 1966 PetscCall(PetscFree(localStageVisible)); 1967 PetscCall(PetscFree(stageVisible)); 1968 1969 /* Information unrelated to this particular run */ 1970 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 1971 PetscTime(&y); 1972 PetscTime(&x); 1973 PetscTime(&y); 1974 PetscTime(&y); 1975 PetscTime(&y); 1976 PetscTime(&y); 1977 PetscTime(&y); 1978 PetscTime(&y); 1979 PetscTime(&y); 1980 PetscTime(&y); 1981 PetscTime(&y); 1982 PetscTime(&y); 1983 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 1984 /* MPI information */ 1985 if (size > 1) { 1986 MPI_Status status; 1987 PetscMPIInt tag; 1988 MPI_Comm newcomm; 1989 1990 PetscCallMPI(MPI_Barrier(comm)); 1991 PetscTime(&x); 1992 PetscCallMPI(MPI_Barrier(comm)); 1993 PetscCallMPI(MPI_Barrier(comm)); 1994 PetscCallMPI(MPI_Barrier(comm)); 1995 PetscCallMPI(MPI_Barrier(comm)); 1996 PetscCallMPI(MPI_Barrier(comm)); 1997 PetscTime(&y); 1998 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 1999 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 2000 PetscCallMPI(MPI_Barrier(comm)); 2001 if (rank) { 2002 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 2003 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 2004 } else { 2005 PetscTime(&x); 2006 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 2007 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 2008 PetscTime(&y); 2009 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 2010 } 2011 PetscCall(PetscCommDestroy(&newcomm)); 2012 } 2013 PetscCall(PetscOptionsView(NULL, viewer)); 2014 2015 /* Machine and compile information */ 2016 #if defined(PETSC_USE_FORTRAN_KERNELS) 2017 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 2018 #else 2019 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 2020 #endif 2021 #if defined(PETSC_USE_64BIT_INDICES) 2022 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64 bit PetscInt\n")); 2023 #elif defined(PETSC_USE___FLOAT128) 2024 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32 bit PetscInt\n")); 2025 #endif 2026 #if defined(PETSC_USE_REAL_SINGLE) 2027 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 2028 #elif defined(PETSC_USE___FLOAT128) 2029 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 2030 #endif 2031 #if defined(PETSC_USE_REAL_MAT_SINGLE) 2032 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 2033 #else 2034 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 2035 #endif 2036 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 2037 2038 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 2039 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 2040 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 2041 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 2042 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 2043 2044 /* Cleanup */ 2045 PetscCall(PetscFPrintf(comm, fd, "\n")); 2046 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 2047 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 2048 PetscCall(PetscFPTrapPop()); 2049 PetscFunctionReturn(0); 2050 } 2051 2052 /*@C 2053 PetscLogView - Prints a summary of the logging. 2054 2055 Collective over MPI_Comm 2056 2057 Input Parameter: 2058 . viewer - an ASCII viewer 2059 2060 Options Database Keys: 2061 + -log_view [:filename] - Prints summary of log information 2062 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2063 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2064 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2065 . -log_view_memory - Also display memory usage in each event 2066 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2067 . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation 2068 - -log_trace [filename] - Displays a trace of what each process is doing 2069 2070 Notes: 2071 It is possible to control the logging programatically but we recommend using the options database approach whenever possible 2072 By default the summary is printed to stdout. 2073 2074 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2075 2076 If PETSc is configured with --with-logging=0 then this functionality is not available 2077 2078 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2079 directory then open filename.xml with your browser. Specific notes for certain browsers 2080 $ Firefox and Internet explorer - simply open the file 2081 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2082 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2083 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2084 your browser. 2085 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2086 window and render the XML log file contents. 2087 2088 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2089 2090 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2091 or using speedscope (https://www.speedscope.app). 2092 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2093 2094 Level: beginner 2095 2096 .seealso: `PetscLogDefaultBegin()`, `PetscLogDump()` 2097 @*/ 2098 PetscErrorCode PetscLogView(PetscViewer viewer) 2099 { 2100 PetscBool isascii; 2101 PetscViewerFormat format; 2102 int stage, lastStage; 2103 PetscStageLog stageLog; 2104 2105 PetscFunctionBegin; 2106 PetscCheck(PetscLogPLB, PETSC_COMM_SELF, PETSC_ERR_SUP, "Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2107 /* Pop off any stages the user forgot to remove */ 2108 lastStage = 0; 2109 PetscCall(PetscLogGetStageLog(&stageLog)); 2110 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2111 while (stage >= 0) { 2112 lastStage = stage; 2113 PetscCall(PetscStageLogPop(stageLog)); 2114 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2115 } 2116 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2117 PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Currently can only view logging to ASCII"); 2118 PetscCall(PetscViewerGetFormat(viewer, &format)); 2119 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2120 PetscCall(PetscLogView_Default(viewer)); 2121 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2122 PetscCall(PetscLogView_Detailed(viewer)); 2123 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2124 PetscCall(PetscLogView_CSV(viewer)); 2125 } else if (format == PETSC_VIEWER_ASCII_XML) { 2126 PetscCall(PetscLogView_Nested(viewer)); 2127 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2128 PetscCall(PetscLogView_Flamegraph(viewer)); 2129 } 2130 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2131 PetscFunctionReturn(0); 2132 } 2133 2134 /*@C 2135 PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed. 2136 2137 Collective on `PETSC_COMM_WORLD` 2138 2139 Level: developer 2140 2141 .seealso: `PetscLogView()` 2142 @*/ 2143 PetscErrorCode PetscLogViewFromOptions(void) 2144 { 2145 PetscViewer viewer; 2146 PetscBool flg; 2147 PetscViewerFormat format; 2148 2149 PetscFunctionBegin; 2150 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg)); 2151 if (flg) { 2152 PetscCall(PetscViewerPushFormat(viewer, format)); 2153 PetscCall(PetscLogView(viewer)); 2154 PetscCall(PetscViewerPopFormat(viewer)); 2155 PetscCall(PetscViewerDestroy(&viewer)); 2156 } 2157 PetscFunctionReturn(0); 2158 } 2159 2160 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2161 /*@C 2162 PetscGetFlops - Returns the number of flops used on this processor 2163 since the program began. 2164 2165 Not Collective 2166 2167 Output Parameter: 2168 flops - number of floating point operations 2169 2170 Notes: 2171 A global counter logs all PETSc flop counts. The user can use 2172 `PetscLogFlops()` to increment this counter to include flops for the 2173 application code. 2174 2175 A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank 2176 2177 Level: intermediate 2178 2179 .seealso: `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()` 2180 @*/ 2181 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) 2182 { 2183 PetscFunctionBegin; 2184 *flops = petsc_TotalFlops; 2185 PetscFunctionReturn(0); 2186 } 2187 2188 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2189 { 2190 size_t fullLength; 2191 va_list Argp; 2192 2193 PetscFunctionBegin; 2194 if (!petsc_logObjects) PetscFunctionReturn(0); 2195 va_start(Argp, format); 2196 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp)); 2197 va_end(Argp); 2198 PetscFunctionReturn(0); 2199 } 2200 2201 /*MC 2202 PetscLogFlops - Adds floating point operations to the global counter. 2203 2204 Synopsis: 2205 #include <petsclog.h> 2206 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2207 2208 Not Collective 2209 2210 Input Parameter: 2211 . f - flop counter 2212 2213 Usage: 2214 .vb 2215 PetscLogEvent USER_EVENT; 2216 PetscLogEventRegister("User event",0,&USER_EVENT); 2217 PetscLogEventBegin(USER_EVENT,0,0,0,0); 2218 [code segment to monitor] 2219 PetscLogFlops(user_flops) 2220 PetscLogEventEnd(USER_EVENT,0,0,0,0); 2221 .ve 2222 2223 Note: 2224 A global counter logs all PETSc flop counts. The user can use 2225 PetscLogFlops() to increment this counter to include flops for the 2226 application code. 2227 2228 Level: intermediate 2229 2230 .seealso: `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2231 M*/ 2232 2233 /*MC 2234 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) 2235 to get accurate timings 2236 2237 Synopsis: 2238 #include <petsclog.h> 2239 void PetscPreLoadBegin(PetscBool flag,char *name); 2240 2241 Not Collective 2242 2243 Input Parameters: 2244 + flag - PETSC_TRUE to run twice, `PETSC_FALSE` to run once, may be overridden 2245 with command line option -preload true or -preload false 2246 - name - name of first stage (lines of code timed separately with -log_view) to 2247 be preloaded 2248 2249 Usage: 2250 .vb 2251 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2252 lines of code 2253 PetscPreLoadStage("second stage"); 2254 lines of code 2255 PetscPreLoadEnd(); 2256 .ve 2257 2258 Note: 2259 Only works in C/C++, not Fortran 2260 2261 Flags available within the macro. 2262 + PetscPreLoadingUsed - true if we are or have done preloading 2263 . PetscPreLoadingOn - true if it is CURRENTLY doing preload 2264 . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second 2265 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on 2266 The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin() 2267 and PetscPreLoadEnd() 2268 2269 Level: intermediate 2270 2271 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2272 M*/ 2273 2274 /*MC 2275 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) 2276 to get accurate timings 2277 2278 Synopsis: 2279 #include <petsclog.h> 2280 void PetscPreLoadEnd(void); 2281 2282 Not Collective 2283 2284 Usage: 2285 .vb 2286 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2287 lines of code 2288 PetscPreLoadStage("second stage"); 2289 lines of code 2290 PetscPreLoadEnd(); 2291 .ve 2292 2293 Note: 2294 Only works in C/C++ not fortran 2295 2296 Level: intermediate 2297 2298 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2299 M*/ 2300 2301 /*MC 2302 PetscPreLoadStage - Start a new segment of code to be timed separately. 2303 to get accurate timings 2304 2305 Synopsis: 2306 #include <petsclog.h> 2307 void PetscPreLoadStage(char *name); 2308 2309 Not Collective 2310 2311 Usage: 2312 .vb 2313 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2314 lines of code 2315 PetscPreLoadStage("second stage"); 2316 lines of code 2317 PetscPreLoadEnd(); 2318 .ve 2319 2320 Note: 2321 Only works in C/C++ not fortran 2322 2323 Level: intermediate 2324 2325 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2326 M*/ 2327 2328 #if PetscDefined(HAVE_DEVICE) 2329 #include <petsc/private/deviceimpl.h> 2330 2331 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2332 2333 /* 2334 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2335 because it will result in timing results that cannot be interpreted. 2336 */ 2337 static PetscErrorCode PetscLogGpuTime_Off(void) 2338 { 2339 PetscLogGpuTimeFlag = PETSC_FALSE; 2340 return 0; 2341 } 2342 2343 /*@C 2344 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2345 2346 Options Database Key: 2347 . -log_view_gpu_time - provide the GPU times in the -log_view output 2348 2349 Notes: 2350 Turning on the timing of the 2351 GPU kernels can slow down the entire computation and should only be used when studying the performance 2352 of operations on GPU such as vector operations and matrix-vector operations. 2353 2354 This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off. 2355 2356 Level: advanced 2357 2358 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2359 @*/ 2360 PetscErrorCode PetscLogGpuTime(void) 2361 { 2362 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2363 PetscLogGpuTimeFlag = PETSC_TRUE; 2364 return 0; 2365 } 2366 2367 /*@C 2368 PetscLogGpuTimeBegin - Start timer for device 2369 2370 Notes: 2371 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times). 2372 2373 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times). 2374 2375 There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()` 2376 2377 This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space. 2378 2379 The regular logging captures the time for data transfers and any CPU activites during the event 2380 2381 It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel. 2382 2383 Developer Notes: 2384 The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`. 2385 2386 `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the 2387 event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution, 2388 but not continued GPU excution, until the timer event is recorded. 2389 2390 Level: intermediate 2391 2392 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2393 @*/ 2394 PetscErrorCode PetscLogGpuTimeBegin(void) 2395 { 2396 PetscFunctionBegin; 2397 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2398 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2399 PetscDeviceContext dctx; 2400 2401 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2402 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2403 } else { 2404 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2405 } 2406 PetscFunctionReturn(0); 2407 } 2408 2409 /*@C 2410 PetscLogGpuTimeEnd - Stop timer for device 2411 2412 Level: intermediate 2413 2414 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2415 @*/ 2416 PetscErrorCode PetscLogGpuTimeEnd(void) 2417 { 2418 PetscFunctionBegin; 2419 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2420 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2421 PetscDeviceContext dctx; 2422 PetscLogDouble elapsed; 2423 2424 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2425 PetscCall(PetscDeviceContextEndTimer_Internal(dctx, &elapsed)); 2426 petsc_gtime += (elapsed / 1000.0); 2427 } else { 2428 PetscCall(PetscTimeAdd(&petsc_gtime)); 2429 } 2430 PetscFunctionReturn(0); 2431 } 2432 #endif /* end of PETSC_HAVE_DEVICE */ 2433 2434 #else /* end of -DPETSC_USE_LOG section */ 2435 2436 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2437 { 2438 PetscFunctionBegin; 2439 PetscFunctionReturn(0); 2440 } 2441 2442 #endif /* PETSC_USE_LOG*/ 2443 2444 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2445 PetscClassId PETSC_OBJECT_CLASSID = 0; 2446 2447 /*@C 2448 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2449 2450 Not Collective 2451 2452 Input Parameter: 2453 . name - The class name 2454 2455 Output Parameter: 2456 . oclass - The class id or classid 2457 2458 Level: developer 2459 2460 .seealso: `PetscLogEventRegister()` 2461 @*/ 2462 PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass) 2463 { 2464 #if defined(PETSC_USE_LOG) 2465 PetscStageLog stageLog; 2466 PetscInt stage; 2467 #endif 2468 2469 PetscFunctionBegin; 2470 *oclass = ++PETSC_LARGEST_CLASSID; 2471 #if defined(PETSC_USE_LOG) 2472 PetscCall(PetscLogGetStageLog(&stageLog)); 2473 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2474 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2475 #endif 2476 PetscFunctionReturn(0); 2477 } 2478 2479 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2480 #include <mpe.h> 2481 2482 PetscBool PetscBeganMPE = PETSC_FALSE; 2483 2484 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2485 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2486 2487 /*@C 2488 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files 2489 and slows the program down. 2490 2491 Collective over `PETSC_COMM_WORLD` 2492 2493 Options Database Key: 2494 . -log_mpe - Prints extensive log information 2495 2496 Note: 2497 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 2498 intended for production runs since it logs only flop rates and object 2499 creation (and should not significantly slow the programs). 2500 2501 Level: advanced 2502 2503 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2504 `PetscLogEventDeactivate()` 2505 @*/ 2506 PetscErrorCode PetscLogMPEBegin(void) 2507 { 2508 PetscFunctionBegin; 2509 /* Do MPE initialization */ 2510 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2511 PetscCall(PetscInfo(0, "Initializing MPE.\n")); 2512 PetscCall(MPE_Init_log()); 2513 2514 PetscBeganMPE = PETSC_TRUE; 2515 } else { 2516 PetscCall(PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n")); 2517 } 2518 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 /*@C 2523 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2524 2525 Collective over `PETSC_COMM_WORLD` 2526 2527 Level: advanced 2528 2529 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2530 @*/ 2531 PetscErrorCode PetscLogMPEDump(const char sname[]) 2532 { 2533 char name[PETSC_MAX_PATH_LEN]; 2534 2535 PetscFunctionBegin; 2536 if (PetscBeganMPE) { 2537 PetscCall(PetscInfo(0, "Finalizing MPE.\n")); 2538 if (sname) { 2539 PetscCall(PetscStrcpy(name, sname)); 2540 } else { 2541 PetscCall(PetscGetProgramName(name, sizeof(name))); 2542 } 2543 PetscCall(MPE_Finish_log(name)); 2544 } else { 2545 PetscCall(PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n")); 2546 } 2547 PetscFunctionReturn(0); 2548 } 2549 2550 #define PETSC_RGB_COLORS_MAX 39 2551 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ", 2552 "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ", 2553 "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ", 2554 "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ", 2555 "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "}; 2556 2557 /*@C 2558 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()` 2559 2560 Not collective. Maybe it should be? 2561 2562 Output Parameter: 2563 . str - character string representing the color 2564 2565 Level: developer 2566 2567 .seealso: `PetscLogEventRegister()` 2568 @*/ 2569 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) 2570 { 2571 static int idx = 0; 2572 2573 PetscFunctionBegin; 2574 *str = PetscLogMPERGBColors[idx]; 2575 idx = (idx + 1) % PETSC_RGB_COLORS_MAX; 2576 PetscFunctionReturn(0); 2577 } 2578 2579 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2580