1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 15 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 16 17 #if defined(PETSC_USE_LOG) 18 #include <petscmachineinfo.h> 19 #include <petscconfiginfo.h> 20 21 /* used in the MPI_XXX() count macros in petsclog.h */ 22 23 /* Action and object logging variables */ 24 Action *petsc_actions = NULL; 25 Object *petsc_objects = NULL; 26 PetscBool petsc_logActions = PETSC_FALSE; 27 PetscBool petsc_logObjects = PETSC_FALSE; 28 int petsc_numActions = 0, petsc_maxActions = 100; 29 int petsc_numObjects = 0, petsc_maxObjects = 100; 30 int petsc_numObjectsDestroyed = 0; 31 32 /* Global counters */ 33 PetscLogDouble petsc_BaseTime = 0.0; 34 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 35 PetscLogDouble petsc_tmp_flops = 0.0; /* The incremental number of flops */ 36 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 37 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 38 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 39 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 40 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 41 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 42 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 43 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 44 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 45 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 46 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 47 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 48 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 49 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 50 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 51 #if defined(PETSC_HAVE_DEVICE) 52 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 53 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 54 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 55 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 56 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 57 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 58 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 59 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 60 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 61 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 62 #endif 63 64 /* Logging functions */ 65 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 66 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 67 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 68 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 69 70 /* Tracing event logging variables */ 71 FILE *petsc_tracefile = NULL; 72 int petsc_tracelevel = 0; 73 const char *petsc_traceblanks = " "; 74 char petsc_tracespace[128] = " "; 75 PetscLogDouble petsc_tracetime = 0.0; 76 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 77 78 static PetscIntStack current_log_event_stack = NULL; 79 80 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) 81 { 82 int stage; 83 PetscBool opt; 84 85 PetscFunctionBegin; 86 if (PetscLogInitializeCalled) PetscFunctionReturn(0); 87 PetscLogInitializeCalled = PETSC_TRUE; 88 89 PetscCall(PetscIntStackCreate(¤t_log_event_stack)); 90 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt)); 91 if (opt) petsc_logActions = PETSC_FALSE; 92 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt)); 93 if (opt) petsc_logObjects = PETSC_FALSE; 94 if (petsc_logActions) PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 95 if (petsc_logObjects) PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 96 PetscLogPHC = PetscLogObjCreateDefault; 97 PetscLogPHD = PetscLogObjDestroyDefault; 98 /* Setup default logging structures */ 99 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 100 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 101 102 /* All processors sync here for more consistent logging */ 103 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 104 PetscTime(&petsc_BaseTime); 105 PetscCall(PetscLogStagePush(stage)); 106 PetscFunctionReturn(0); 107 } 108 109 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) 110 { 111 PetscStageLog stageLog; 112 113 PetscFunctionBegin; 114 PetscCall(PetscFree(petsc_actions)); 115 PetscCall(PetscFree(petsc_objects)); 116 PetscCall(PetscLogNestedEnd()); 117 PetscCall(PetscLogSet(NULL, NULL)); 118 119 /* Resetting phase */ 120 PetscCall(PetscLogGetStageLog(&stageLog)); 121 PetscCall(PetscStageLogDestroy(stageLog)); 122 PetscCall(PetscIntStackDestroy(current_log_event_stack)); 123 current_log_event_stack = NULL; 124 125 petsc_TotalFlops = 0.0; 126 petsc_numActions = 0; 127 petsc_numObjects = 0; 128 petsc_numObjectsDestroyed = 0; 129 petsc_maxActions = 100; 130 petsc_maxObjects = 100; 131 petsc_actions = NULL; 132 petsc_objects = NULL; 133 petsc_logActions = PETSC_FALSE; 134 petsc_logObjects = PETSC_FALSE; 135 petsc_BaseTime = 0.0; 136 petsc_TotalFlops = 0.0; 137 petsc_tmp_flops = 0.0; 138 petsc_send_ct = 0.0; 139 petsc_recv_ct = 0.0; 140 petsc_send_len = 0.0; 141 petsc_recv_len = 0.0; 142 petsc_isend_ct = 0.0; 143 petsc_irecv_ct = 0.0; 144 petsc_isend_len = 0.0; 145 petsc_irecv_len = 0.0; 146 petsc_wait_ct = 0.0; 147 petsc_wait_any_ct = 0.0; 148 petsc_wait_all_ct = 0.0; 149 petsc_sum_of_waits_ct = 0.0; 150 petsc_allreduce_ct = 0.0; 151 petsc_gather_ct = 0.0; 152 petsc_scatter_ct = 0.0; 153 #if defined(PETSC_HAVE_DEVICE) 154 petsc_ctog_ct = 0.0; 155 petsc_gtoc_ct = 0.0; 156 petsc_ctog_sz = 0.0; 157 petsc_gtoc_sz = 0.0; 158 petsc_gflops = 0.0; 159 petsc_gtime = 0.0; 160 #endif 161 PETSC_LARGEST_EVENT = PETSC_EVENT; 162 PetscLogPHC = NULL; 163 PetscLogPHD = NULL; 164 petsc_tracefile = NULL; 165 petsc_tracelevel = 0; 166 petsc_traceblanks = " "; 167 petsc_tracespace[0] = ' '; 168 petsc_tracespace[1] = 0; 169 petsc_tracetime = 0.0; 170 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 171 PETSC_OBJECT_CLASSID = 0; 172 petsc_stageLog = NULL; 173 PetscLogInitializeCalled = PETSC_FALSE; 174 PetscFunctionReturn(0); 175 } 176 177 /*@C 178 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 179 180 Not Collective 181 182 Input Parameters: 183 + b - The function called at beginning of event 184 - e - The function called at end of event 185 186 Level: developer 187 188 Developer Note: 189 The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. 190 191 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()` 192 @*/ 193 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) 194 { 195 PetscFunctionBegin; 196 PetscLogPLB = b; 197 PetscLogPLE = e; 198 PetscFunctionReturn(0); 199 } 200 201 /*@C 202 PetscLogIsActive - Check if logging is currently in progress. 203 204 Not Collective 205 206 Output Parameter: 207 . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise 208 209 Level: beginner 210 211 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 212 @*/ 213 PetscErrorCode PetscLogIsActive(PetscBool *isActive) 214 { 215 PetscFunctionBegin; 216 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 217 PetscFunctionReturn(0); 218 } 219 220 /*@C 221 PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop 222 rates and object creation and should not slow programs down too much. 223 This routine may be called more than once. 224 225 Logically Collective over `PETSC_COMM_WORLD` 226 227 Options Database Key: 228 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 229 screen (for code configured with --with-log=1 (which is the default)) 230 231 Usage: 232 .vb 233 PetscInitialize(...); 234 PetscLogDefaultBegin(); 235 ... code ... 236 PetscLogView(viewer); or PetscLogDump(); 237 PetscFinalize(); 238 .ve 239 240 Note: 241 `PetscLogView()` or `PetscLogDump()` actually cause the printing of 242 the logging information. 243 244 Level: advanced 245 246 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 247 @*/ 248 PetscErrorCode PetscLogDefaultBegin(void) 249 { 250 PetscFunctionBegin; 251 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 252 PetscFunctionReturn(0); 253 } 254 255 /*@C 256 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 257 all events. This creates large log files and slows the program down. 258 259 Logically Collective on `PETSC_COMM_WORLD` 260 261 Options Database Key: 262 . -log_all - Prints extensive log information 263 264 Usage: 265 .vb 266 PetscInitialize(...); 267 PetscLogAllBegin(); 268 ... code ... 269 PetscLogDump(filename); 270 PetscFinalize(); 271 .ve 272 273 Note: 274 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 275 intended for production runs since it logs only flop rates and object 276 creation (and shouldn't significantly slow the programs). 277 278 Level: advanced 279 280 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 281 @*/ 282 PetscErrorCode PetscLogAllBegin(void) 283 { 284 PetscFunctionBegin; 285 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 286 PetscFunctionReturn(0); 287 } 288 289 /*@C 290 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 291 begins or ends, the event name is printed. 292 293 Logically Collective on `PETSC_COMM_WORLD` 294 295 Input Parameter: 296 . file - The file to print trace in (e.g. stdout) 297 298 Options Database Key: 299 . -log_trace [filename] - Activates `PetscLogTraceBegin()` 300 301 Notes: 302 `PetscLogTraceBegin()` prints the processor number, the execution time (sec), 303 then "Event begin:" or "Event end:" followed by the event name. 304 305 `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful 306 to determine where a program is hanging without running in the 307 debugger. Can be used in conjunction with the -info option. 308 309 Level: intermediate 310 311 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 312 @*/ 313 PetscErrorCode PetscLogTraceBegin(FILE *file) 314 { 315 PetscFunctionBegin; 316 petsc_tracefile = file; 317 318 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 319 PetscFunctionReturn(0); 320 } 321 322 /*@ 323 PetscLogActions - Determines whether actions are logged for the graphical viewer. 324 325 Not Collective 326 327 Input Parameter: 328 . flag - `PETSC_TRUE` if actions are to be logged 329 330 Options Database Key: 331 . -log_exclude_actions - Turns off actions logging 332 333 Level: intermediate 334 335 Note: 336 Logging of actions continues to consume more memory as the program 337 runs. Long running programs should consider turning this feature off. 338 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 339 @*/ 340 PetscErrorCode PetscLogActions(PetscBool flag) 341 { 342 PetscFunctionBegin; 343 petsc_logActions = flag; 344 PetscFunctionReturn(0); 345 } 346 347 /*@ 348 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 349 350 Not Collective 351 352 Input Parameter: 353 . flag - `PETSC_TRUE` if objects are to be logged 354 355 Options Database Key: 356 . -log_exclude_objects - Turns off objects logging 357 358 Level: intermediate 359 360 Note: 361 Logging of objects continues to consume more memory as the program 362 runs. Long running programs should consider turning this feature off. 363 364 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 365 @*/ 366 PetscErrorCode PetscLogObjects(PetscBool flag) 367 { 368 PetscFunctionBegin; 369 petsc_logObjects = flag; 370 PetscFunctionReturn(0); 371 } 372 373 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 374 /*@C 375 PetscLogStageRegister - Attaches a character string name to a logging stage. 376 377 Not Collective 378 379 Input Parameter: 380 . sname - The name to associate with that stage 381 382 Output Parameter: 383 . stage - The stage number 384 385 Level: intermediate 386 387 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 388 @*/ 389 PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage) 390 { 391 PetscStageLog stageLog; 392 PetscLogEvent event; 393 394 PetscFunctionBegin; 395 PetscCall(PetscLogGetStageLog(&stageLog)); 396 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 397 /* Copy events already changed in the main stage, this sucks */ 398 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 399 for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 400 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 401 PetscFunctionReturn(0); 402 } 403 404 /*@C 405 PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage 406 407 Not Collective 408 409 Input Parameter: 410 . stage - The stage on which to log 411 412 Usage: 413 If the option -log_view is used to run the program containing the 414 following code, then 2 sets of summary data will be printed during 415 PetscFinalize(). 416 .vb 417 PetscInitialize(int *argc,char ***args,0,0); 418 [stage 0 of code] 419 PetscLogStagePush(1); 420 [stage 1 of code] 421 PetscLogStagePop(); 422 PetscBarrier(...); 423 [more stage 0 of code] 424 PetscFinalize(); 425 .ve 426 427 Note: 428 Use `PetscLogStageRegister()` to register a stage. 429 430 Level: intermediate 431 432 .seealso: `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 433 @*/ 434 PetscErrorCode PetscLogStagePush(PetscLogStage stage) 435 { 436 PetscStageLog stageLog; 437 438 PetscFunctionBegin; 439 PetscCall(PetscLogGetStageLog(&stageLog)); 440 PetscCall(PetscStageLogPush(stageLog, stage)); 441 PetscFunctionReturn(0); 442 } 443 444 /*@C 445 PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()` 446 447 Not Collective 448 449 Usage: 450 If the option -log_view is used to run the program containing the 451 following code, then 2 sets of summary data will be printed during 452 PetscFinalize(). 453 .vb 454 PetscInitialize(int *argc,char ***args,0,0); 455 [stage 0 of code] 456 PetscLogStagePush(1); 457 [stage 1 of code] 458 PetscLogStagePop(); 459 PetscBarrier(...); 460 [more stage 0 of code] 461 PetscFinalize(); 462 .ve 463 464 Level: intermediate 465 466 .seealso: `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 467 @*/ 468 PetscErrorCode PetscLogStagePop(void) 469 { 470 PetscStageLog stageLog; 471 472 PetscFunctionBegin; 473 PetscCall(PetscLogGetStageLog(&stageLog)); 474 PetscCall(PetscStageLogPop(stageLog)); 475 PetscFunctionReturn(0); 476 } 477 478 /*@ 479 PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 480 481 Not Collective 482 483 Input Parameters: 484 + stage - The stage 485 - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 486 487 Level: intermediate 488 489 Note: 490 If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist 491 492 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 493 @*/ 494 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) 495 { 496 PetscStageLog stageLog; 497 498 PetscFunctionBegin; 499 PetscCall(PetscLogGetStageLog(&stageLog)); 500 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 501 PetscFunctionReturn(0); 502 } 503 504 /*@ 505 PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 506 507 Not Collective 508 509 Input Parameter: 510 . stage - The stage 511 512 Output Parameter: 513 . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 514 515 Level: intermediate 516 517 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 518 @*/ 519 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) 520 { 521 PetscStageLog stageLog; 522 523 PetscFunctionBegin; 524 PetscCall(PetscLogGetStageLog(&stageLog)); 525 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 526 PetscFunctionReturn(0); 527 } 528 529 /*@ 530 PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()` 531 532 Not Collective 533 534 Input Parameters: 535 + stage - The stage 536 - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 537 538 Level: intermediate 539 540 Developer Note: 541 What does visible mean, needs to be documented. 542 543 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 544 @*/ 545 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) 546 { 547 PetscStageLog stageLog; 548 549 PetscFunctionBegin; 550 PetscCall(PetscLogGetStageLog(&stageLog)); 551 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 552 PetscFunctionReturn(0); 553 } 554 555 /*@ 556 PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()` 557 558 Not Collective 559 560 Input Parameter: 561 . stage - The stage 562 563 Output Parameter: 564 . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 565 566 Level: intermediate 567 568 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 569 @*/ 570 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) 571 { 572 PetscStageLog stageLog; 573 574 PetscFunctionBegin; 575 PetscCall(PetscLogGetStageLog(&stageLog)); 576 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 577 PetscFunctionReturn(0); 578 } 579 580 /*@C 581 PetscLogStageGetId - Returns the stage id when given the stage name. 582 583 Not Collective 584 585 Input Parameter: 586 . name - The stage name 587 588 Output Parameter: 589 . stage - The stage, , or -1 if no stage with that name exists 590 591 Level: intermediate 592 593 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 594 @*/ 595 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) 596 { 597 PetscStageLog stageLog; 598 599 PetscFunctionBegin; 600 PetscCall(PetscLogGetStageLog(&stageLog)); 601 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 602 PetscFunctionReturn(0); 603 } 604 605 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 606 607 /*@C 608 PetscLogEventRegister - Registers an event name for logging operations 609 610 Not Collective 611 612 Input Parameters: 613 + name - The name associated with the event 614 - classid - The classid associated to the class for this event, obtain either with 615 `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones 616 are only available in C code 617 618 Output Parameter: 619 . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`. 620 621 Example of Usage: 622 .vb 623 PetscLogEvent USER_EVENT; 624 PetscClassId classid; 625 PetscLogDouble user_event_flops; 626 PetscClassIdRegister("class name",&classid); 627 PetscLogEventRegister("User event name",classid,&USER_EVENT); 628 PetscLogEventBegin(USER_EVENT,0,0,0,0); 629 [code segment to monitor] 630 PetscLogFlops(user_event_flops); 631 PetscLogEventEnd(USER_EVENT,0,0,0,0); 632 .ve 633 634 Notes: 635 PETSc automatically logs library events if the code has been 636 configured with --with-log (which is the default) and 637 -log_view or -log_all is specified. `PetscLogEventRegister()` is 638 intended for logging user events to supplement this PETSc 639 information. 640 641 PETSc can gather data for use with the utilities Jumpshot 642 (part of the MPICH distribution). If PETSc has been compiled 643 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 644 MPICH), the user can employ another command line option, -log_mpe, 645 to create a logfile, "mpe.log", which can be visualized 646 Jumpshot. 647 648 The classid is associated with each event so that classes of events 649 can be disabled simultaneously, such as all matrix events. The user 650 can either use an existing classid, such as `MAT_CLASSID`, or create 651 their own as shown in the example. 652 653 If an existing event with the same name exists, its event handle is 654 returned instead of creating a new event. 655 656 Level: intermediate 657 658 .seealso: `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 659 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 660 @*/ 661 PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event) 662 { 663 PetscStageLog stageLog; 664 int stage; 665 666 PetscFunctionBegin; 667 *event = PETSC_DECIDE; 668 PetscCall(PetscLogGetStageLog(&stageLog)); 669 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 670 if (*event > 0) PetscFunctionReturn(0); 671 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 672 for (stage = 0; stage < stageLog->numStages; stage++) { 673 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 674 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 675 } 676 PetscFunctionReturn(0); 677 } 678 679 /*@ 680 PetscLogEventSetCollective - Indicates that a particular event is collective. 681 682 Not Collective 683 684 Input Parameters: 685 + event - The event id 686 - collective - Bolean flag indicating whether a particular event is collective 687 688 Notes: 689 New events returned from `PetscLogEventRegister()` are collective by default. 690 691 Collective events are handled specially if the -log_sync is used. In that case the logging saves information about 692 two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication 693 to be performed. This option is useful to debug imbalance within the computations or communications 694 695 Level: developer 696 697 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()` 698 @*/ 699 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective) 700 { 701 PetscStageLog stageLog; 702 PetscEventRegLog eventRegLog; 703 704 PetscFunctionBegin; 705 PetscCall(PetscLogGetStageLog(&stageLog)); 706 PetscCall(PetscStageLogGetEventRegLog(stageLog, &eventRegLog)); 707 PetscCheck(event >= 0 && event <= eventRegLog->numEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid event id"); 708 eventRegLog->eventInfo[event].collective = collective; 709 PetscFunctionReturn(0); 710 } 711 712 /*@ 713 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 714 715 Not Collective 716 717 Input Parameter: 718 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 719 720 Level: developer 721 722 .seealso: `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 723 @*/ 724 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) 725 { 726 PetscStageLog stageLog; 727 int stage; 728 729 PetscFunctionBegin; 730 PetscCall(PetscLogGetStageLog(&stageLog)); 731 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 732 PetscFunctionReturn(0); 733 } 734 735 /*@ 736 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 737 738 Not Collective 739 740 Input Parameter: 741 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 742 743 Level: developer 744 745 Note: 746 If a class is excluded then events associated with that class are not logged. 747 748 .seealso: `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 749 @*/ 750 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) 751 { 752 PetscStageLog stageLog; 753 int stage; 754 755 PetscFunctionBegin; 756 PetscCall(PetscLogGetStageLog(&stageLog)); 757 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 758 PetscFunctionReturn(0); 759 } 760 761 /*@ 762 PetscLogEventActivate - Indicates that a particular event should be logged. 763 764 Not Collective 765 766 Input Parameter: 767 . event - The event id 768 769 Usage: 770 .vb 771 PetscLogEventDeactivate(VEC_SetValues); 772 [code where you do not want to log VecSetValues()] 773 PetscLogEventActivate(VEC_SetValues); 774 [code where you do want to log VecSetValues()] 775 .ve 776 777 Note: 778 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 779 or an event number obtained with `PetscLogEventRegister()`. 780 781 Level: advanced 782 783 .seealso: `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 784 @*/ 785 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) 786 { 787 PetscStageLog stageLog; 788 int stage; 789 790 PetscFunctionBegin; 791 PetscCall(PetscLogGetStageLog(&stageLog)); 792 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 793 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 794 PetscFunctionReturn(0); 795 } 796 797 /*@ 798 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 799 800 Not Collective 801 802 Input Parameter: 803 . event - The event id 804 805 Usage: 806 .vb 807 PetscLogEventDeactivate(VEC_SetValues); 808 [code where you do not want to log VecSetValues()] 809 PetscLogEventActivate(VEC_SetValues); 810 [code where you do want to log VecSetValues()] 811 .ve 812 813 Note: 814 The event may be either a pre-defined PETSc event (found in 815 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 816 817 Level: advanced 818 819 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 820 @*/ 821 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) 822 { 823 PetscStageLog stageLog; 824 int stage; 825 826 PetscFunctionBegin; 827 PetscCall(PetscLogGetStageLog(&stageLog)); 828 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 829 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 830 PetscFunctionReturn(0); 831 } 832 833 /*@ 834 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called 835 836 Not Collective 837 838 Input Parameter: 839 . event - The event id 840 841 Usage: 842 .vb 843 PetscLogEventDeactivatePush(VEC_SetValues); 844 [code where you do not want to log VecSetValues()] 845 PetscLogEventDeactivatePop(VEC_SetValues); 846 [code where you do want to log VecSetValues()] 847 .ve 848 849 Note: 850 The event may be either a pre-defined PETSc event (found in 851 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 852 853 Level: advanced 854 855 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()` 856 @*/ 857 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) 858 { 859 PetscStageLog stageLog; 860 int stage; 861 862 PetscFunctionBegin; 863 PetscCall(PetscLogGetStageLog(&stageLog)); 864 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 865 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 866 PetscFunctionReturn(0); 867 } 868 869 /*@ 870 PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()` 871 872 Not Collective 873 874 Input Parameter: 875 . event - The event id 876 877 Usage: 878 .vb 879 PetscLogEventDeactivatePush(VEC_SetValues); 880 [code where you do not want to log VecSetValues()] 881 PetscLogEventDeactivatePop(VEC_SetValues); 882 [code where you do want to log VecSetValues()] 883 .ve 884 885 Note: 886 The event may be either a pre-defined PETSc event (found in 887 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 888 889 Level: advanced 890 891 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 892 @*/ 893 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) 894 { 895 PetscStageLog stageLog; 896 int stage; 897 898 PetscFunctionBegin; 899 PetscCall(PetscLogGetStageLog(&stageLog)); 900 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 901 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 902 PetscFunctionReturn(0); 903 } 904 905 /*@ 906 PetscLogEventSetActiveAll - Turns on logging of all events 907 908 Not Collective 909 910 Input Parameters: 911 + event - The event id 912 - isActive - The activity flag determining whether the event is logged 913 914 Level: advanced 915 916 .seealso: `PlogEventActivate()`, `PlogEventDeactivate()` 917 @*/ 918 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) 919 { 920 PetscStageLog stageLog; 921 int stage; 922 923 PetscFunctionBegin; 924 PetscCall(PetscLogGetStageLog(&stageLog)); 925 for (stage = 0; stage < stageLog->numStages; stage++) { 926 if (isActive) { 927 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 928 } else { 929 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 930 } 931 } 932 PetscFunctionReturn(0); 933 } 934 935 /*@ 936 PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage 937 938 Not Collective 939 940 Input Parameter: 941 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 942 943 Level: developer 944 945 .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 946 @*/ 947 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) 948 { 949 PetscStageLog stageLog; 950 int stage; 951 952 PetscFunctionBegin; 953 PetscCall(PetscLogGetStageLog(&stageLog)); 954 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 955 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 956 PetscFunctionReturn(0); 957 } 958 959 /*@ 960 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage 961 962 Not Collective 963 964 Input Parameter: 965 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 966 967 Level: developer 968 969 .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`,`PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 970 @*/ 971 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) 972 { 973 PetscStageLog stageLog; 974 int stage; 975 976 PetscFunctionBegin; 977 PetscCall(PetscLogGetStageLog(&stageLog)); 978 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 979 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 980 PetscFunctionReturn(0); 981 } 982 983 /*MC 984 PetscLogEventSync - Synchronizes the beginning of a user event. 985 986 Synopsis: 987 #include <petsclog.h> 988 PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm) 989 990 Collective 991 992 Input Parameters: 993 + e - integer associated with the event obtained from PetscLogEventRegister() 994 - comm - an MPI communicator 995 996 Usage: 997 .vb 998 PetscLogEvent USER_EVENT; 999 PetscLogEventRegister("User event",0,&USER_EVENT); 1000 PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD); 1001 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1002 [code segment to monitor] 1003 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1004 .ve 1005 1006 Note: 1007 This routine should be called only if there is not a 1008 `PetscObject` available to pass to `PetscLogEventBegin()`. 1009 1010 Level: developer 1011 1012 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1013 M*/ 1014 1015 /*MC 1016 PetscLogEventBegin - Logs the beginning of a user event. 1017 1018 Synopsis: 1019 #include <petsclog.h> 1020 PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1021 1022 Not Collective 1023 1024 Input Parameters: 1025 + e - integer associated with the event obtained from PetscLogEventRegister() 1026 - o1,o2,o3,o4 - objects associated with the event, or 0 1027 1028 Fortran Synopsis: 1029 void PetscLogEventBegin(int e,PetscErrorCode ierr) 1030 1031 Usage: 1032 .vb 1033 PetscLogEvent USER_EVENT; 1034 PetscLogDouble user_event_flops; 1035 PetscLogEventRegister("User event",0,&USER_EVENT); 1036 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1037 [code segment to monitor] 1038 PetscLogFlops(user_event_flops); 1039 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1040 .ve 1041 1042 Developer Note: 1043 `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the 1044 errors that occur in the macro directly because other packages that use this macros have used them in their 1045 own functions or methods that do not return error codes and it would be disruptive to change the current 1046 behavior. 1047 1048 Level: intermediate 1049 1050 .seealso: `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1051 M*/ 1052 1053 /*MC 1054 PetscLogEventEnd - Log the end of a user event. 1055 1056 Synopsis: 1057 #include <petsclog.h> 1058 PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1059 1060 Not Collective 1061 1062 Input Parameters: 1063 + e - integer associated with the event obtained with PetscLogEventRegister() 1064 - o1,o2,o3,o4 - objects associated with the event, or 0 1065 1066 Fortran Synopsis: 1067 void PetscLogEventEnd(int e,PetscErrorCode ierr) 1068 1069 Usage: 1070 .vb 1071 PetscLogEvent USER_EVENT; 1072 PetscLogDouble user_event_flops; 1073 PetscLogEventRegister("User event",0,&USER_EVENT,); 1074 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1075 [code segment to monitor] 1076 PetscLogFlops(user_event_flops); 1077 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1078 .ve 1079 1080 Level: intermediate 1081 1082 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1083 M*/ 1084 1085 /*@C 1086 PetscLogEventGetId - Returns the event id when given the event name. 1087 1088 Not Collective 1089 1090 Input Parameter: 1091 . name - The event name 1092 1093 Output Parameter: 1094 . event - The event, or -1 if no event with that name exists 1095 1096 Level: intermediate 1097 1098 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1099 @*/ 1100 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) 1101 { 1102 PetscStageLog stageLog; 1103 1104 PetscFunctionBegin; 1105 PetscCall(PetscLogGetStageLog(&stageLog)); 1106 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event) 1111 { 1112 PetscFunctionBegin; 1113 PetscCall(PetscIntStackPush(current_log_event_stack, event)); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 PetscErrorCode PetscLogPopCurrentEvent_Internal(void) 1118 { 1119 PetscFunctionBegin; 1120 PetscCall(PetscIntStackPop(current_log_event_stack, NULL)); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event) 1125 { 1126 PetscBool empty; 1127 1128 PetscFunctionBegin; 1129 PetscValidIntPointer(event, 1); 1130 *event = PETSC_DECIDE; 1131 PetscCall(PetscIntStackEmpty(current_log_event_stack, &empty)); 1132 if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack, event)); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event) 1137 { 1138 PetscFunctionBegin; 1139 if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event, NULL, NULL, NULL, NULL)); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event) 1144 { 1145 PetscStageLog stageLog; 1146 PetscEventPerfLog eventLog; 1147 int stage; 1148 1149 PetscFunctionBegin; 1150 if (event == PETSC_DECIDE) PetscFunctionReturn(0); 1151 PetscCall(PetscLogEventBegin(event, NULL, NULL, NULL, NULL)); 1152 PetscCall(PetscLogGetStageLog(&stageLog)); 1153 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1154 PetscCall(PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog)); 1155 eventLog->eventInfo[event].count--; 1156 PetscFunctionReturn(0); 1157 } 1158 1159 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1160 /*@C 1161 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1162 be read by bin/petscview. This program no longer exists. 1163 1164 Collective on `PETSC_COMM_WORLD` 1165 1166 Input Parameter: 1167 . name - an optional file name 1168 1169 Usage: 1170 .vb 1171 PetscInitialize(...); 1172 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1173 ... code ... 1174 PetscLogDump(filename); 1175 PetscFinalize(); 1176 .ve 1177 1178 Note: 1179 The default file name is 1180 $ Log.<rank> 1181 where <rank> is the processor number. If no name is specified, 1182 this file will be used. 1183 1184 Level: advanced 1185 1186 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1187 @*/ 1188 PetscErrorCode PetscLogDump(const char sname[]) 1189 { 1190 PetscStageLog stageLog; 1191 PetscEventPerfInfo *eventInfo; 1192 FILE *fd; 1193 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1194 PetscLogDouble flops, _TotalTime; 1195 PetscMPIInt rank; 1196 int action, object, curStage; 1197 PetscLogEvent event; 1198 1199 PetscFunctionBegin; 1200 /* Calculate the total elapsed time */ 1201 PetscTime(&_TotalTime); 1202 _TotalTime -= petsc_BaseTime; 1203 /* Open log file */ 1204 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1205 if (sname && sname[0]) sprintf(file, "%s.%d", sname, rank); 1206 else sprintf(file, "Log.%d", rank); 1207 PetscCall(PetscFixFilename(file, fname)); 1208 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1209 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1210 /* Output totals */ 1211 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1212 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1213 /* Output actions */ 1214 if (petsc_logActions) { 1215 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1216 for (action = 0; action < petsc_numActions; action++) { 1217 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1218 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1219 } 1220 } 1221 /* Output objects */ 1222 if (petsc_logObjects) { 1223 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1224 for (object = 0; object < petsc_numObjects; object++) { 1225 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem)); 1226 if (!petsc_objects[object].name[0]) { 1227 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n")); 1228 } else { 1229 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1230 } 1231 if (petsc_objects[object].info[0] != 0) { 1232 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1233 } else { 1234 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1235 } 1236 } 1237 } 1238 /* Output events */ 1239 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1240 PetscCall(PetscLogGetStageLog(&stageLog)); 1241 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1242 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1243 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1244 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time; 1245 else flops = 0.0; 1246 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops)); 1247 } 1248 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1249 PetscFunctionReturn(0); 1250 } 1251 1252 /* 1253 PetscLogView_Detailed - Each process prints the times for its own events 1254 1255 */ 1256 PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) 1257 { 1258 PetscStageLog stageLog; 1259 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1260 PetscLogDouble locTotalTime, numRed, maxMem; 1261 int numStages, numEvents, stage, event; 1262 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1263 PetscMPIInt rank, size; 1264 1265 PetscFunctionBegin; 1266 PetscCallMPI(MPI_Comm_size(comm, &size)); 1267 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1268 /* Must preserve reduction count before we go on */ 1269 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1270 /* Get the total elapsed time */ 1271 PetscTime(&locTotalTime); 1272 locTotalTime -= petsc_BaseTime; 1273 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 1274 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 1275 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 1276 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 1277 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 1278 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 1279 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 1280 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 1281 PetscCall(PetscLogGetStageLog(&stageLog)); 1282 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1283 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1284 for (stage = 0; stage < numStages; stage++) { 1285 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name)); 1286 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name)); 1287 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1288 for (event = 0; event < numEvents; event++) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name)); 1289 } 1290 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1291 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1292 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1293 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1294 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1295 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1296 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1297 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects)); 1298 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1299 PetscCall(PetscViewerFlush(viewer)); 1300 for (stage = 0; stage < numStages; stage++) { 1301 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1302 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, 1303 stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1304 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1305 for (event = 0; event < numEvents; event++) { 1306 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1307 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1308 stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, 1309 eventInfo->flops)); 1310 if (eventInfo->dof[0] >= 0.) { 1311 PetscInt d, e; 1312 1313 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1314 for (d = 0; d < 8; ++d) { 1315 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1316 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1317 } 1318 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1319 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1320 for (e = 0; e < 8; ++e) { 1321 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1322 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1323 } 1324 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1325 } 1326 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1327 } 1328 } 1329 PetscCall(PetscViewerFlush(viewer)); 1330 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 /* 1335 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1336 */ 1337 PetscErrorCode PetscLogView_CSV(PetscViewer viewer) 1338 { 1339 PetscStageLog stageLog; 1340 PetscEventPerfInfo *eventInfo = NULL; 1341 PetscLogDouble locTotalTime, maxMem; 1342 int numStages, numEvents, stage, event; 1343 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1344 PetscMPIInt rank, size; 1345 1346 PetscFunctionBegin; 1347 PetscCallMPI(MPI_Comm_size(comm, &size)); 1348 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1349 /* Must preserve reduction count before we go on */ 1350 /* Get the total elapsed time */ 1351 PetscTime(&locTotalTime); 1352 locTotalTime -= petsc_BaseTime; 1353 PetscCall(PetscLogGetStageLog(&stageLog)); 1354 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1355 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1356 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1357 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1358 PetscCall(PetscViewerFlush(viewer)); 1359 for (stage = 0; stage < numStages; stage++) { 1360 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1361 1362 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1363 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1364 for (event = 0; event < numEvents; event++) { 1365 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1366 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, 1367 eventInfo->numReductions, eventInfo->flops)); 1368 if (eventInfo->dof[0] >= 0.) { 1369 PetscInt d, e; 1370 1371 for (d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1372 for (e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1373 } 1374 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1375 } 1376 } 1377 PetscCall(PetscViewerFlush(viewer)); 1378 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1379 PetscFunctionReturn(0); 1380 } 1381 1382 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) 1383 { 1384 PetscFunctionBegin; 1385 if (!PetscLogSyncOn) PetscFunctionReturn(0); 1386 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1387 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1388 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1389 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1390 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1391 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1392 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1393 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1394 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1395 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1396 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1397 PetscFunctionReturn(0); 1398 } 1399 1400 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) 1401 { 1402 PetscFunctionBegin; 1403 if (PetscDefined(USE_DEBUG)) { 1404 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1405 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1406 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1407 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1408 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1409 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1410 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1411 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1412 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1413 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1414 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1415 } 1416 PetscFunctionReturn(0); 1417 } 1418 1419 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) 1420 { 1421 #if defined(PETSC_HAVE_DEVICE) 1422 PetscMPIInt size; 1423 1424 PetscFunctionBegin; 1425 PetscCallMPI(MPI_Comm_size(comm, &size)); 1426 if (use_gpu_aware_mpi || size == 1) PetscFunctionReturn(0); 1427 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1428 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1429 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1430 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1431 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1432 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1433 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1434 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1435 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1436 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1437 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1438 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1439 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1440 PetscFunctionReturn(0); 1441 #else 1442 return 0; 1443 #endif 1444 } 1445 1446 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) 1447 { 1448 #if defined(PETSC_HAVE_DEVICE) 1449 1450 PetscFunctionBegin; 1451 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(0); 1452 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1453 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1454 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1455 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1456 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1457 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1458 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1459 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1460 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1461 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1462 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1463 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1464 PetscFunctionReturn(0); 1465 #else 1466 return 0; 1467 #endif 1468 } 1469 1470 PetscErrorCode PetscLogView_Default(PetscViewer viewer) 1471 { 1472 FILE *fd; 1473 PetscLogDouble zero = 0.0; 1474 PetscStageLog stageLog; 1475 PetscStageInfo *stageInfo = NULL; 1476 PetscEventPerfInfo *eventInfo = NULL; 1477 PetscClassPerfInfo *classInfo; 1478 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1479 const char *name; 1480 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1481 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1482 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1483 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1484 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1485 PetscLogDouble min, max, tot, ratio, avg, x, y; 1486 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1487 #if defined(PETSC_HAVE_DEVICE) 1488 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1489 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1490 #endif 1491 PetscMPIInt minC, maxC; 1492 PetscMPIInt size, rank; 1493 PetscBool *localStageUsed, *stageUsed; 1494 PetscBool *localStageVisible, *stageVisible; 1495 int numStages, localNumEvents, numEvents; 1496 int stage, oclass; 1497 PetscLogEvent event; 1498 PetscErrorCode ierr = 0; 1499 char version[256]; 1500 MPI_Comm comm; 1501 #if defined(PETSC_HAVE_DEVICE) 1502 PetscLogEvent eventid; 1503 PetscInt64 nas = 0x7FF0000000000002; 1504 #endif 1505 1506 PetscFunctionBegin; 1507 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1508 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1509 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1510 PetscCallMPI(MPI_Comm_size(comm, &size)); 1511 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1512 /* Get the total elapsed time */ 1513 PetscTime(&locTotalTime); 1514 locTotalTime -= petsc_BaseTime; 1515 1516 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1517 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1518 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1519 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1520 PetscCall(PetscLogViewWarnSync(comm, fd)); 1521 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1522 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1523 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1524 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1525 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1526 PetscCall(PetscGetUserName(username, sizeof(username))); 1527 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1528 PetscCall(PetscGetDate(date, sizeof(date))); 1529 PetscCall(PetscGetVersion(version, sizeof(version))); 1530 if (size == 1) { 1531 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1532 } else { 1533 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1534 } 1535 #if defined(PETSC_HAVE_OPENMP) 1536 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1537 #endif 1538 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1539 1540 /* Must preserve reduction count before we go on */ 1541 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1542 1543 /* Calculate summary information */ 1544 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1545 /* Time */ 1546 PetscCallMPI(MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1547 PetscCallMPI(MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1548 PetscCallMPI(MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1549 avg = tot / ((PetscLogDouble)size); 1550 if (min != 0.0) ratio = max / min; 1551 else ratio = 0.0; 1552 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1553 TotalTime = tot; 1554 /* Objects */ 1555 avg = (PetscLogDouble)petsc_numObjects; 1556 PetscCallMPI(MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1557 PetscCallMPI(MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1558 PetscCallMPI(MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1559 avg = tot / ((PetscLogDouble)size); 1560 if (min != 0.0) ratio = max / min; 1561 else ratio = 0.0; 1562 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1563 /* Flops */ 1564 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1565 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1566 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1567 avg = tot / ((PetscLogDouble)size); 1568 if (min != 0.0) ratio = max / min; 1569 else ratio = 0.0; 1570 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1571 TotalFlops = tot; 1572 /* Flops/sec -- Must talk to Barry here */ 1573 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1574 else flops = 0.0; 1575 PetscCallMPI(MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1576 PetscCallMPI(MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1577 PetscCallMPI(MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1578 avg = tot / ((PetscLogDouble)size); 1579 if (min != 0.0) ratio = max / min; 1580 else ratio = 0.0; 1581 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1582 /* Memory */ 1583 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1584 if (mem > 0.0) { 1585 PetscCallMPI(MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1586 PetscCallMPI(MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1587 PetscCallMPI(MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1588 avg = tot / ((PetscLogDouble)size); 1589 if (min != 0.0) ratio = max / min; 1590 else ratio = 0.0; 1591 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1592 } 1593 /* Messages */ 1594 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1595 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1596 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1597 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1598 avg = tot / ((PetscLogDouble)size); 1599 if (min != 0.0) ratio = max / min; 1600 else ratio = 0.0; 1601 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1602 numMessages = tot; 1603 /* Message Lengths */ 1604 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1605 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1606 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1607 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1608 if (numMessages != 0) avg = tot / numMessages; 1609 else avg = 0.0; 1610 if (min != 0.0) ratio = max / min; 1611 else ratio = 0.0; 1612 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1613 messageLength = tot; 1614 /* Reductions */ 1615 PetscCallMPI(MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1616 PetscCallMPI(MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1617 PetscCallMPI(MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1618 if (min != 0.0) ratio = max / min; 1619 else ratio = 0.0; 1620 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1621 numReductions = red; /* wrong because uses count from process zero */ 1622 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1623 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1624 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1625 1626 /* Get total number of stages -- 1627 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1628 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1629 This seems best accomplished by assoicating a communicator with each stage. 1630 */ 1631 PetscCall(PetscLogGetStageLog(&stageLog)); 1632 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1633 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1634 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1635 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1636 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1637 if (numStages > 0) { 1638 stageInfo = stageLog->stageInfo; 1639 for (stage = 0; stage < numStages; stage++) { 1640 if (stage < stageLog->numStages) { 1641 localStageUsed[stage] = stageInfo[stage].used; 1642 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1643 } else { 1644 localStageUsed[stage] = PETSC_FALSE; 1645 localStageVisible[stage] = PETSC_TRUE; 1646 } 1647 } 1648 PetscCallMPI(MPI_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1649 PetscCallMPI(MPI_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1650 for (stage = 0; stage < numStages; stage++) { 1651 if (stageUsed[stage]) { 1652 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1653 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1654 break; 1655 } 1656 } 1657 for (stage = 0; stage < numStages; stage++) { 1658 if (!stageUsed[stage]) continue; 1659 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1660 if (localStageUsed[stage]) { 1661 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1662 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1663 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1664 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1665 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1666 name = stageInfo[stage].name; 1667 } else { 1668 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1669 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1670 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1671 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1672 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1673 name = ""; 1674 } 1675 mess *= 0.5; 1676 messLen *= 0.5; 1677 red /= size; 1678 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1679 else fracTime = 0.0; 1680 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1681 else fracFlops = 0.0; 1682 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1683 if (numMessages != 0.0) fracMessages = mess / numMessages; 1684 else fracMessages = 0.0; 1685 if (mess != 0.0) avgMessLen = messLen / mess; 1686 else avgMessLen = 0.0; 1687 if (messageLength != 0.0) fracLength = messLen / messageLength; 1688 else fracLength = 0.0; 1689 if (numReductions != 0.0) fracReductions = red / numReductions; 1690 else fracReductions = 0.0; 1691 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1692 } 1693 } 1694 1695 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1696 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1697 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1698 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1699 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1700 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1701 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1702 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1703 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1704 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1705 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1706 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1707 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1708 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1709 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1710 if (PetscLogMemory) { 1711 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1712 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1713 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1714 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1715 } 1716 #if defined(PETSC_HAVE_DEVICE) 1717 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1718 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1719 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1720 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1721 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1722 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1723 #endif 1724 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1725 1726 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1727 1728 /* Report events */ 1729 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1730 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1731 #if defined(PETSC_HAVE_DEVICE) 1732 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1733 #endif 1734 PetscCall(PetscFPrintf(comm, fd, "\n")); 1735 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1736 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1737 #if defined(PETSC_HAVE_DEVICE) 1738 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1739 #endif 1740 PetscCall(PetscFPrintf(comm, fd, "\n")); 1741 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1742 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1743 #if defined(PETSC_HAVE_DEVICE) 1744 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1745 #endif 1746 PetscCall(PetscFPrintf(comm, fd, "\n")); 1747 1748 #if defined(PETSC_HAVE_DEVICE) 1749 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1750 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1751 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1752 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1753 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1754 #endif 1755 1756 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1757 for (stage = 0; stage < numStages; stage++) { 1758 if (!stageVisible[stage]) continue; 1759 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1760 if (localStageUsed[stage]) { 1761 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1762 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1763 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1764 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1765 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1766 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1767 } else { 1768 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1769 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1770 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1771 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1772 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1773 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1774 } 1775 mess *= 0.5; 1776 messLen *= 0.5; 1777 red /= size; 1778 1779 /* Get total number of events in this stage -- 1780 Currently, a single processor can register more events than another, but events must all be registered in order, 1781 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1782 on the event ID. This seems best accomplished by associating a communicator with each stage. 1783 1784 Problem: If the event did not happen on proc 1, its name will not be available. 1785 Problem: Event visibility is not implemented 1786 */ 1787 if (localStageUsed[stage]) { 1788 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1789 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1790 } else localNumEvents = 0; 1791 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1792 for (event = 0; event < numEvents; event++) { 1793 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1794 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1795 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; 1796 else flopr = 0.0; 1797 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1798 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1799 PetscCallMPI(MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1800 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1801 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1802 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1803 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1804 PetscCallMPI(MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1805 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1806 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1807 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1808 if (PetscLogMemory) { 1809 PetscCallMPI(MPI_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1810 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1811 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1812 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1813 } 1814 #if defined(PETSC_HAVE_DEVICE) 1815 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1816 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1817 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1818 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1819 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1820 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1821 #endif 1822 name = stageLog->eventLog->eventInfo[event].name; 1823 } else { 1824 flopr = 0.0; 1825 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1826 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1827 PetscCallMPI(MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1828 PetscCallMPI(MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1829 PetscCallMPI(MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1830 PetscCallMPI(MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1831 PetscCallMPI(MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1832 PetscCallMPI(MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1833 PetscCallMPI(MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1834 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1835 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1836 if (PetscLogMemory) { 1837 PetscCallMPI(MPI_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1838 PetscCallMPI(MPI_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1839 PetscCallMPI(MPI_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1840 PetscCallMPI(MPI_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1841 } 1842 #if defined(PETSC_HAVE_DEVICE) 1843 PetscCallMPI(MPI_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1844 PetscCallMPI(MPI_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1845 PetscCallMPI(MPI_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1846 PetscCallMPI(MPI_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1847 PetscCallMPI(MPI_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1848 PetscCallMPI(MPI_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1849 #endif 1850 name = ""; 1851 } 1852 if (mint < 0.0) { 1853 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name)); 1854 mint = 0; 1855 } 1856 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, name); 1857 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1858 #if defined(PETSC_HAVE_DEVICE) 1859 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1860 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 1861 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 1862 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 1863 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 1864 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 1865 } 1866 } 1867 #endif 1868 totm *= 0.5; 1869 totml *= 0.5; 1870 totr /= size; 1871 1872 if (maxC != 0) { 1873 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 1874 else ratC = 0.0; 1875 if (mint != 0.0) ratt = maxt / mint; 1876 else ratt = 0.0; 1877 if (minf != 0.0) ratf = maxf / minf; 1878 else ratf = 0.0; 1879 if (TotalTime != 0.0) fracTime = tott / TotalTime; 1880 else fracTime = 0.0; 1881 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 1882 else fracFlops = 0.0; 1883 if (stageTime != 0.0) fracStageTime = tott / stageTime; 1884 else fracStageTime = 0.0; 1885 if (flops != 0.0) fracStageFlops = totf / flops; 1886 else fracStageFlops = 0.0; 1887 if (numMessages != 0.0) fracMess = totm / numMessages; 1888 else fracMess = 0.0; 1889 if (messageLength != 0.0) fracMessLen = totml / messageLength; 1890 else fracMessLen = 0.0; 1891 if (numReductions != 0.0) fracRed = totr / numReductions; 1892 else fracRed = 0.0; 1893 if (mess != 0.0) fracStageMess = totm / mess; 1894 else fracStageMess = 0.0; 1895 if (messLen != 0.0) fracStageMessLen = totml / messLen; 1896 else fracStageMessLen = 0.0; 1897 if (red != 0.0) fracStageRed = totr / red; 1898 else fracStageRed = 0.0; 1899 if (totm != 0.0) totml /= totm; 1900 else totml = 0.0; 1901 if (maxt != 0.0) flopr = totf / maxt; 1902 else flopr = 0.0; 1903 if (fracStageTime > 1.00) PetscCall(PetscFPrintf(comm, fd, "Warning -- total time of event greater than time of entire stage -- something is wrong with the timer\n")); 1904 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d%4.1f %5.4e%4.1f %3.2e%4.1f %2.1e %2.1e %2.1e%3.0f%3.0f%3.0f%3.0f%3.0f %3.0f%3.0f%3.0f%3.0f%3.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 1905 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 1906 #if defined(PETSC_HAVE_DEVICE) 1907 if (totf != 0.0) fracgflops = gflops / totf; 1908 else fracgflops = 0.0; 1909 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 1910 else gflopr = 0.0; 1911 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e% 3.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 1912 #endif 1913 PetscCall(PetscFPrintf(comm, fd, "\n")); 1914 } 1915 } 1916 } 1917 1918 /* Memory usage and object creation */ 1919 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1920 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1921 #if defined(PETSC_HAVE_DEVICE) 1922 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1923 #endif 1924 PetscCall(PetscFPrintf(comm, fd, "\n")); 1925 PetscCall(PetscFPrintf(comm, fd, "\n")); 1926 1927 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1928 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1929 stats for stages local to processor sets. 1930 */ 1931 /* We should figure out the longest object name here (now 20 characters) */ 1932 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 1933 for (stage = 0; stage < numStages; stage++) { 1934 if (localStageUsed[stage]) { 1935 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 1936 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1937 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 1938 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 1939 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions)); 1940 } 1941 } 1942 } else { 1943 if (!localStageVisible[stage]) continue; 1944 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1945 } 1946 } 1947 1948 PetscCall(PetscFree(localStageUsed)); 1949 PetscCall(PetscFree(stageUsed)); 1950 PetscCall(PetscFree(localStageVisible)); 1951 PetscCall(PetscFree(stageVisible)); 1952 1953 /* Information unrelated to this particular run */ 1954 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 1955 PetscTime(&y); 1956 PetscTime(&x); 1957 PetscTime(&y); 1958 PetscTime(&y); 1959 PetscTime(&y); 1960 PetscTime(&y); 1961 PetscTime(&y); 1962 PetscTime(&y); 1963 PetscTime(&y); 1964 PetscTime(&y); 1965 PetscTime(&y); 1966 PetscTime(&y); 1967 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 1968 /* MPI information */ 1969 if (size > 1) { 1970 MPI_Status status; 1971 PetscMPIInt tag; 1972 MPI_Comm newcomm; 1973 1974 PetscCallMPI(MPI_Barrier(comm)); 1975 PetscTime(&x); 1976 PetscCallMPI(MPI_Barrier(comm)); 1977 PetscCallMPI(MPI_Barrier(comm)); 1978 PetscCallMPI(MPI_Barrier(comm)); 1979 PetscCallMPI(MPI_Barrier(comm)); 1980 PetscCallMPI(MPI_Barrier(comm)); 1981 PetscTime(&y); 1982 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 1983 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 1984 PetscCallMPI(MPI_Barrier(comm)); 1985 if (rank) { 1986 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 1987 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 1988 } else { 1989 PetscTime(&x); 1990 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 1991 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 1992 PetscTime(&y); 1993 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 1994 } 1995 PetscCall(PetscCommDestroy(&newcomm)); 1996 } 1997 PetscCall(PetscOptionsView(NULL, viewer)); 1998 1999 /* Machine and compile information */ 2000 #if defined(PETSC_USE_FORTRAN_KERNELS) 2001 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 2002 #else 2003 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 2004 #endif 2005 #if defined(PETSC_USE_64BIT_INDICES) 2006 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64 bit PetscInt\n")); 2007 #elif defined(PETSC_USE___FLOAT128) 2008 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32 bit PetscInt\n")); 2009 #endif 2010 #if defined(PETSC_USE_REAL_SINGLE) 2011 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 2012 #elif defined(PETSC_USE___FLOAT128) 2013 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 2014 #endif 2015 #if defined(PETSC_USE_REAL_MAT_SINGLE) 2016 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 2017 #else 2018 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 2019 #endif 2020 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 2021 2022 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 2023 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 2024 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 2025 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 2026 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 2027 2028 /* Cleanup */ 2029 PetscCall(PetscFPrintf(comm, fd, "\n")); 2030 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 2031 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 2032 PetscCall(PetscFPTrapPop()); 2033 PetscFunctionReturn(0); 2034 } 2035 2036 /*@C 2037 PetscLogView - Prints a summary of the logging. 2038 2039 Collective over MPI_Comm 2040 2041 Input Parameter: 2042 . viewer - an ASCII viewer 2043 2044 Options Database Keys: 2045 + -log_view [:filename] - Prints summary of log information 2046 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2047 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2048 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2049 . -log_view_memory - Also display memory usage in each event 2050 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2051 . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation 2052 - -log_trace [filename] - Displays a trace of what each process is doing 2053 2054 Notes: 2055 It is possible to control the logging programatically but we recommend using the options database approach whenever possible 2056 By default the summary is printed to stdout. 2057 2058 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2059 2060 If PETSc is configured with --with-logging=0 then this functionality is not available 2061 2062 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2063 directory then open filename.xml with your browser. Specific notes for certain browsers 2064 $ Firefox and Internet explorer - simply open the file 2065 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2066 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2067 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2068 your browser. 2069 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2070 window and render the XML log file contents. 2071 2072 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2073 2074 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2075 or using speedscope (https://www.speedscope.app). 2076 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2077 2078 Level: beginner 2079 2080 .seealso: `PetscLogDefaultBegin()`, `PetscLogDump()` 2081 @*/ 2082 PetscErrorCode PetscLogView(PetscViewer viewer) 2083 { 2084 PetscBool isascii; 2085 PetscViewerFormat format; 2086 int stage, lastStage; 2087 PetscStageLog stageLog; 2088 2089 PetscFunctionBegin; 2090 PetscCheck(PetscLogPLB, PETSC_COMM_SELF, PETSC_ERR_SUP, "Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2091 /* Pop off any stages the user forgot to remove */ 2092 lastStage = 0; 2093 PetscCall(PetscLogGetStageLog(&stageLog)); 2094 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2095 while (stage >= 0) { 2096 lastStage = stage; 2097 PetscCall(PetscStageLogPop(stageLog)); 2098 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2099 } 2100 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2101 PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Currently can only view logging to ASCII"); 2102 PetscCall(PetscViewerGetFormat(viewer, &format)); 2103 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2104 PetscCall(PetscLogView_Default(viewer)); 2105 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2106 PetscCall(PetscLogView_Detailed(viewer)); 2107 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2108 PetscCall(PetscLogView_CSV(viewer)); 2109 } else if (format == PETSC_VIEWER_ASCII_XML) { 2110 PetscCall(PetscLogView_Nested(viewer)); 2111 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2112 PetscCall(PetscLogView_Flamegraph(viewer)); 2113 } 2114 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2115 PetscFunctionReturn(0); 2116 } 2117 2118 /*@C 2119 PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed. 2120 2121 Collective on `PETSC_COMM_WORLD` 2122 2123 Level: developer 2124 2125 .seealso: `PetscLogView()` 2126 @*/ 2127 PetscErrorCode PetscLogViewFromOptions(void) 2128 { 2129 PetscViewer viewer; 2130 PetscBool flg; 2131 PetscViewerFormat format; 2132 2133 PetscFunctionBegin; 2134 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg)); 2135 if (flg) { 2136 PetscCall(PetscViewerPushFormat(viewer, format)); 2137 PetscCall(PetscLogView(viewer)); 2138 PetscCall(PetscViewerPopFormat(viewer)); 2139 PetscCall(PetscViewerDestroy(&viewer)); 2140 } 2141 PetscFunctionReturn(0); 2142 } 2143 2144 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2145 /*@C 2146 PetscGetFlops - Returns the number of flops used on this processor 2147 since the program began. 2148 2149 Not Collective 2150 2151 Output Parameter: 2152 flops - number of floating point operations 2153 2154 Notes: 2155 A global counter logs all PETSc flop counts. The user can use 2156 `PetscLogFlops()` to increment this counter to include flops for the 2157 application code. 2158 2159 A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank 2160 2161 Level: intermediate 2162 2163 .seealso: `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()` 2164 @*/ 2165 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) 2166 { 2167 PetscFunctionBegin; 2168 *flops = petsc_TotalFlops; 2169 PetscFunctionReturn(0); 2170 } 2171 2172 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2173 { 2174 size_t fullLength; 2175 va_list Argp; 2176 2177 PetscFunctionBegin; 2178 if (!petsc_logObjects) PetscFunctionReturn(0); 2179 va_start(Argp, format); 2180 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp)); 2181 va_end(Argp); 2182 PetscFunctionReturn(0); 2183 } 2184 2185 /*MC 2186 PetscLogFlops - Adds floating point operations to the global counter. 2187 2188 Synopsis: 2189 #include <petsclog.h> 2190 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2191 2192 Not Collective 2193 2194 Input Parameter: 2195 . f - flop counter 2196 2197 Usage: 2198 .vb 2199 PetscLogEvent USER_EVENT; 2200 PetscLogEventRegister("User event",0,&USER_EVENT); 2201 PetscLogEventBegin(USER_EVENT,0,0,0,0); 2202 [code segment to monitor] 2203 PetscLogFlops(user_flops) 2204 PetscLogEventEnd(USER_EVENT,0,0,0,0); 2205 .ve 2206 2207 Note: 2208 A global counter logs all PETSc flop counts. The user can use 2209 PetscLogFlops() to increment this counter to include flops for the 2210 application code. 2211 2212 Level: intermediate 2213 2214 .seealso: `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2215 M*/ 2216 2217 /*MC 2218 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) 2219 to get accurate timings 2220 2221 Synopsis: 2222 #include <petsclog.h> 2223 void PetscPreLoadBegin(PetscBool flag,char *name); 2224 2225 Not Collective 2226 2227 Input Parameters: 2228 + flag - PETSC_TRUE to run twice, `PETSC_FALSE` to run once, may be overridden 2229 with command line option -preload true or -preload false 2230 - name - name of first stage (lines of code timed separately with -log_view) to 2231 be preloaded 2232 2233 Usage: 2234 .vb 2235 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2236 lines of code 2237 PetscPreLoadStage("second stage"); 2238 lines of code 2239 PetscPreLoadEnd(); 2240 .ve 2241 2242 Note: 2243 Only works in C/C++, not Fortran 2244 2245 Flags available within the macro. 2246 + PetscPreLoadingUsed - true if we are or have done preloading 2247 . PetscPreLoadingOn - true if it is CURRENTLY doing preload 2248 . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second 2249 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on 2250 The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin() 2251 and PetscPreLoadEnd() 2252 2253 Level: intermediate 2254 2255 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2256 M*/ 2257 2258 /*MC 2259 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) 2260 to get accurate timings 2261 2262 Synopsis: 2263 #include <petsclog.h> 2264 void PetscPreLoadEnd(void); 2265 2266 Not Collective 2267 2268 Usage: 2269 .vb 2270 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2271 lines of code 2272 PetscPreLoadStage("second stage"); 2273 lines of code 2274 PetscPreLoadEnd(); 2275 .ve 2276 2277 Note: 2278 Only works in C/C++ not fortran 2279 2280 Level: intermediate 2281 2282 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2283 M*/ 2284 2285 /*MC 2286 PetscPreLoadStage - Start a new segment of code to be timed separately. 2287 to get accurate timings 2288 2289 Synopsis: 2290 #include <petsclog.h> 2291 void PetscPreLoadStage(char *name); 2292 2293 Not Collective 2294 2295 Usage: 2296 .vb 2297 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2298 lines of code 2299 PetscPreLoadStage("second stage"); 2300 lines of code 2301 PetscPreLoadEnd(); 2302 .ve 2303 2304 Note: 2305 Only works in C/C++ not fortran 2306 2307 Level: intermediate 2308 2309 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2310 M*/ 2311 2312 #if PetscDefined(HAVE_DEVICE) 2313 #include <petsc/private/deviceimpl.h> 2314 2315 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2316 2317 /* 2318 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2319 because it will result in timing results that cannot be interpreted. 2320 */ 2321 static PetscErrorCode PetscLogGpuTime_Off(void) 2322 { 2323 PetscLogGpuTimeFlag = PETSC_FALSE; 2324 return 0; 2325 } 2326 2327 /*@C 2328 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2329 2330 Options Database Key: 2331 . -log_view_gpu_time - provide the GPU times in the -log_view output 2332 2333 Notes: 2334 Turning on the timing of the 2335 GPU kernels can slow down the entire computation and should only be used when studying the performance 2336 of operations on GPU such as vector operations and matrix-vector operations. 2337 2338 This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off. 2339 2340 Level: advanced 2341 2342 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2343 @*/ 2344 PetscErrorCode PetscLogGpuTime(void) 2345 { 2346 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2347 PetscLogGpuTimeFlag = PETSC_TRUE; 2348 return 0; 2349 } 2350 2351 /*@C 2352 PetscLogGpuTimeBegin - Start timer for device 2353 2354 Notes: 2355 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times). 2356 2357 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times). 2358 2359 There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()` 2360 2361 This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space. 2362 2363 The regular logging captures the time for data transfers and any CPU activites during the event 2364 2365 It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel. 2366 2367 Developer Notes: 2368 The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`. 2369 2370 `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the 2371 event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution, 2372 but not continued GPU excution, until the timer event is recorded. 2373 2374 Level: intermediate 2375 2376 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2377 @*/ 2378 PetscErrorCode PetscLogGpuTimeBegin(void) 2379 { 2380 PetscFunctionBegin; 2381 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2382 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2383 PetscDeviceContext dctx; 2384 2385 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2386 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2387 } else { 2388 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2389 } 2390 PetscFunctionReturn(0); 2391 } 2392 2393 /*@C 2394 PetscLogGpuTimeEnd - Stop timer for device 2395 2396 Level: intermediate 2397 2398 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2399 @*/ 2400 PetscErrorCode PetscLogGpuTimeEnd(void) 2401 { 2402 PetscFunctionBegin; 2403 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2404 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2405 PetscDeviceContext dctx; 2406 PetscLogDouble elapsed; 2407 2408 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2409 PetscCall(PetscDeviceContextEndTimer_Internal(dctx, &elapsed)); 2410 petsc_gtime += (elapsed / 1000.0); 2411 } else { 2412 PetscCall(PetscTimeAdd(&petsc_gtime)); 2413 } 2414 PetscFunctionReturn(0); 2415 } 2416 #endif /* end of PETSC_HAVE_DEVICE */ 2417 2418 #else /* end of -DPETSC_USE_LOG section */ 2419 2420 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2421 { 2422 PetscFunctionBegin; 2423 PetscFunctionReturn(0); 2424 } 2425 2426 #endif /* PETSC_USE_LOG*/ 2427 2428 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2429 PetscClassId PETSC_OBJECT_CLASSID = 0; 2430 2431 /*@C 2432 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2433 2434 Not Collective 2435 2436 Input Parameter: 2437 . name - The class name 2438 2439 Output Parameter: 2440 . oclass - The class id or classid 2441 2442 Level: developer 2443 2444 .seealso: `PetscLogEventRegister()` 2445 @*/ 2446 PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass) 2447 { 2448 #if defined(PETSC_USE_LOG) 2449 PetscStageLog stageLog; 2450 PetscInt stage; 2451 #endif 2452 2453 PetscFunctionBegin; 2454 *oclass = ++PETSC_LARGEST_CLASSID; 2455 #if defined(PETSC_USE_LOG) 2456 PetscCall(PetscLogGetStageLog(&stageLog)); 2457 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2458 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2459 #endif 2460 PetscFunctionReturn(0); 2461 } 2462 2463 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2464 #include <mpe.h> 2465 2466 PetscBool PetscBeganMPE = PETSC_FALSE; 2467 2468 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2469 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2470 2471 /*@C 2472 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files 2473 and slows the program down. 2474 2475 Collective over `PETSC_COMM_WORLD` 2476 2477 Options Database Key: 2478 . -log_mpe - Prints extensive log information 2479 2480 Note: 2481 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 2482 intended for production runs since it logs only flop rates and object 2483 creation (and should not significantly slow the programs). 2484 2485 Level: advanced 2486 2487 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2488 `PetscLogEventDeactivate()` 2489 @*/ 2490 PetscErrorCode PetscLogMPEBegin(void) 2491 { 2492 PetscFunctionBegin; 2493 /* Do MPE initialization */ 2494 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2495 PetscCall(PetscInfo(0, "Initializing MPE.\n")); 2496 PetscCall(MPE_Init_log()); 2497 2498 PetscBeganMPE = PETSC_TRUE; 2499 } else { 2500 PetscCall(PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n")); 2501 } 2502 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 /*@C 2507 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2508 2509 Collective over `PETSC_COMM_WORLD` 2510 2511 Level: advanced 2512 2513 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2514 @*/ 2515 PetscErrorCode PetscLogMPEDump(const char sname[]) 2516 { 2517 char name[PETSC_MAX_PATH_LEN]; 2518 2519 PetscFunctionBegin; 2520 if (PetscBeganMPE) { 2521 PetscCall(PetscInfo(0, "Finalizing MPE.\n")); 2522 if (sname) { 2523 PetscCall(PetscStrcpy(name, sname)); 2524 } else { 2525 PetscCall(PetscGetProgramName(name, sizeof(name))); 2526 } 2527 PetscCall(MPE_Finish_log(name)); 2528 } else { 2529 PetscCall(PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n")); 2530 } 2531 PetscFunctionReturn(0); 2532 } 2533 2534 #define PETSC_RGB_COLORS_MAX 39 2535 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ", 2536 "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ", 2537 "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ", 2538 "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ", 2539 "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "}; 2540 2541 /*@C 2542 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()` 2543 2544 Not collective. Maybe it should be? 2545 2546 Output Parameter: 2547 . str - character string representing the color 2548 2549 Level: developer 2550 2551 .seealso: `PetscLogEventRegister()` 2552 @*/ 2553 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) 2554 { 2555 static int idx = 0; 2556 2557 PetscFunctionBegin; 2558 *str = PetscLogMPERGBColors[idx]; 2559 idx = (idx + 1) % PETSC_RGB_COLORS_MAX; 2560 PetscFunctionReturn(0); 2561 } 2562 2563 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2564