1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 15 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 16 17 #if defined(PETSC_USE_LOG) 18 #include <petscmachineinfo.h> 19 #include <petscconfiginfo.h> 20 21 /* used in the MPI_XXX() count macros in petsclog.h */ 22 23 /* Action and object logging variables */ 24 Action *petsc_actions = NULL; 25 Object *petsc_objects = NULL; 26 PetscBool petsc_logActions = PETSC_FALSE; 27 PetscBool petsc_logObjects = PETSC_FALSE; 28 int petsc_numActions = 0, petsc_maxActions = 100; 29 int petsc_numObjects = 0, petsc_maxObjects = 100; 30 int petsc_numObjectsDestroyed = 0; 31 32 /* Global counters */ 33 PetscLogDouble petsc_BaseTime = 0.0; 34 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 35 PetscLogDouble petsc_tmp_flops = 0.0; /* The incremental number of flops */ 36 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 37 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 38 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 39 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 40 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 41 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 42 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 43 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 44 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 45 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 46 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 47 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 48 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 49 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 50 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 51 #if defined(PETSC_HAVE_DEVICE) 52 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 53 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 54 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 55 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 56 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 57 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 58 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 59 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 60 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 61 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 62 #endif 63 64 /* Logging functions */ 65 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 66 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 67 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 68 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 69 70 /* Tracing event logging variables */ 71 FILE *petsc_tracefile = NULL; 72 int petsc_tracelevel = 0; 73 const char *petsc_traceblanks = " "; 74 char petsc_tracespace[128] = " "; 75 PetscLogDouble petsc_tracetime = 0.0; 76 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 77 78 static PetscIntStack current_log_event_stack = NULL; 79 80 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) 81 { 82 int stage; 83 PetscBool opt; 84 85 PetscFunctionBegin; 86 if (PetscLogInitializeCalled) PetscFunctionReturn(0); 87 PetscLogInitializeCalled = PETSC_TRUE; 88 89 PetscCall(PetscIntStackCreate(¤t_log_event_stack)); 90 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt)); 91 if (opt) petsc_logActions = PETSC_FALSE; 92 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt)); 93 if (opt) petsc_logObjects = PETSC_FALSE; 94 if (petsc_logActions) PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 95 if (petsc_logObjects) PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 96 PetscLogPHC = PetscLogObjCreateDefault; 97 PetscLogPHD = PetscLogObjDestroyDefault; 98 /* Setup default logging structures */ 99 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 100 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 101 102 /* All processors sync here for more consistent logging */ 103 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 104 PetscTime(&petsc_BaseTime); 105 PetscCall(PetscLogStagePush(stage)); 106 PetscFunctionReturn(0); 107 } 108 109 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) 110 { 111 PetscStageLog stageLog; 112 113 PetscFunctionBegin; 114 PetscCall(PetscFree(petsc_actions)); 115 PetscCall(PetscFree(petsc_objects)); 116 PetscCall(PetscLogNestedEnd()); 117 PetscCall(PetscLogSet(NULL, NULL)); 118 119 /* Resetting phase */ 120 PetscCall(PetscLogGetStageLog(&stageLog)); 121 PetscCall(PetscStageLogDestroy(stageLog)); 122 PetscCall(PetscIntStackDestroy(current_log_event_stack)); 123 current_log_event_stack = NULL; 124 125 petsc_TotalFlops = 0.0; 126 petsc_numActions = 0; 127 petsc_numObjects = 0; 128 petsc_numObjectsDestroyed = 0; 129 petsc_maxActions = 100; 130 petsc_maxObjects = 100; 131 petsc_actions = NULL; 132 petsc_objects = NULL; 133 petsc_logActions = PETSC_FALSE; 134 petsc_logObjects = PETSC_FALSE; 135 petsc_BaseTime = 0.0; 136 petsc_TotalFlops = 0.0; 137 petsc_tmp_flops = 0.0; 138 petsc_send_ct = 0.0; 139 petsc_recv_ct = 0.0; 140 petsc_send_len = 0.0; 141 petsc_recv_len = 0.0; 142 petsc_isend_ct = 0.0; 143 petsc_irecv_ct = 0.0; 144 petsc_isend_len = 0.0; 145 petsc_irecv_len = 0.0; 146 petsc_wait_ct = 0.0; 147 petsc_wait_any_ct = 0.0; 148 petsc_wait_all_ct = 0.0; 149 petsc_sum_of_waits_ct = 0.0; 150 petsc_allreduce_ct = 0.0; 151 petsc_gather_ct = 0.0; 152 petsc_scatter_ct = 0.0; 153 #if defined(PETSC_HAVE_DEVICE) 154 petsc_ctog_ct = 0.0; 155 petsc_gtoc_ct = 0.0; 156 petsc_ctog_sz = 0.0; 157 petsc_gtoc_sz = 0.0; 158 petsc_gflops = 0.0; 159 petsc_gtime = 0.0; 160 #endif 161 PETSC_LARGEST_EVENT = PETSC_EVENT; 162 PetscLogPHC = NULL; 163 PetscLogPHD = NULL; 164 petsc_tracefile = NULL; 165 petsc_tracelevel = 0; 166 petsc_traceblanks = " "; 167 petsc_tracespace[0] = ' '; 168 petsc_tracespace[1] = 0; 169 petsc_tracetime = 0.0; 170 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 171 PETSC_OBJECT_CLASSID = 0; 172 petsc_stageLog = NULL; 173 PetscLogInitializeCalled = PETSC_FALSE; 174 PetscFunctionReturn(0); 175 } 176 177 /*@C 178 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 179 180 Not Collective 181 182 Input Parameters: 183 + b - The function called at beginning of event 184 - e - The function called at end of event 185 186 Level: developer 187 188 Developer Note: 189 The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. 190 191 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()` 192 @*/ 193 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) 194 { 195 PetscFunctionBegin; 196 PetscLogPLB = b; 197 PetscLogPLE = e; 198 PetscFunctionReturn(0); 199 } 200 201 /*@C 202 PetscLogIsActive - Check if logging is currently in progress. 203 204 Not Collective 205 206 Output Parameter: 207 . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise 208 209 Level: beginner 210 211 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 212 @*/ 213 PetscErrorCode PetscLogIsActive(PetscBool *isActive) 214 { 215 PetscFunctionBegin; 216 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 217 PetscFunctionReturn(0); 218 } 219 220 /*@C 221 PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop 222 rates and object creation and should not slow programs down too much. 223 This routine may be called more than once. 224 225 Logically Collective over `PETSC_COMM_WORLD` 226 227 Options Database Key: 228 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 229 screen (for code configured with --with-log=1 (which is the default)) 230 231 Usage: 232 .vb 233 PetscInitialize(...); 234 PetscLogDefaultBegin(); 235 ... code ... 236 PetscLogView(viewer); or PetscLogDump(); 237 PetscFinalize(); 238 .ve 239 240 Note: 241 `PetscLogView()` or `PetscLogDump()` actually cause the printing of 242 the logging information. 243 244 Level: advanced 245 246 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 247 @*/ 248 PetscErrorCode PetscLogDefaultBegin(void) 249 { 250 PetscFunctionBegin; 251 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 252 PetscFunctionReturn(0); 253 } 254 255 /*@C 256 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 257 all events. This creates large log files and slows the program down. 258 259 Logically Collective on `PETSC_COMM_WORLD` 260 261 Options Database Key: 262 . -log_all - Prints extensive log information 263 264 Usage: 265 .vb 266 PetscInitialize(...); 267 PetscLogAllBegin(); 268 ... code ... 269 PetscLogDump(filename); 270 PetscFinalize(); 271 .ve 272 273 Note: 274 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 275 intended for production runs since it logs only flop rates and object 276 creation (and shouldn't significantly slow the programs). 277 278 Level: advanced 279 280 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 281 @*/ 282 PetscErrorCode PetscLogAllBegin(void) 283 { 284 PetscFunctionBegin; 285 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 286 PetscFunctionReturn(0); 287 } 288 289 /*@C 290 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 291 begins or ends, the event name is printed. 292 293 Logically Collective on `PETSC_COMM_WORLD` 294 295 Input Parameter: 296 . file - The file to print trace in (e.g. stdout) 297 298 Options Database Key: 299 . -log_trace [filename] - Activates `PetscLogTraceBegin()` 300 301 Notes: 302 `PetscLogTraceBegin()` prints the processor number, the execution time (sec), 303 then "Event begin:" or "Event end:" followed by the event name. 304 305 `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful 306 to determine where a program is hanging without running in the 307 debugger. Can be used in conjunction with the -info option. 308 309 Level: intermediate 310 311 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 312 @*/ 313 PetscErrorCode PetscLogTraceBegin(FILE *file) 314 { 315 PetscFunctionBegin; 316 petsc_tracefile = file; 317 318 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 319 PetscFunctionReturn(0); 320 } 321 322 /*@ 323 PetscLogActions - Determines whether actions are logged for the graphical viewer. 324 325 Not Collective 326 327 Input Parameter: 328 . flag - `PETSC_TRUE` if actions are to be logged 329 330 Options Database Key: 331 . -log_exclude_actions - Turns off actions logging 332 333 Level: intermediate 334 335 Note: 336 Logging of actions continues to consume more memory as the program 337 runs. Long running programs should consider turning this feature off. 338 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 339 @*/ 340 PetscErrorCode PetscLogActions(PetscBool flag) 341 { 342 PetscFunctionBegin; 343 petsc_logActions = flag; 344 PetscFunctionReturn(0); 345 } 346 347 /*@ 348 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 349 350 Not Collective 351 352 Input Parameter: 353 . flag - `PETSC_TRUE` if objects are to be logged 354 355 Options Database Key: 356 . -log_exclude_objects - Turns off objects logging 357 358 Level: intermediate 359 360 Note: 361 Logging of objects continues to consume more memory as the program 362 runs. Long running programs should consider turning this feature off. 363 364 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 365 @*/ 366 PetscErrorCode PetscLogObjects(PetscBool flag) 367 { 368 PetscFunctionBegin; 369 petsc_logObjects = flag; 370 PetscFunctionReturn(0); 371 } 372 373 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 374 /*@C 375 PetscLogStageRegister - Attaches a character string name to a logging stage. 376 377 Not Collective 378 379 Input Parameter: 380 . sname - The name to associate with that stage 381 382 Output Parameter: 383 . stage - The stage number 384 385 Level: intermediate 386 387 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 388 @*/ 389 PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage) 390 { 391 PetscStageLog stageLog; 392 PetscLogEvent event; 393 394 PetscFunctionBegin; 395 PetscCall(PetscLogGetStageLog(&stageLog)); 396 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 397 /* Copy events already changed in the main stage, this sucks */ 398 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 399 for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 400 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 401 PetscFunctionReturn(0); 402 } 403 404 /*@C 405 PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage 406 407 Not Collective 408 409 Input Parameter: 410 . stage - The stage on which to log 411 412 Usage: 413 If the option -log_view is used to run the program containing the 414 following code, then 2 sets of summary data will be printed during 415 PetscFinalize(). 416 .vb 417 PetscInitialize(int *argc,char ***args,0,0); 418 [stage 0 of code] 419 PetscLogStagePush(1); 420 [stage 1 of code] 421 PetscLogStagePop(); 422 PetscBarrier(...); 423 [more stage 0 of code] 424 PetscFinalize(); 425 .ve 426 427 Note: 428 Use `PetscLogStageRegister()` to register a stage. 429 430 Level: intermediate 431 432 .seealso: `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 433 @*/ 434 PetscErrorCode PetscLogStagePush(PetscLogStage stage) 435 { 436 PetscStageLog stageLog; 437 438 PetscFunctionBegin; 439 PetscCall(PetscLogGetStageLog(&stageLog)); 440 PetscCall(PetscStageLogPush(stageLog, stage)); 441 PetscFunctionReturn(0); 442 } 443 444 /*@C 445 PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()` 446 447 Not Collective 448 449 Usage: 450 If the option -log_view is used to run the program containing the 451 following code, then 2 sets of summary data will be printed during 452 PetscFinalize(). 453 .vb 454 PetscInitialize(int *argc,char ***args,0,0); 455 [stage 0 of code] 456 PetscLogStagePush(1); 457 [stage 1 of code] 458 PetscLogStagePop(); 459 PetscBarrier(...); 460 [more stage 0 of code] 461 PetscFinalize(); 462 .ve 463 464 Level: intermediate 465 466 .seealso: `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 467 @*/ 468 PetscErrorCode PetscLogStagePop(void) 469 { 470 PetscStageLog stageLog; 471 472 PetscFunctionBegin; 473 PetscCall(PetscLogGetStageLog(&stageLog)); 474 PetscCall(PetscStageLogPop(stageLog)); 475 PetscFunctionReturn(0); 476 } 477 478 /*@ 479 PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 480 481 Not Collective 482 483 Input Parameters: 484 + stage - The stage 485 - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 486 487 Level: intermediate 488 489 Note: 490 If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist 491 492 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 493 @*/ 494 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) 495 { 496 PetscStageLog stageLog; 497 498 PetscFunctionBegin; 499 PetscCall(PetscLogGetStageLog(&stageLog)); 500 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 501 PetscFunctionReturn(0); 502 } 503 504 /*@ 505 PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 506 507 Not Collective 508 509 Input Parameter: 510 . stage - The stage 511 512 Output Parameter: 513 . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 514 515 Level: intermediate 516 517 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 518 @*/ 519 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) 520 { 521 PetscStageLog stageLog; 522 523 PetscFunctionBegin; 524 PetscCall(PetscLogGetStageLog(&stageLog)); 525 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 526 PetscFunctionReturn(0); 527 } 528 529 /*@ 530 PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()` 531 532 Not Collective 533 534 Input Parameters: 535 + stage - The stage 536 - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 537 538 Level: intermediate 539 540 Developer Note: 541 What does visible mean, needs to be documented. 542 543 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 544 @*/ 545 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) 546 { 547 PetscStageLog stageLog; 548 549 PetscFunctionBegin; 550 PetscCall(PetscLogGetStageLog(&stageLog)); 551 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 552 PetscFunctionReturn(0); 553 } 554 555 /*@ 556 PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()` 557 558 Not Collective 559 560 Input Parameter: 561 . stage - The stage 562 563 Output Parameter: 564 . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 565 566 Level: intermediate 567 568 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 569 @*/ 570 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) 571 { 572 PetscStageLog stageLog; 573 574 PetscFunctionBegin; 575 PetscCall(PetscLogGetStageLog(&stageLog)); 576 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 577 PetscFunctionReturn(0); 578 } 579 580 /*@C 581 PetscLogStageGetId - Returns the stage id when given the stage name. 582 583 Not Collective 584 585 Input Parameter: 586 . name - The stage name 587 588 Output Parameter: 589 . stage - The stage, , or -1 if no stage with that name exists 590 591 Level: intermediate 592 593 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 594 @*/ 595 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) 596 { 597 PetscStageLog stageLog; 598 599 PetscFunctionBegin; 600 PetscCall(PetscLogGetStageLog(&stageLog)); 601 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 602 PetscFunctionReturn(0); 603 } 604 605 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 606 607 /*@C 608 PetscLogEventRegister - Registers an event name for logging operations 609 610 Not Collective 611 612 Input Parameters: 613 + name - The name associated with the event 614 - classid - The classid associated to the class for this event, obtain either with 615 `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones 616 are only available in C code 617 618 Output Parameter: 619 . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`. 620 621 Example of Usage: 622 .vb 623 PetscLogEvent USER_EVENT; 624 PetscClassId classid; 625 PetscLogDouble user_event_flops; 626 PetscClassIdRegister("class name",&classid); 627 PetscLogEventRegister("User event name",classid,&USER_EVENT); 628 PetscLogEventBegin(USER_EVENT,0,0,0,0); 629 [code segment to monitor] 630 PetscLogFlops(user_event_flops); 631 PetscLogEventEnd(USER_EVENT,0,0,0,0); 632 .ve 633 634 Notes: 635 PETSc automatically logs library events if the code has been 636 configured with --with-log (which is the default) and 637 -log_view or -log_all is specified. `PetscLogEventRegister()` is 638 intended for logging user events to supplement this PETSc 639 information. 640 641 PETSc can gather data for use with the utilities Jumpshot 642 (part of the MPICH distribution). If PETSc has been compiled 643 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 644 MPICH), the user can employ another command line option, -log_mpe, 645 to create a logfile, "mpe.log", which can be visualized 646 Jumpshot. 647 648 The classid is associated with each event so that classes of events 649 can be disabled simultaneously, such as all matrix events. The user 650 can either use an existing classid, such as `MAT_CLASSID`, or create 651 their own as shown in the example. 652 653 If an existing event with the same name exists, its event handle is 654 returned instead of creating a new event. 655 656 Level: intermediate 657 658 .seealso: `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 659 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 660 @*/ 661 PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event) 662 { 663 PetscStageLog stageLog; 664 int stage; 665 666 PetscFunctionBegin; 667 *event = PETSC_DECIDE; 668 PetscCall(PetscLogGetStageLog(&stageLog)); 669 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 670 if (*event > 0) PetscFunctionReturn(0); 671 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 672 for (stage = 0; stage < stageLog->numStages; stage++) { 673 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 674 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 675 } 676 PetscFunctionReturn(0); 677 } 678 679 /*@ 680 PetscLogEventSetCollective - Indicates that a particular event is collective. 681 682 Not Collective 683 684 Input Parameters: 685 + event - The event id 686 - collective - Boolean flag indicating whether a particular event is collective 687 688 Notes: 689 New events returned from `PetscLogEventRegister()` are collective by default. 690 691 Collective events are handled specially if the -log_sync is used. In that case the logging saves information about 692 two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication 693 to be performed. This option is useful to debug imbalance within the computations or communications 694 695 Level: developer 696 697 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()` 698 @*/ 699 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective) 700 { 701 PetscStageLog stageLog; 702 PetscEventRegLog eventRegLog; 703 704 PetscFunctionBegin; 705 PetscCall(PetscLogGetStageLog(&stageLog)); 706 PetscCall(PetscStageLogGetEventRegLog(stageLog, &eventRegLog)); 707 PetscCheck(event >= 0 && event <= eventRegLog->numEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid event id"); 708 eventRegLog->eventInfo[event].collective = collective; 709 PetscFunctionReturn(0); 710 } 711 712 /*@ 713 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 714 715 Not Collective 716 717 Input Parameter: 718 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 719 720 Level: developer 721 722 .seealso: `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 723 @*/ 724 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) 725 { 726 PetscStageLog stageLog; 727 int stage; 728 729 PetscFunctionBegin; 730 PetscCall(PetscLogGetStageLog(&stageLog)); 731 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 732 PetscFunctionReturn(0); 733 } 734 735 /*@ 736 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 737 738 Not Collective 739 740 Input Parameter: 741 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 742 743 Level: developer 744 745 Note: 746 If a class is excluded then events associated with that class are not logged. 747 748 .seealso: `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 749 @*/ 750 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) 751 { 752 PetscStageLog stageLog; 753 int stage; 754 755 PetscFunctionBegin; 756 PetscCall(PetscLogGetStageLog(&stageLog)); 757 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 758 PetscFunctionReturn(0); 759 } 760 761 /*@ 762 PetscLogEventActivate - Indicates that a particular event should be logged. 763 764 Not Collective 765 766 Input Parameter: 767 . event - The event id 768 769 Usage: 770 .vb 771 PetscLogEventDeactivate(VEC_SetValues); 772 [code where you do not want to log VecSetValues()] 773 PetscLogEventActivate(VEC_SetValues); 774 [code where you do want to log VecSetValues()] 775 .ve 776 777 Note: 778 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 779 or an event number obtained with `PetscLogEventRegister()`. 780 781 Level: advanced 782 783 .seealso: `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 784 @*/ 785 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) 786 { 787 PetscStageLog stageLog; 788 int stage; 789 790 PetscFunctionBegin; 791 PetscCall(PetscLogGetStageLog(&stageLog)); 792 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 793 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 794 PetscFunctionReturn(0); 795 } 796 797 /*@ 798 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 799 800 Not Collective 801 802 Input Parameter: 803 . event - The event id 804 805 Usage: 806 .vb 807 PetscLogEventDeactivate(VEC_SetValues); 808 [code where you do not want to log VecSetValues()] 809 PetscLogEventActivate(VEC_SetValues); 810 [code where you do want to log VecSetValues()] 811 .ve 812 813 Note: 814 The event may be either a pre-defined PETSc event (found in 815 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 816 817 Level: advanced 818 819 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 820 @*/ 821 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) 822 { 823 PetscStageLog stageLog; 824 int stage; 825 826 PetscFunctionBegin; 827 PetscCall(PetscLogGetStageLog(&stageLog)); 828 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 829 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 830 PetscFunctionReturn(0); 831 } 832 833 /*@ 834 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called 835 836 Not Collective 837 838 Input Parameter: 839 . event - The event id 840 841 Usage: 842 .vb 843 PetscLogEventDeactivatePush(VEC_SetValues); 844 [code where you do not want to log VecSetValues()] 845 PetscLogEventDeactivatePop(VEC_SetValues); 846 [code where you do want to log VecSetValues()] 847 .ve 848 849 Note: 850 The event may be either a pre-defined PETSc event (found in 851 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 852 853 Level: advanced 854 855 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()` 856 @*/ 857 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) 858 { 859 PetscStageLog stageLog; 860 int stage; 861 862 PetscFunctionBegin; 863 PetscCall(PetscLogGetStageLog(&stageLog)); 864 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 865 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 866 PetscFunctionReturn(0); 867 } 868 869 /*@ 870 PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()` 871 872 Not Collective 873 874 Input Parameter: 875 . event - The event id 876 877 Usage: 878 .vb 879 PetscLogEventDeactivatePush(VEC_SetValues); 880 [code where you do not want to log VecSetValues()] 881 PetscLogEventDeactivatePop(VEC_SetValues); 882 [code where you do want to log VecSetValues()] 883 .ve 884 885 Note: 886 The event may be either a pre-defined PETSc event (found in 887 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 888 889 Level: advanced 890 891 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 892 @*/ 893 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) 894 { 895 PetscStageLog stageLog; 896 int stage; 897 898 PetscFunctionBegin; 899 PetscCall(PetscLogGetStageLog(&stageLog)); 900 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 901 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 902 PetscFunctionReturn(0); 903 } 904 905 /*@ 906 PetscLogEventSetActiveAll - Turns on logging of all events 907 908 Not Collective 909 910 Input Parameters: 911 + event - The event id 912 - isActive - The activity flag determining whether the event is logged 913 914 Level: advanced 915 916 .seealso: `PlogEventActivate()`, `PlogEventDeactivate()` 917 @*/ 918 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) 919 { 920 PetscStageLog stageLog; 921 int stage; 922 923 PetscFunctionBegin; 924 PetscCall(PetscLogGetStageLog(&stageLog)); 925 for (stage = 0; stage < stageLog->numStages; stage++) { 926 if (isActive) { 927 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 928 } else { 929 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 930 } 931 } 932 PetscFunctionReturn(0); 933 } 934 935 /*@ 936 PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage 937 938 Not Collective 939 940 Input Parameter: 941 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 942 943 Level: developer 944 945 .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 946 @*/ 947 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) 948 { 949 PetscStageLog stageLog; 950 int stage; 951 952 PetscFunctionBegin; 953 PetscCall(PetscLogGetStageLog(&stageLog)); 954 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 955 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 956 PetscFunctionReturn(0); 957 } 958 959 /*@ 960 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage 961 962 Not Collective 963 964 Input Parameter: 965 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 966 967 Level: developer 968 969 .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 970 @*/ 971 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) 972 { 973 PetscStageLog stageLog; 974 int stage; 975 976 PetscFunctionBegin; 977 PetscCall(PetscLogGetStageLog(&stageLog)); 978 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 979 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 980 PetscFunctionReturn(0); 981 } 982 983 /*MC 984 PetscLogEventSync - Synchronizes the beginning of a user event. 985 986 Synopsis: 987 #include <petsclog.h> 988 PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm) 989 990 Collective 991 992 Input Parameters: 993 + e - integer associated with the event obtained from PetscLogEventRegister() 994 - comm - an MPI communicator 995 996 Usage: 997 .vb 998 PetscLogEvent USER_EVENT; 999 PetscLogEventRegister("User event",0,&USER_EVENT); 1000 PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD); 1001 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1002 [code segment to monitor] 1003 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1004 .ve 1005 1006 Note: 1007 This routine should be called only if there is not a 1008 `PetscObject` available to pass to `PetscLogEventBegin()`. 1009 1010 Level: developer 1011 1012 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1013 M*/ 1014 1015 /*MC 1016 PetscLogEventBegin - Logs the beginning of a user event. 1017 1018 Synopsis: 1019 #include <petsclog.h> 1020 PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1021 1022 Not Collective 1023 1024 Input Parameters: 1025 + e - integer associated with the event obtained from PetscLogEventRegister() 1026 - o1,o2,o3,o4 - objects associated with the event, or 0 1027 1028 Fortran Synopsis: 1029 void PetscLogEventBegin(int e,PetscErrorCode ierr) 1030 1031 Usage: 1032 .vb 1033 PetscLogEvent USER_EVENT; 1034 PetscLogDouble user_event_flops; 1035 PetscLogEventRegister("User event",0,&USER_EVENT); 1036 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1037 [code segment to monitor] 1038 PetscLogFlops(user_event_flops); 1039 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1040 .ve 1041 1042 Developer Note: 1043 `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the 1044 errors that occur in the macro directly because other packages that use this macros have used them in their 1045 own functions or methods that do not return error codes and it would be disruptive to change the current 1046 behavior. 1047 1048 Level: intermediate 1049 1050 .seealso: `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1051 M*/ 1052 1053 /*MC 1054 PetscLogEventEnd - Log the end of a user event. 1055 1056 Synopsis: 1057 #include <petsclog.h> 1058 PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1059 1060 Not Collective 1061 1062 Input Parameters: 1063 + e - integer associated with the event obtained with PetscLogEventRegister() 1064 - o1,o2,o3,o4 - objects associated with the event, or 0 1065 1066 Fortran Synopsis: 1067 void PetscLogEventEnd(int e,PetscErrorCode ierr) 1068 1069 Usage: 1070 .vb 1071 PetscLogEvent USER_EVENT; 1072 PetscLogDouble user_event_flops; 1073 PetscLogEventRegister("User event",0,&USER_EVENT,); 1074 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1075 [code segment to monitor] 1076 PetscLogFlops(user_event_flops); 1077 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1078 .ve 1079 1080 Level: intermediate 1081 1082 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1083 M*/ 1084 1085 /*@C 1086 PetscLogEventGetId - Returns the event id when given the event name. 1087 1088 Not Collective 1089 1090 Input Parameter: 1091 . name - The event name 1092 1093 Output Parameter: 1094 . event - The event, or -1 if no event with that name exists 1095 1096 Level: intermediate 1097 1098 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1099 @*/ 1100 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) 1101 { 1102 PetscStageLog stageLog; 1103 1104 PetscFunctionBegin; 1105 PetscCall(PetscLogGetStageLog(&stageLog)); 1106 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event) 1111 { 1112 PetscFunctionBegin; 1113 PetscCall(PetscIntStackPush(current_log_event_stack, event)); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 PetscErrorCode PetscLogPopCurrentEvent_Internal(void) 1118 { 1119 PetscFunctionBegin; 1120 PetscCall(PetscIntStackPop(current_log_event_stack, NULL)); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event) 1125 { 1126 PetscBool empty; 1127 1128 PetscFunctionBegin; 1129 PetscValidIntPointer(event, 1); 1130 *event = PETSC_DECIDE; 1131 PetscCall(PetscIntStackEmpty(current_log_event_stack, &empty)); 1132 if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack, event)); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event) 1137 { 1138 PetscFunctionBegin; 1139 if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event, NULL, NULL, NULL, NULL)); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event) 1144 { 1145 PetscStageLog stageLog; 1146 PetscEventPerfLog eventLog; 1147 int stage; 1148 1149 PetscFunctionBegin; 1150 if (event == PETSC_DECIDE) PetscFunctionReturn(0); 1151 PetscCall(PetscLogEventBegin(event, NULL, NULL, NULL, NULL)); 1152 PetscCall(PetscLogGetStageLog(&stageLog)); 1153 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1154 PetscCall(PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog)); 1155 eventLog->eventInfo[event].count--; 1156 PetscFunctionReturn(0); 1157 } 1158 1159 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1160 /*@C 1161 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1162 be read by bin/petscview. This program no longer exists. 1163 1164 Collective on `PETSC_COMM_WORLD` 1165 1166 Input Parameter: 1167 . name - an optional file name 1168 1169 Usage: 1170 .vb 1171 PetscInitialize(...); 1172 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1173 ... code ... 1174 PetscLogDump(filename); 1175 PetscFinalize(); 1176 .ve 1177 1178 Note: 1179 The default file name is 1180 $ Log.<rank> 1181 where <rank> is the processor number. If no name is specified, 1182 this file will be used. 1183 1184 Level: advanced 1185 1186 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1187 @*/ 1188 PetscErrorCode PetscLogDump(const char sname[]) 1189 { 1190 PetscStageLog stageLog; 1191 PetscEventPerfInfo *eventInfo; 1192 FILE *fd; 1193 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1194 PetscLogDouble flops, _TotalTime; 1195 PetscMPIInt rank; 1196 int action, object, curStage; 1197 PetscLogEvent event; 1198 1199 PetscFunctionBegin; 1200 /* Calculate the total elapsed time */ 1201 PetscTime(&_TotalTime); 1202 _TotalTime -= petsc_BaseTime; 1203 /* Open log file */ 1204 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1205 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank)); 1206 PetscCall(PetscFixFilename(file, fname)); 1207 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1208 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1209 /* Output totals */ 1210 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1211 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1212 /* Output actions */ 1213 if (petsc_logActions) { 1214 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1215 for (action = 0; action < petsc_numActions; action++) { 1216 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1217 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1218 } 1219 } 1220 /* Output objects */ 1221 if (petsc_logObjects) { 1222 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1223 for (object = 0; object < petsc_numObjects; object++) { 1224 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem)); 1225 if (!petsc_objects[object].name[0]) { 1226 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n")); 1227 } else { 1228 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1229 } 1230 if (petsc_objects[object].info[0] != 0) { 1231 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1232 } else { 1233 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1234 } 1235 } 1236 } 1237 /* Output events */ 1238 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1239 PetscCall(PetscLogGetStageLog(&stageLog)); 1240 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1241 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1242 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1243 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time; 1244 else flops = 0.0; 1245 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops)); 1246 } 1247 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1248 PetscFunctionReturn(0); 1249 } 1250 1251 /* 1252 PetscLogView_Detailed - Each process prints the times for its own events 1253 1254 */ 1255 PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) 1256 { 1257 PetscStageLog stageLog; 1258 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1259 PetscLogDouble locTotalTime, numRed, maxMem; 1260 int numStages, numEvents, stage, event; 1261 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1262 PetscMPIInt rank, size; 1263 1264 PetscFunctionBegin; 1265 PetscCallMPI(MPI_Comm_size(comm, &size)); 1266 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1267 /* Must preserve reduction count before we go on */ 1268 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1269 /* Get the total elapsed time */ 1270 PetscTime(&locTotalTime); 1271 locTotalTime -= petsc_BaseTime; 1272 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 1273 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 1274 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 1275 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 1276 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 1277 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 1278 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 1279 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 1280 PetscCall(PetscLogGetStageLog(&stageLog)); 1281 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1282 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1283 for (stage = 0; stage < numStages; stage++) { 1284 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name)); 1285 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name)); 1286 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1287 for (event = 0; event < numEvents; event++) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name)); 1288 } 1289 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1290 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1291 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1292 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1293 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1294 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1295 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1296 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects)); 1297 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1298 PetscCall(PetscViewerFlush(viewer)); 1299 for (stage = 0; stage < numStages; stage++) { 1300 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1301 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, 1302 stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1303 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1304 for (event = 0; event < numEvents; event++) { 1305 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1306 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1307 stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, 1308 eventInfo->flops)); 1309 if (eventInfo->dof[0] >= 0.) { 1310 PetscInt d, e; 1311 1312 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1313 for (d = 0; d < 8; ++d) { 1314 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1315 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1316 } 1317 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1318 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1319 for (e = 0; e < 8; ++e) { 1320 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1321 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1322 } 1323 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1324 } 1325 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1326 } 1327 } 1328 PetscCall(PetscViewerFlush(viewer)); 1329 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1330 PetscFunctionReturn(0); 1331 } 1332 1333 /* 1334 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1335 */ 1336 PetscErrorCode PetscLogView_CSV(PetscViewer viewer) 1337 { 1338 PetscStageLog stageLog; 1339 PetscEventPerfInfo *eventInfo = NULL; 1340 PetscLogDouble locTotalTime, maxMem; 1341 int numStages, numEvents, stage, event; 1342 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1343 PetscMPIInt rank, size; 1344 1345 PetscFunctionBegin; 1346 PetscCallMPI(MPI_Comm_size(comm, &size)); 1347 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1348 /* Must preserve reduction count before we go on */ 1349 /* Get the total elapsed time */ 1350 PetscTime(&locTotalTime); 1351 locTotalTime -= petsc_BaseTime; 1352 PetscCall(PetscLogGetStageLog(&stageLog)); 1353 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1354 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1355 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1356 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1357 PetscCall(PetscViewerFlush(viewer)); 1358 for (stage = 0; stage < numStages; stage++) { 1359 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1360 1361 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1362 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1363 for (event = 0; event < numEvents; event++) { 1364 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1365 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, 1366 eventInfo->numReductions, eventInfo->flops)); 1367 if (eventInfo->dof[0] >= 0.) { 1368 PetscInt d, e; 1369 1370 for (d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1371 for (e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1372 } 1373 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1374 } 1375 } 1376 PetscCall(PetscViewerFlush(viewer)); 1377 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) 1382 { 1383 PetscFunctionBegin; 1384 if (!PetscLogSyncOn) PetscFunctionReturn(0); 1385 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1386 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1387 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1388 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1389 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1390 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1391 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1392 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1393 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1394 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1395 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1396 PetscFunctionReturn(0); 1397 } 1398 1399 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) 1400 { 1401 PetscFunctionBegin; 1402 if (PetscDefined(USE_DEBUG)) { 1403 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1404 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1405 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1406 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1407 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1408 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1409 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1410 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1411 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1412 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1413 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1414 } 1415 PetscFunctionReturn(0); 1416 } 1417 1418 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) 1419 { 1420 #if defined(PETSC_HAVE_DEVICE) 1421 PetscMPIInt size; 1422 1423 PetscFunctionBegin; 1424 PetscCallMPI(MPI_Comm_size(comm, &size)); 1425 if (use_gpu_aware_mpi || size == 1) PetscFunctionReturn(0); 1426 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1427 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1428 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1429 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1430 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1431 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1432 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1433 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1434 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1435 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1436 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1437 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1438 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1439 PetscFunctionReturn(0); 1440 #else 1441 return 0; 1442 #endif 1443 } 1444 1445 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) 1446 { 1447 #if defined(PETSC_HAVE_DEVICE) 1448 1449 PetscFunctionBegin; 1450 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(0); 1451 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1452 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1453 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1454 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1455 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1456 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1457 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1458 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1459 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1460 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1461 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1462 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1463 PetscFunctionReturn(0); 1464 #else 1465 return 0; 1466 #endif 1467 } 1468 1469 PetscErrorCode PetscLogView_Default(PetscViewer viewer) 1470 { 1471 FILE *fd; 1472 PetscLogDouble zero = 0.0; 1473 PetscStageLog stageLog; 1474 PetscStageInfo *stageInfo = NULL; 1475 PetscEventPerfInfo *eventInfo = NULL; 1476 PetscClassPerfInfo *classInfo; 1477 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1478 const char *name; 1479 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1480 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1481 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1482 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1483 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1484 PetscLogDouble min, max, tot, ratio, avg, x, y; 1485 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1486 #if defined(PETSC_HAVE_DEVICE) 1487 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1488 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1489 #endif 1490 PetscMPIInt minC, maxC; 1491 PetscMPIInt size, rank; 1492 PetscBool *localStageUsed, *stageUsed; 1493 PetscBool *localStageVisible, *stageVisible; 1494 int numStages, localNumEvents, numEvents; 1495 int stage, oclass; 1496 PetscLogEvent event; 1497 PetscErrorCode ierr = 0; 1498 char version[256]; 1499 MPI_Comm comm; 1500 #if defined(PETSC_HAVE_DEVICE) 1501 PetscLogEvent eventid; 1502 PetscInt64 nas = 0x7FF0000000000002; 1503 #endif 1504 1505 PetscFunctionBegin; 1506 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1507 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1508 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1509 PetscCallMPI(MPI_Comm_size(comm, &size)); 1510 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1511 /* Get the total elapsed time */ 1512 PetscTime(&locTotalTime); 1513 locTotalTime -= petsc_BaseTime; 1514 1515 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1516 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1517 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1518 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1519 PetscCall(PetscLogViewWarnSync(comm, fd)); 1520 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1521 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1522 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1523 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1524 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1525 PetscCall(PetscGetUserName(username, sizeof(username))); 1526 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1527 PetscCall(PetscGetDate(date, sizeof(date))); 1528 PetscCall(PetscGetVersion(version, sizeof(version))); 1529 if (size == 1) { 1530 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1531 } else { 1532 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1533 } 1534 #if defined(PETSC_HAVE_OPENMP) 1535 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1536 #endif 1537 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1538 1539 /* Must preserve reduction count before we go on */ 1540 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1541 1542 /* Calculate summary information */ 1543 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1544 /* Time */ 1545 PetscCallMPI(MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1546 PetscCallMPI(MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1547 PetscCallMPI(MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1548 avg = tot / ((PetscLogDouble)size); 1549 if (min != 0.0) ratio = max / min; 1550 else ratio = 0.0; 1551 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1552 TotalTime = tot; 1553 /* Objects */ 1554 avg = (PetscLogDouble)petsc_numObjects; 1555 PetscCallMPI(MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1556 PetscCallMPI(MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1557 PetscCallMPI(MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1558 avg = tot / ((PetscLogDouble)size); 1559 if (min != 0.0) ratio = max / min; 1560 else ratio = 0.0; 1561 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1562 /* Flops */ 1563 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1564 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1565 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1566 avg = tot / ((PetscLogDouble)size); 1567 if (min != 0.0) ratio = max / min; 1568 else ratio = 0.0; 1569 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1570 TotalFlops = tot; 1571 /* Flops/sec -- Must talk to Barry here */ 1572 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1573 else flops = 0.0; 1574 PetscCallMPI(MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1575 PetscCallMPI(MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1576 PetscCallMPI(MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1577 avg = tot / ((PetscLogDouble)size); 1578 if (min != 0.0) ratio = max / min; 1579 else ratio = 0.0; 1580 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1581 /* Memory */ 1582 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1583 if (mem > 0.0) { 1584 PetscCallMPI(MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1585 PetscCallMPI(MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1586 PetscCallMPI(MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1587 avg = tot / ((PetscLogDouble)size); 1588 if (min != 0.0) ratio = max / min; 1589 else ratio = 0.0; 1590 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1591 } 1592 /* Messages */ 1593 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1594 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1595 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1596 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1597 avg = tot / ((PetscLogDouble)size); 1598 if (min != 0.0) ratio = max / min; 1599 else ratio = 0.0; 1600 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1601 numMessages = tot; 1602 /* Message Lengths */ 1603 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1604 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1605 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1606 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1607 if (numMessages != 0) avg = tot / numMessages; 1608 else avg = 0.0; 1609 if (min != 0.0) ratio = max / min; 1610 else ratio = 0.0; 1611 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1612 messageLength = tot; 1613 /* Reductions */ 1614 PetscCallMPI(MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1615 PetscCallMPI(MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1616 PetscCallMPI(MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1617 if (min != 0.0) ratio = max / min; 1618 else ratio = 0.0; 1619 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1620 numReductions = red; /* wrong because uses count from process zero */ 1621 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1622 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1623 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1624 1625 /* Get total number of stages -- 1626 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1627 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1628 This seems best accomplished by assoicating a communicator with each stage. 1629 */ 1630 PetscCall(PetscLogGetStageLog(&stageLog)); 1631 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1632 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1633 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1634 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1635 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1636 if (numStages > 0) { 1637 stageInfo = stageLog->stageInfo; 1638 for (stage = 0; stage < numStages; stage++) { 1639 if (stage < stageLog->numStages) { 1640 localStageUsed[stage] = stageInfo[stage].used; 1641 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1642 } else { 1643 localStageUsed[stage] = PETSC_FALSE; 1644 localStageVisible[stage] = PETSC_TRUE; 1645 } 1646 } 1647 PetscCallMPI(MPI_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1648 PetscCallMPI(MPI_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1649 for (stage = 0; stage < numStages; stage++) { 1650 if (stageUsed[stage]) { 1651 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1652 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1653 break; 1654 } 1655 } 1656 for (stage = 0; stage < numStages; stage++) { 1657 if (!stageUsed[stage]) continue; 1658 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1659 if (localStageUsed[stage]) { 1660 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1661 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1662 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1663 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1664 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1665 name = stageInfo[stage].name; 1666 } else { 1667 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1668 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1669 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1670 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1671 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1672 name = ""; 1673 } 1674 mess *= 0.5; 1675 messLen *= 0.5; 1676 red /= size; 1677 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1678 else fracTime = 0.0; 1679 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1680 else fracFlops = 0.0; 1681 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1682 if (numMessages != 0.0) fracMessages = mess / numMessages; 1683 else fracMessages = 0.0; 1684 if (mess != 0.0) avgMessLen = messLen / mess; 1685 else avgMessLen = 0.0; 1686 if (messageLength != 0.0) fracLength = messLen / messageLength; 1687 else fracLength = 0.0; 1688 if (numReductions != 0.0) fracReductions = red / numReductions; 1689 else fracReductions = 0.0; 1690 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1691 } 1692 } 1693 1694 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1695 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1696 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1697 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1698 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1699 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1700 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1701 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1702 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1703 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1704 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1705 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1706 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1707 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1708 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1709 if (PetscLogMemory) { 1710 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1711 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1712 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1713 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1714 } 1715 #if defined(PETSC_HAVE_DEVICE) 1716 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1717 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1718 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1719 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1720 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1721 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1722 #endif 1723 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1724 1725 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1726 1727 /* Report events */ 1728 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1729 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1730 #if defined(PETSC_HAVE_DEVICE) 1731 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1732 #endif 1733 PetscCall(PetscFPrintf(comm, fd, "\n")); 1734 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1735 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1736 #if defined(PETSC_HAVE_DEVICE) 1737 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1738 #endif 1739 PetscCall(PetscFPrintf(comm, fd, "\n")); 1740 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1741 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1742 #if defined(PETSC_HAVE_DEVICE) 1743 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1744 #endif 1745 PetscCall(PetscFPrintf(comm, fd, "\n")); 1746 1747 #if defined(PETSC_HAVE_DEVICE) 1748 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1749 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1750 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1751 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1752 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1753 #endif 1754 1755 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1756 for (stage = 0; stage < numStages; stage++) { 1757 if (!stageVisible[stage]) continue; 1758 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1759 if (localStageUsed[stage]) { 1760 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1761 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1762 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1763 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1764 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1765 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1766 } else { 1767 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1768 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1769 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1770 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1771 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1772 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1773 } 1774 mess *= 0.5; 1775 messLen *= 0.5; 1776 red /= size; 1777 1778 /* Get total number of events in this stage -- 1779 Currently, a single processor can register more events than another, but events must all be registered in order, 1780 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1781 on the event ID. This seems best accomplished by associating a communicator with each stage. 1782 1783 Problem: If the event did not happen on proc 1, its name will not be available. 1784 Problem: Event visibility is not implemented 1785 */ 1786 if (localStageUsed[stage]) { 1787 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1788 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1789 } else localNumEvents = 0; 1790 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1791 for (event = 0; event < numEvents; event++) { 1792 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1793 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1794 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; 1795 else flopr = 0.0; 1796 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1797 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1798 PetscCallMPI(MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1799 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1800 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1801 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1802 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1803 PetscCallMPI(MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1804 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1805 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1806 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1807 if (PetscLogMemory) { 1808 PetscCallMPI(MPI_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1809 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1810 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1811 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1812 } 1813 #if defined(PETSC_HAVE_DEVICE) 1814 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1815 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1816 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1817 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1818 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1819 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1820 #endif 1821 name = stageLog->eventLog->eventInfo[event].name; 1822 } else { 1823 flopr = 0.0; 1824 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1825 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1826 PetscCallMPI(MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1827 PetscCallMPI(MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1828 PetscCallMPI(MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1829 PetscCallMPI(MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1830 PetscCallMPI(MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1831 PetscCallMPI(MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1832 PetscCallMPI(MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1833 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1834 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1835 if (PetscLogMemory) { 1836 PetscCallMPI(MPI_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1837 PetscCallMPI(MPI_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1838 PetscCallMPI(MPI_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1839 PetscCallMPI(MPI_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1840 } 1841 #if defined(PETSC_HAVE_DEVICE) 1842 PetscCallMPI(MPI_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1843 PetscCallMPI(MPI_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1844 PetscCallMPI(MPI_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1845 PetscCallMPI(MPI_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1846 PetscCallMPI(MPI_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1847 PetscCallMPI(MPI_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1848 #endif 1849 name = ""; 1850 } 1851 if (mint < 0.0) { 1852 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name)); 1853 mint = 0; 1854 } 1855 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, name); 1856 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1857 #if defined(PETSC_HAVE_DEVICE) 1858 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1859 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 1860 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 1861 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 1862 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 1863 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 1864 } 1865 } 1866 #endif 1867 totm *= 0.5; 1868 totml *= 0.5; 1869 totr /= size; 1870 1871 if (maxC != 0) { 1872 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 1873 else ratC = 0.0; 1874 if (mint != 0.0) ratt = maxt / mint; 1875 else ratt = 0.0; 1876 if (minf != 0.0) ratf = maxf / minf; 1877 else ratf = 0.0; 1878 if (TotalTime != 0.0) fracTime = tott / TotalTime; 1879 else fracTime = 0.0; 1880 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 1881 else fracFlops = 0.0; 1882 if (stageTime != 0.0) fracStageTime = tott / stageTime; 1883 else fracStageTime = 0.0; 1884 if (flops != 0.0) fracStageFlops = totf / flops; 1885 else fracStageFlops = 0.0; 1886 if (numMessages != 0.0) fracMess = totm / numMessages; 1887 else fracMess = 0.0; 1888 if (messageLength != 0.0) fracMessLen = totml / messageLength; 1889 else fracMessLen = 0.0; 1890 if (numReductions != 0.0) fracRed = totr / numReductions; 1891 else fracRed = 0.0; 1892 if (mess != 0.0) fracStageMess = totm / mess; 1893 else fracStageMess = 0.0; 1894 if (messLen != 0.0) fracStageMessLen = totml / messLen; 1895 else fracStageMessLen = 0.0; 1896 if (red != 0.0) fracStageRed = totr / red; 1897 else fracStageRed = 0.0; 1898 if (totm != 0.0) totml /= totm; 1899 else totml = 0.0; 1900 if (maxt != 0.0) flopr = totf / maxt; 1901 else flopr = 0.0; 1902 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) 1903 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 1904 else 1905 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 1906 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 1907 #if defined(PETSC_HAVE_DEVICE) 1908 if (totf != 0.0) fracgflops = gflops / totf; 1909 else fracgflops = 0.0; 1910 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 1911 else gflopr = 0.0; 1912 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 1913 #endif 1914 PetscCall(PetscFPrintf(comm, fd, "\n")); 1915 } 1916 } 1917 } 1918 1919 /* Memory usage and object creation */ 1920 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1921 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1922 #if defined(PETSC_HAVE_DEVICE) 1923 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1924 #endif 1925 PetscCall(PetscFPrintf(comm, fd, "\n")); 1926 PetscCall(PetscFPrintf(comm, fd, "\n")); 1927 1928 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1929 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1930 stats for stages local to processor sets. 1931 */ 1932 /* We should figure out the longest object name here (now 20 characters) */ 1933 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 1934 for (stage = 0; stage < numStages; stage++) { 1935 if (localStageUsed[stage]) { 1936 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 1937 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1938 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 1939 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 1940 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions)); 1941 } 1942 } 1943 } else { 1944 if (!localStageVisible[stage]) continue; 1945 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1946 } 1947 } 1948 1949 PetscCall(PetscFree(localStageUsed)); 1950 PetscCall(PetscFree(stageUsed)); 1951 PetscCall(PetscFree(localStageVisible)); 1952 PetscCall(PetscFree(stageVisible)); 1953 1954 /* Information unrelated to this particular run */ 1955 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 1956 PetscTime(&y); 1957 PetscTime(&x); 1958 PetscTime(&y); 1959 PetscTime(&y); 1960 PetscTime(&y); 1961 PetscTime(&y); 1962 PetscTime(&y); 1963 PetscTime(&y); 1964 PetscTime(&y); 1965 PetscTime(&y); 1966 PetscTime(&y); 1967 PetscTime(&y); 1968 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 1969 /* MPI information */ 1970 if (size > 1) { 1971 MPI_Status status; 1972 PetscMPIInt tag; 1973 MPI_Comm newcomm; 1974 1975 PetscCallMPI(MPI_Barrier(comm)); 1976 PetscTime(&x); 1977 PetscCallMPI(MPI_Barrier(comm)); 1978 PetscCallMPI(MPI_Barrier(comm)); 1979 PetscCallMPI(MPI_Barrier(comm)); 1980 PetscCallMPI(MPI_Barrier(comm)); 1981 PetscCallMPI(MPI_Barrier(comm)); 1982 PetscTime(&y); 1983 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 1984 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 1985 PetscCallMPI(MPI_Barrier(comm)); 1986 if (rank) { 1987 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 1988 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 1989 } else { 1990 PetscTime(&x); 1991 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 1992 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 1993 PetscTime(&y); 1994 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 1995 } 1996 PetscCall(PetscCommDestroy(&newcomm)); 1997 } 1998 PetscCall(PetscOptionsView(NULL, viewer)); 1999 2000 /* Machine and compile information */ 2001 #if defined(PETSC_USE_FORTRAN_KERNELS) 2002 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 2003 #else 2004 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 2005 #endif 2006 #if defined(PETSC_USE_64BIT_INDICES) 2007 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64 bit PetscInt\n")); 2008 #elif defined(PETSC_USE___FLOAT128) 2009 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32 bit PetscInt\n")); 2010 #endif 2011 #if defined(PETSC_USE_REAL_SINGLE) 2012 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 2013 #elif defined(PETSC_USE___FLOAT128) 2014 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 2015 #endif 2016 #if defined(PETSC_USE_REAL_MAT_SINGLE) 2017 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 2018 #else 2019 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 2020 #endif 2021 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 2022 2023 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 2024 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 2025 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 2026 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 2027 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 2028 2029 /* Cleanup */ 2030 PetscCall(PetscFPrintf(comm, fd, "\n")); 2031 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 2032 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 2033 PetscCall(PetscFPTrapPop()); 2034 PetscFunctionReturn(0); 2035 } 2036 2037 /*@C 2038 PetscLogView - Prints a summary of the logging. 2039 2040 Collective over MPI_Comm 2041 2042 Input Parameter: 2043 . viewer - an ASCII viewer 2044 2045 Options Database Keys: 2046 + -log_view [:filename] - Prints summary of log information 2047 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2048 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2049 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2050 . -log_view_memory - Also display memory usage in each event 2051 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2052 . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation 2053 - -log_trace [filename] - Displays a trace of what each process is doing 2054 2055 Notes: 2056 It is possible to control the logging programatically but we recommend using the options database approach whenever possible 2057 By default the summary is printed to stdout. 2058 2059 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2060 2061 If PETSc is configured with --with-logging=0 then this functionality is not available 2062 2063 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2064 directory then open filename.xml with your browser. Specific notes for certain browsers 2065 $ Firefox and Internet explorer - simply open the file 2066 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2067 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2068 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2069 your browser. 2070 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2071 window and render the XML log file contents. 2072 2073 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2074 2075 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2076 or using speedscope (https://www.speedscope.app). 2077 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2078 2079 Level: beginner 2080 2081 .seealso: `PetscLogDefaultBegin()`, `PetscLogDump()` 2082 @*/ 2083 PetscErrorCode PetscLogView(PetscViewer viewer) 2084 { 2085 PetscBool isascii; 2086 PetscViewerFormat format; 2087 int stage, lastStage; 2088 PetscStageLog stageLog; 2089 2090 PetscFunctionBegin; 2091 PetscCheck(PetscLogPLB, PETSC_COMM_SELF, PETSC_ERR_SUP, "Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2092 /* Pop off any stages the user forgot to remove */ 2093 lastStage = 0; 2094 PetscCall(PetscLogGetStageLog(&stageLog)); 2095 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2096 while (stage >= 0) { 2097 lastStage = stage; 2098 PetscCall(PetscStageLogPop(stageLog)); 2099 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2100 } 2101 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2102 PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Currently can only view logging to ASCII"); 2103 PetscCall(PetscViewerGetFormat(viewer, &format)); 2104 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2105 PetscCall(PetscLogView_Default(viewer)); 2106 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2107 PetscCall(PetscLogView_Detailed(viewer)); 2108 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2109 PetscCall(PetscLogView_CSV(viewer)); 2110 } else if (format == PETSC_VIEWER_ASCII_XML) { 2111 PetscCall(PetscLogView_Nested(viewer)); 2112 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2113 PetscCall(PetscLogView_Flamegraph(viewer)); 2114 } 2115 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2116 PetscFunctionReturn(0); 2117 } 2118 2119 /*@C 2120 PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed. 2121 2122 Collective on `PETSC_COMM_WORLD` 2123 2124 Level: developer 2125 2126 .seealso: `PetscLogView()` 2127 @*/ 2128 PetscErrorCode PetscLogViewFromOptions(void) 2129 { 2130 PetscViewer viewer; 2131 PetscBool flg; 2132 PetscViewerFormat format; 2133 2134 PetscFunctionBegin; 2135 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg)); 2136 if (flg) { 2137 PetscCall(PetscViewerPushFormat(viewer, format)); 2138 PetscCall(PetscLogView(viewer)); 2139 PetscCall(PetscViewerPopFormat(viewer)); 2140 PetscCall(PetscViewerDestroy(&viewer)); 2141 } 2142 PetscFunctionReturn(0); 2143 } 2144 2145 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2146 /*@C 2147 PetscGetFlops - Returns the number of flops used on this processor 2148 since the program began. 2149 2150 Not Collective 2151 2152 Output Parameter: 2153 flops - number of floating point operations 2154 2155 Notes: 2156 A global counter logs all PETSc flop counts. The user can use 2157 `PetscLogFlops()` to increment this counter to include flops for the 2158 application code. 2159 2160 A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank 2161 2162 Level: intermediate 2163 2164 .seealso: `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()` 2165 @*/ 2166 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) 2167 { 2168 PetscFunctionBegin; 2169 *flops = petsc_TotalFlops; 2170 PetscFunctionReturn(0); 2171 } 2172 2173 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2174 { 2175 size_t fullLength; 2176 va_list Argp; 2177 2178 PetscFunctionBegin; 2179 if (!petsc_logObjects) PetscFunctionReturn(0); 2180 va_start(Argp, format); 2181 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp)); 2182 va_end(Argp); 2183 PetscFunctionReturn(0); 2184 } 2185 2186 /*MC 2187 PetscLogFlops - Adds floating point operations to the global counter. 2188 2189 Synopsis: 2190 #include <petsclog.h> 2191 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2192 2193 Not Collective 2194 2195 Input Parameter: 2196 . f - flop counter 2197 2198 Usage: 2199 .vb 2200 PetscLogEvent USER_EVENT; 2201 PetscLogEventRegister("User event",0,&USER_EVENT); 2202 PetscLogEventBegin(USER_EVENT,0,0,0,0); 2203 [code segment to monitor] 2204 PetscLogFlops(user_flops) 2205 PetscLogEventEnd(USER_EVENT,0,0,0,0); 2206 .ve 2207 2208 Note: 2209 A global counter logs all PETSc flop counts. The user can use 2210 PetscLogFlops() to increment this counter to include flops for the 2211 application code. 2212 2213 Level: intermediate 2214 2215 .seealso: `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2216 M*/ 2217 2218 /*MC 2219 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) 2220 to get accurate timings 2221 2222 Synopsis: 2223 #include <petsclog.h> 2224 void PetscPreLoadBegin(PetscBool flag,char *name); 2225 2226 Not Collective 2227 2228 Input Parameters: 2229 + flag - PETSC_TRUE to run twice, `PETSC_FALSE` to run once, may be overridden 2230 with command line option -preload true or -preload false 2231 - name - name of first stage (lines of code timed separately with -log_view) to 2232 be preloaded 2233 2234 Usage: 2235 .vb 2236 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2237 lines of code 2238 PetscPreLoadStage("second stage"); 2239 lines of code 2240 PetscPreLoadEnd(); 2241 .ve 2242 2243 Note: 2244 Only works in C/C++, not Fortran 2245 2246 Flags available within the macro. 2247 + PetscPreLoadingUsed - true if we are or have done preloading 2248 . PetscPreLoadingOn - true if it is CURRENTLY doing preload 2249 . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second 2250 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on 2251 The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin() 2252 and PetscPreLoadEnd() 2253 2254 Level: intermediate 2255 2256 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2257 M*/ 2258 2259 /*MC 2260 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) 2261 to get accurate timings 2262 2263 Synopsis: 2264 #include <petsclog.h> 2265 void PetscPreLoadEnd(void); 2266 2267 Not Collective 2268 2269 Usage: 2270 .vb 2271 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2272 lines of code 2273 PetscPreLoadStage("second stage"); 2274 lines of code 2275 PetscPreLoadEnd(); 2276 .ve 2277 2278 Note: 2279 Only works in C/C++ not fortran 2280 2281 Level: intermediate 2282 2283 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2284 M*/ 2285 2286 /*MC 2287 PetscPreLoadStage - Start a new segment of code to be timed separately. 2288 to get accurate timings 2289 2290 Synopsis: 2291 #include <petsclog.h> 2292 void PetscPreLoadStage(char *name); 2293 2294 Not Collective 2295 2296 Usage: 2297 .vb 2298 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2299 lines of code 2300 PetscPreLoadStage("second stage"); 2301 lines of code 2302 PetscPreLoadEnd(); 2303 .ve 2304 2305 Note: 2306 Only works in C/C++ not fortran 2307 2308 Level: intermediate 2309 2310 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2311 M*/ 2312 2313 #if PetscDefined(HAVE_DEVICE) 2314 #include <petsc/private/deviceimpl.h> 2315 2316 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2317 2318 /* 2319 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2320 because it will result in timing results that cannot be interpreted. 2321 */ 2322 static PetscErrorCode PetscLogGpuTime_Off(void) 2323 { 2324 PetscLogGpuTimeFlag = PETSC_FALSE; 2325 return 0; 2326 } 2327 2328 /*@C 2329 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2330 2331 Options Database Key: 2332 . -log_view_gpu_time - provide the GPU times in the -log_view output 2333 2334 Notes: 2335 Turning on the timing of the 2336 GPU kernels can slow down the entire computation and should only be used when studying the performance 2337 of operations on GPU such as vector operations and matrix-vector operations. 2338 2339 This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off. 2340 2341 Level: advanced 2342 2343 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2344 @*/ 2345 PetscErrorCode PetscLogGpuTime(void) 2346 { 2347 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2348 PetscLogGpuTimeFlag = PETSC_TRUE; 2349 return 0; 2350 } 2351 2352 /*@C 2353 PetscLogGpuTimeBegin - Start timer for device 2354 2355 Notes: 2356 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times). 2357 2358 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times). 2359 2360 There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()` 2361 2362 This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space. 2363 2364 The regular logging captures the time for data transfers and any CPU activites during the event 2365 2366 It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel. 2367 2368 Developer Notes: 2369 The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`. 2370 2371 `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the 2372 event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution, 2373 but not continued GPU excution, until the timer event is recorded. 2374 2375 Level: intermediate 2376 2377 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2378 @*/ 2379 PetscErrorCode PetscLogGpuTimeBegin(void) 2380 { 2381 PetscFunctionBegin; 2382 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2383 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2384 PetscDeviceContext dctx; 2385 2386 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2387 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2388 } else { 2389 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2390 } 2391 PetscFunctionReturn(0); 2392 } 2393 2394 /*@C 2395 PetscLogGpuTimeEnd - Stop timer for device 2396 2397 Level: intermediate 2398 2399 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2400 @*/ 2401 PetscErrorCode PetscLogGpuTimeEnd(void) 2402 { 2403 PetscFunctionBegin; 2404 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2405 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2406 PetscDeviceContext dctx; 2407 PetscLogDouble elapsed; 2408 2409 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2410 PetscCall(PetscDeviceContextEndTimer_Internal(dctx, &elapsed)); 2411 petsc_gtime += (elapsed / 1000.0); 2412 } else { 2413 PetscCall(PetscTimeAdd(&petsc_gtime)); 2414 } 2415 PetscFunctionReturn(0); 2416 } 2417 #endif /* end of PETSC_HAVE_DEVICE */ 2418 2419 #else /* end of -DPETSC_USE_LOG section */ 2420 2421 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2422 { 2423 PetscFunctionBegin; 2424 PetscFunctionReturn(0); 2425 } 2426 2427 #endif /* PETSC_USE_LOG*/ 2428 2429 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2430 PetscClassId PETSC_OBJECT_CLASSID = 0; 2431 2432 /*@C 2433 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2434 2435 Not Collective 2436 2437 Input Parameter: 2438 . name - The class name 2439 2440 Output Parameter: 2441 . oclass - The class id or classid 2442 2443 Level: developer 2444 2445 .seealso: `PetscLogEventRegister()` 2446 @*/ 2447 PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass) 2448 { 2449 #if defined(PETSC_USE_LOG) 2450 PetscStageLog stageLog; 2451 PetscInt stage; 2452 #endif 2453 2454 PetscFunctionBegin; 2455 *oclass = ++PETSC_LARGEST_CLASSID; 2456 #if defined(PETSC_USE_LOG) 2457 PetscCall(PetscLogGetStageLog(&stageLog)); 2458 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2459 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2460 #endif 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2465 #include <mpe.h> 2466 2467 PetscBool PetscBeganMPE = PETSC_FALSE; 2468 2469 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2470 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2471 2472 /*@C 2473 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files 2474 and slows the program down. 2475 2476 Collective over `PETSC_COMM_WORLD` 2477 2478 Options Database Key: 2479 . -log_mpe - Prints extensive log information 2480 2481 Note: 2482 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 2483 intended for production runs since it logs only flop rates and object 2484 creation (and should not significantly slow the programs). 2485 2486 Level: advanced 2487 2488 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2489 `PetscLogEventDeactivate()` 2490 @*/ 2491 PetscErrorCode PetscLogMPEBegin(void) 2492 { 2493 PetscFunctionBegin; 2494 /* Do MPE initialization */ 2495 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2496 PetscCall(PetscInfo(0, "Initializing MPE.\n")); 2497 PetscCall(MPE_Init_log()); 2498 2499 PetscBeganMPE = PETSC_TRUE; 2500 } else { 2501 PetscCall(PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n")); 2502 } 2503 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2504 PetscFunctionReturn(0); 2505 } 2506 2507 /*@C 2508 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2509 2510 Collective over `PETSC_COMM_WORLD` 2511 2512 Level: advanced 2513 2514 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2515 @*/ 2516 PetscErrorCode PetscLogMPEDump(const char sname[]) 2517 { 2518 char name[PETSC_MAX_PATH_LEN]; 2519 2520 PetscFunctionBegin; 2521 if (PetscBeganMPE) { 2522 PetscCall(PetscInfo(0, "Finalizing MPE.\n")); 2523 if (sname) { 2524 PetscCall(PetscStrcpy(name, sname)); 2525 } else { 2526 PetscCall(PetscGetProgramName(name, sizeof(name))); 2527 } 2528 PetscCall(MPE_Finish_log(name)); 2529 } else { 2530 PetscCall(PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n")); 2531 } 2532 PetscFunctionReturn(0); 2533 } 2534 2535 #define PETSC_RGB_COLORS_MAX 39 2536 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ", 2537 "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ", 2538 "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ", 2539 "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ", 2540 "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "}; 2541 2542 /*@C 2543 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()` 2544 2545 Not collective. Maybe it should be? 2546 2547 Output Parameter: 2548 . str - character string representing the color 2549 2550 Level: developer 2551 2552 .seealso: `PetscLogEventRegister()` 2553 @*/ 2554 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) 2555 { 2556 static int idx = 0; 2557 2558 PetscFunctionBegin; 2559 *str = PetscLogMPERGBColors[idx]; 2560 idx = (idx + 1) % PETSC_RGB_COLORS_MAX; 2561 PetscFunctionReturn(0); 2562 } 2563 2564 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2565