1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 15 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 16 17 #if defined(PETSC_USE_LOG) 18 #include <petscmachineinfo.h> 19 #include <petscconfiginfo.h> 20 21 /* used in the MPI_XXX() count macros in petsclog.h */ 22 23 /* Action and object logging variables */ 24 Action *petsc_actions = NULL; 25 Object *petsc_objects = NULL; 26 PetscBool petsc_logActions = PETSC_FALSE; 27 PetscBool petsc_logObjects = PETSC_FALSE; 28 int petsc_numActions = 0, petsc_maxActions = 100; 29 int petsc_numObjects = 0, petsc_maxObjects = 100; 30 int petsc_numObjectsDestroyed = 0; 31 32 /* Global counters */ 33 PetscLogDouble petsc_BaseTime = 0.0; 34 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 35 PetscLogDouble petsc_tmp_flops = 0.0; /* The incremental number of flops */ 36 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 37 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 38 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 39 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 40 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 41 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 42 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 43 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 44 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 45 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 46 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 47 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 48 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 49 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 50 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 51 #if defined(PETSC_HAVE_DEVICE) 52 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 53 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 54 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 55 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 56 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 57 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 58 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 59 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 60 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 61 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 62 #endif 63 64 /* Logging functions */ 65 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 66 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 67 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 68 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 69 70 /* Tracing event logging variables */ 71 FILE *petsc_tracefile = NULL; 72 int petsc_tracelevel = 0; 73 const char *petsc_traceblanks = " "; 74 char petsc_tracespace[128] = " "; 75 PetscLogDouble petsc_tracetime = 0.0; 76 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 77 78 static PetscIntStack current_log_event_stack = NULL; 79 80 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) { 81 int stage; 82 PetscBool opt; 83 84 PetscFunctionBegin; 85 if (PetscLogInitializeCalled) PetscFunctionReturn(0); 86 PetscLogInitializeCalled = PETSC_TRUE; 87 88 PetscCall(PetscIntStackCreate(¤t_log_event_stack)); 89 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt)); 90 if (opt) petsc_logActions = PETSC_FALSE; 91 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt)); 92 if (opt) petsc_logObjects = PETSC_FALSE; 93 if (petsc_logActions) PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 94 if (petsc_logObjects) PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 95 PetscLogPHC = PetscLogObjCreateDefault; 96 PetscLogPHD = PetscLogObjDestroyDefault; 97 /* Setup default logging structures */ 98 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 99 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 100 101 /* All processors sync here for more consistent logging */ 102 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 103 PetscTime(&petsc_BaseTime); 104 PetscCall(PetscLogStagePush(stage)); 105 PetscFunctionReturn(0); 106 } 107 108 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) { 109 PetscStageLog stageLog; 110 111 PetscFunctionBegin; 112 PetscCall(PetscFree(petsc_actions)); 113 PetscCall(PetscFree(petsc_objects)); 114 PetscCall(PetscLogNestedEnd()); 115 PetscCall(PetscLogSet(NULL, NULL)); 116 117 /* Resetting phase */ 118 PetscCall(PetscLogGetStageLog(&stageLog)); 119 PetscCall(PetscStageLogDestroy(stageLog)); 120 PetscCall(PetscIntStackDestroy(current_log_event_stack)); 121 current_log_event_stack = NULL; 122 123 petsc_TotalFlops = 0.0; 124 petsc_numActions = 0; 125 petsc_numObjects = 0; 126 petsc_numObjectsDestroyed = 0; 127 petsc_maxActions = 100; 128 petsc_maxObjects = 100; 129 petsc_actions = NULL; 130 petsc_objects = NULL; 131 petsc_logActions = PETSC_FALSE; 132 petsc_logObjects = PETSC_FALSE; 133 petsc_BaseTime = 0.0; 134 petsc_TotalFlops = 0.0; 135 petsc_tmp_flops = 0.0; 136 petsc_send_ct = 0.0; 137 petsc_recv_ct = 0.0; 138 petsc_send_len = 0.0; 139 petsc_recv_len = 0.0; 140 petsc_isend_ct = 0.0; 141 petsc_irecv_ct = 0.0; 142 petsc_isend_len = 0.0; 143 petsc_irecv_len = 0.0; 144 petsc_wait_ct = 0.0; 145 petsc_wait_any_ct = 0.0; 146 petsc_wait_all_ct = 0.0; 147 petsc_sum_of_waits_ct = 0.0; 148 petsc_allreduce_ct = 0.0; 149 petsc_gather_ct = 0.0; 150 petsc_scatter_ct = 0.0; 151 #if defined(PETSC_HAVE_DEVICE) 152 petsc_ctog_ct = 0.0; 153 petsc_gtoc_ct = 0.0; 154 petsc_ctog_sz = 0.0; 155 petsc_gtoc_sz = 0.0; 156 petsc_gflops = 0.0; 157 petsc_gtime = 0.0; 158 #endif 159 PETSC_LARGEST_EVENT = PETSC_EVENT; 160 PetscLogPHC = NULL; 161 PetscLogPHD = NULL; 162 petsc_tracefile = NULL; 163 petsc_tracelevel = 0; 164 petsc_traceblanks = " "; 165 petsc_tracespace[0] = ' '; 166 petsc_tracespace[1] = 0; 167 petsc_tracetime = 0.0; 168 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 169 PETSC_OBJECT_CLASSID = 0; 170 petsc_stageLog = NULL; 171 PetscLogInitializeCalled = PETSC_FALSE; 172 PetscFunctionReturn(0); 173 } 174 175 /*@C 176 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 177 178 Not Collective 179 180 Input Parameters: 181 + b - The function called at beginning of event 182 - e - The function called at end of event 183 184 Level: developer 185 186 Developer Note: 187 The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. 188 189 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()` 190 @*/ 191 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) { 192 PetscFunctionBegin; 193 PetscLogPLB = b; 194 PetscLogPLE = e; 195 PetscFunctionReturn(0); 196 } 197 198 /*@C 199 PetscLogIsActive - Check if logging is currently in progress. 200 201 Not Collective 202 203 Output Parameter: 204 . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise 205 206 Level: beginner 207 208 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 209 @*/ 210 PetscErrorCode PetscLogIsActive(PetscBool *isActive) { 211 PetscFunctionBegin; 212 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 213 PetscFunctionReturn(0); 214 } 215 216 /*@C 217 PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop 218 rates and object creation and should not slow programs down too much. 219 This routine may be called more than once. 220 221 Logically Collective over `PETSC_COMM_WORLD` 222 223 Options Database Key: 224 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 225 screen (for code configured with --with-log=1 (which is the default)) 226 227 Usage: 228 .vb 229 PetscInitialize(...); 230 PetscLogDefaultBegin(); 231 ... code ... 232 PetscLogView(viewer); or PetscLogDump(); 233 PetscFinalize(); 234 .ve 235 236 Note: 237 `PetscLogView()` or `PetscLogDump()` actually cause the printing of 238 the logging information. 239 240 Level: advanced 241 242 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 243 @*/ 244 PetscErrorCode PetscLogDefaultBegin(void) { 245 PetscFunctionBegin; 246 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 247 PetscFunctionReturn(0); 248 } 249 250 /*@C 251 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 252 all events. This creates large log files and slows the program down. 253 254 Logically Collective on `PETSC_COMM_WORLD` 255 256 Options Database Key: 257 . -log_all - Prints extensive log information 258 259 Usage: 260 .vb 261 PetscInitialize(...); 262 PetscLogAllBegin(); 263 ... code ... 264 PetscLogDump(filename); 265 PetscFinalize(); 266 .ve 267 268 Note: 269 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 270 intended for production runs since it logs only flop rates and object 271 creation (and shouldn't significantly slow the programs). 272 273 Level: advanced 274 275 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 276 @*/ 277 PetscErrorCode PetscLogAllBegin(void) { 278 PetscFunctionBegin; 279 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 280 PetscFunctionReturn(0); 281 } 282 283 /*@C 284 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 285 begins or ends, the event name is printed. 286 287 Logically Collective on `PETSC_COMM_WORLD` 288 289 Input Parameter: 290 . file - The file to print trace in (e.g. stdout) 291 292 Options Database Key: 293 . -log_trace [filename] - Activates `PetscLogTraceBegin()` 294 295 Notes: 296 `PetscLogTraceBegin()` prints the processor number, the execution time (sec), 297 then "Event begin:" or "Event end:" followed by the event name. 298 299 `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful 300 to determine where a program is hanging without running in the 301 debugger. Can be used in conjunction with the -info option. 302 303 Level: intermediate 304 305 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 306 @*/ 307 PetscErrorCode PetscLogTraceBegin(FILE *file) { 308 PetscFunctionBegin; 309 petsc_tracefile = file; 310 311 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 312 PetscFunctionReturn(0); 313 } 314 315 /*@ 316 PetscLogActions - Determines whether actions are logged for the graphical viewer. 317 318 Not Collective 319 320 Input Parameter: 321 . flag - `PETSC_TRUE` if actions are to be logged 322 323 Options Database Key: 324 . -log_exclude_actions - Turns off actions logging 325 326 Level: intermediate 327 328 Note: 329 Logging of actions continues to consume more memory as the program 330 runs. Long running programs should consider turning this feature off. 331 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 332 @*/ 333 PetscErrorCode PetscLogActions(PetscBool flag) { 334 PetscFunctionBegin; 335 petsc_logActions = flag; 336 PetscFunctionReturn(0); 337 } 338 339 /*@ 340 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 341 342 Not Collective 343 344 Input Parameter: 345 . flag - `PETSC_TRUE` if objects are to be logged 346 347 Options Database Key: 348 . -log_exclude_objects - Turns off objects logging 349 350 Level: intermediate 351 352 Note: 353 Logging of objects continues to consume more memory as the program 354 runs. Long running programs should consider turning this feature off. 355 356 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 357 @*/ 358 PetscErrorCode PetscLogObjects(PetscBool flag) { 359 PetscFunctionBegin; 360 petsc_logObjects = flag; 361 PetscFunctionReturn(0); 362 } 363 364 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 365 /*@C 366 PetscLogStageRegister - Attaches a character string name to a logging stage. 367 368 Not Collective 369 370 Input Parameter: 371 . sname - The name to associate with that stage 372 373 Output Parameter: 374 . stage - The stage number 375 376 Level: intermediate 377 378 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 379 @*/ 380 PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage) { 381 PetscStageLog stageLog; 382 PetscLogEvent event; 383 384 PetscFunctionBegin; 385 PetscCall(PetscLogGetStageLog(&stageLog)); 386 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 387 /* Copy events already changed in the main stage, this sucks */ 388 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 389 for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 390 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 391 PetscFunctionReturn(0); 392 } 393 394 /*@C 395 PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage 396 397 Not Collective 398 399 Input Parameter: 400 . stage - The stage on which to log 401 402 Usage: 403 If the option -log_view is used to run the program containing the 404 following code, then 2 sets of summary data will be printed during 405 PetscFinalize(). 406 .vb 407 PetscInitialize(int *argc,char ***args,0,0); 408 [stage 0 of code] 409 PetscLogStagePush(1); 410 [stage 1 of code] 411 PetscLogStagePop(); 412 PetscBarrier(...); 413 [more stage 0 of code] 414 PetscFinalize(); 415 .ve 416 417 Note: 418 Use `PetscLogStageRegister()` to register a stage. 419 420 Level: intermediate 421 422 .seealso: `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 423 @*/ 424 PetscErrorCode PetscLogStagePush(PetscLogStage stage) { 425 PetscStageLog stageLog; 426 427 PetscFunctionBegin; 428 PetscCall(PetscLogGetStageLog(&stageLog)); 429 PetscCall(PetscStageLogPush(stageLog, stage)); 430 PetscFunctionReturn(0); 431 } 432 433 /*@C 434 PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()` 435 436 Not Collective 437 438 Usage: 439 If the option -log_view is used to run the program containing the 440 following code, then 2 sets of summary data will be printed during 441 PetscFinalize(). 442 .vb 443 PetscInitialize(int *argc,char ***args,0,0); 444 [stage 0 of code] 445 PetscLogStagePush(1); 446 [stage 1 of code] 447 PetscLogStagePop(); 448 PetscBarrier(...); 449 [more stage 0 of code] 450 PetscFinalize(); 451 .ve 452 453 Level: intermediate 454 455 .seealso: `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 456 @*/ 457 PetscErrorCode PetscLogStagePop(void) { 458 PetscStageLog stageLog; 459 460 PetscFunctionBegin; 461 PetscCall(PetscLogGetStageLog(&stageLog)); 462 PetscCall(PetscStageLogPop(stageLog)); 463 PetscFunctionReturn(0); 464 } 465 466 /*@ 467 PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 468 469 Not Collective 470 471 Input Parameters: 472 + stage - The stage 473 - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 474 475 Level: intermediate 476 477 Note: 478 If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist 479 480 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 481 @*/ 482 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) { 483 PetscStageLog stageLog; 484 485 PetscFunctionBegin; 486 PetscCall(PetscLogGetStageLog(&stageLog)); 487 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 488 PetscFunctionReturn(0); 489 } 490 491 /*@ 492 PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 493 494 Not Collective 495 496 Input Parameter: 497 . stage - The stage 498 499 Output Parameter: 500 . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 501 502 Level: intermediate 503 504 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 505 @*/ 506 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) { 507 PetscStageLog stageLog; 508 509 PetscFunctionBegin; 510 PetscCall(PetscLogGetStageLog(&stageLog)); 511 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 512 PetscFunctionReturn(0); 513 } 514 515 /*@ 516 PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()` 517 518 Not Collective 519 520 Input Parameters: 521 + stage - The stage 522 - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 523 524 Level: intermediate 525 526 Developer Note: 527 What does visible mean, needs to be documented. 528 529 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 530 @*/ 531 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) { 532 PetscStageLog stageLog; 533 534 PetscFunctionBegin; 535 PetscCall(PetscLogGetStageLog(&stageLog)); 536 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 537 PetscFunctionReturn(0); 538 } 539 540 /*@ 541 PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()` 542 543 Not Collective 544 545 Input Parameter: 546 . stage - The stage 547 548 Output Parameter: 549 . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 550 551 Level: intermediate 552 553 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 554 @*/ 555 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) { 556 PetscStageLog stageLog; 557 558 PetscFunctionBegin; 559 PetscCall(PetscLogGetStageLog(&stageLog)); 560 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 561 PetscFunctionReturn(0); 562 } 563 564 /*@C 565 PetscLogStageGetId - Returns the stage id when given the stage name. 566 567 Not Collective 568 569 Input Parameter: 570 . name - The stage name 571 572 Output Parameter: 573 . stage - The stage, , or -1 if no stage with that name exists 574 575 Level: intermediate 576 577 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 578 @*/ 579 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) { 580 PetscStageLog stageLog; 581 582 PetscFunctionBegin; 583 PetscCall(PetscLogGetStageLog(&stageLog)); 584 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 585 PetscFunctionReturn(0); 586 } 587 588 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 589 590 /*@C 591 PetscLogEventRegister - Registers an event name for logging operations 592 593 Not Collective 594 595 Input Parameters: 596 + name - The name associated with the event 597 - classid - The classid associated to the class for this event, obtain either with 598 `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones 599 are only available in C code 600 601 Output Parameter: 602 . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`. 603 604 Example of Usage: 605 .vb 606 PetscLogEvent USER_EVENT; 607 PetscClassId classid; 608 PetscLogDouble user_event_flops; 609 PetscClassIdRegister("class name",&classid); 610 PetscLogEventRegister("User event name",classid,&USER_EVENT); 611 PetscLogEventBegin(USER_EVENT,0,0,0,0); 612 [code segment to monitor] 613 PetscLogFlops(user_event_flops); 614 PetscLogEventEnd(USER_EVENT,0,0,0,0); 615 .ve 616 617 Notes: 618 PETSc automatically logs library events if the code has been 619 configured with --with-log (which is the default) and 620 -log_view or -log_all is specified. `PetscLogEventRegister()` is 621 intended for logging user events to supplement this PETSc 622 information. 623 624 PETSc can gather data for use with the utilities Jumpshot 625 (part of the MPICH distribution). If PETSc has been compiled 626 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 627 MPICH), the user can employ another command line option, -log_mpe, 628 to create a logfile, "mpe.log", which can be visualized 629 Jumpshot. 630 631 The classid is associated with each event so that classes of events 632 can be disabled simultaneously, such as all matrix events. The user 633 can either use an existing classid, such as `MAT_CLASSID`, or create 634 their own as shown in the example. 635 636 If an existing event with the same name exists, its event handle is 637 returned instead of creating a new event. 638 639 Level: intermediate 640 641 .seealso: `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 642 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 643 @*/ 644 PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event) { 645 PetscStageLog stageLog; 646 int stage; 647 648 PetscFunctionBegin; 649 *event = PETSC_DECIDE; 650 PetscCall(PetscLogGetStageLog(&stageLog)); 651 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 652 if (*event > 0) PetscFunctionReturn(0); 653 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 654 for (stage = 0; stage < stageLog->numStages; stage++) { 655 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 656 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 657 } 658 PetscFunctionReturn(0); 659 } 660 661 /*@ 662 PetscLogEventSetCollective - Indicates that a particular event is collective. 663 664 Not Collective 665 666 Input Parameters: 667 + event - The event id 668 - collective - Bolean flag indicating whether a particular event is collective 669 670 Notes: 671 New events returned from `PetscLogEventRegister()` are collective by default. 672 673 Collective events are handled specially if the -log_sync is used. In that case the logging saves information about 674 two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication 675 to be performed. This option is useful to debug imbalance within the computations or communications 676 677 Level: developer 678 679 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()` 680 @*/ 681 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective) { 682 PetscStageLog stageLog; 683 PetscEventRegLog eventRegLog; 684 685 PetscFunctionBegin; 686 PetscCall(PetscLogGetStageLog(&stageLog)); 687 PetscCall(PetscStageLogGetEventRegLog(stageLog, &eventRegLog)); 688 PetscCheck(event >= 0 && event <= eventRegLog->numEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid event id"); 689 eventRegLog->eventInfo[event].collective = collective; 690 PetscFunctionReturn(0); 691 } 692 693 /*@ 694 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 695 696 Not Collective 697 698 Input Parameter: 699 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 700 701 Level: developer 702 703 .seealso: `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 704 @*/ 705 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) { 706 PetscStageLog stageLog; 707 int stage; 708 709 PetscFunctionBegin; 710 PetscCall(PetscLogGetStageLog(&stageLog)); 711 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 712 PetscFunctionReturn(0); 713 } 714 715 /*@ 716 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 717 718 Not Collective 719 720 Input Parameter: 721 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 722 723 Level: developer 724 725 Note: 726 If a class is excluded then events associated with that class are not logged. 727 728 .seealso: `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 729 @*/ 730 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) { 731 PetscStageLog stageLog; 732 int stage; 733 734 PetscFunctionBegin; 735 PetscCall(PetscLogGetStageLog(&stageLog)); 736 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 737 PetscFunctionReturn(0); 738 } 739 740 /*@ 741 PetscLogEventActivate - Indicates that a particular event should be logged. 742 743 Not Collective 744 745 Input Parameter: 746 . event - The event id 747 748 Usage: 749 .vb 750 PetscLogEventDeactivate(VEC_SetValues); 751 [code where you do not want to log VecSetValues()] 752 PetscLogEventActivate(VEC_SetValues); 753 [code where you do want to log VecSetValues()] 754 .ve 755 756 Note: 757 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 758 or an event number obtained with `PetscLogEventRegister()`. 759 760 Level: advanced 761 762 .seealso: `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 763 @*/ 764 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) { 765 PetscStageLog stageLog; 766 int stage; 767 768 PetscFunctionBegin; 769 PetscCall(PetscLogGetStageLog(&stageLog)); 770 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 771 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 772 PetscFunctionReturn(0); 773 } 774 775 /*@ 776 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 777 778 Not Collective 779 780 Input Parameter: 781 . event - The event id 782 783 Usage: 784 .vb 785 PetscLogEventDeactivate(VEC_SetValues); 786 [code where you do not want to log VecSetValues()] 787 PetscLogEventActivate(VEC_SetValues); 788 [code where you do want to log VecSetValues()] 789 .ve 790 791 Note: 792 The event may be either a pre-defined PETSc event (found in 793 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 794 795 Level: advanced 796 797 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 798 @*/ 799 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) { 800 PetscStageLog stageLog; 801 int stage; 802 803 PetscFunctionBegin; 804 PetscCall(PetscLogGetStageLog(&stageLog)); 805 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 806 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 807 PetscFunctionReturn(0); 808 } 809 810 /*@ 811 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called 812 813 Not Collective 814 815 Input Parameter: 816 . event - The event id 817 818 Usage: 819 .vb 820 PetscLogEventDeactivatePush(VEC_SetValues); 821 [code where you do not want to log VecSetValues()] 822 PetscLogEventDeactivatePop(VEC_SetValues); 823 [code where you do want to log VecSetValues()] 824 .ve 825 826 Note: 827 The event may be either a pre-defined PETSc event (found in 828 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 829 830 Level: advanced 831 832 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()` 833 @*/ 834 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) { 835 PetscStageLog stageLog; 836 int stage; 837 838 PetscFunctionBegin; 839 PetscCall(PetscLogGetStageLog(&stageLog)); 840 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 841 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 842 PetscFunctionReturn(0); 843 } 844 845 /*@ 846 PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()` 847 848 Not Collective 849 850 Input Parameter: 851 . event - The event id 852 853 Usage: 854 .vb 855 PetscLogEventDeactivatePush(VEC_SetValues); 856 [code where you do not want to log VecSetValues()] 857 PetscLogEventDeactivatePop(VEC_SetValues); 858 [code where you do want to log VecSetValues()] 859 .ve 860 861 Note: 862 The event may be either a pre-defined PETSc event (found in 863 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 864 865 Level: advanced 866 867 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 868 @*/ 869 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) { 870 PetscStageLog stageLog; 871 int stage; 872 873 PetscFunctionBegin; 874 PetscCall(PetscLogGetStageLog(&stageLog)); 875 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 876 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 877 PetscFunctionReturn(0); 878 } 879 880 /*@ 881 PetscLogEventSetActiveAll - Turns on logging of all events 882 883 Not Collective 884 885 Input Parameters: 886 + event - The event id 887 - isActive - The activity flag determining whether the event is logged 888 889 Level: advanced 890 891 .seealso: `PlogEventActivate()`, `PlogEventDeactivate()` 892 @*/ 893 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) { 894 PetscStageLog stageLog; 895 int stage; 896 897 PetscFunctionBegin; 898 PetscCall(PetscLogGetStageLog(&stageLog)); 899 for (stage = 0; stage < stageLog->numStages; stage++) { 900 if (isActive) { 901 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 902 } else { 903 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 904 } 905 } 906 PetscFunctionReturn(0); 907 } 908 909 /*@ 910 PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage 911 912 Not Collective 913 914 Input Parameter: 915 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 916 917 Level: developer 918 919 .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 920 @*/ 921 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) { 922 PetscStageLog stageLog; 923 int stage; 924 925 PetscFunctionBegin; 926 PetscCall(PetscLogGetStageLog(&stageLog)); 927 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 928 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 929 PetscFunctionReturn(0); 930 } 931 932 /*@ 933 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage 934 935 Not Collective 936 937 Input Parameter: 938 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 939 940 Level: developer 941 942 .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`,`PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 943 @*/ 944 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) { 945 PetscStageLog stageLog; 946 int stage; 947 948 PetscFunctionBegin; 949 PetscCall(PetscLogGetStageLog(&stageLog)); 950 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 951 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 952 PetscFunctionReturn(0); 953 } 954 955 /*MC 956 PetscLogEventSync - Synchronizes the beginning of a user event. 957 958 Synopsis: 959 #include <petsclog.h> 960 PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm) 961 962 Collective 963 964 Input Parameters: 965 + e - integer associated with the event obtained from PetscLogEventRegister() 966 - comm - an MPI communicator 967 968 Usage: 969 .vb 970 PetscLogEvent USER_EVENT; 971 PetscLogEventRegister("User event",0,&USER_EVENT); 972 PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD); 973 PetscLogEventBegin(USER_EVENT,0,0,0,0); 974 [code segment to monitor] 975 PetscLogEventEnd(USER_EVENT,0,0,0,0); 976 .ve 977 978 Note: 979 This routine should be called only if there is not a 980 `PetscObject` available to pass to `PetscLogEventBegin()`. 981 982 Level: developer 983 984 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 985 M*/ 986 987 /*MC 988 PetscLogEventBegin - Logs the beginning of a user event. 989 990 Synopsis: 991 #include <petsclog.h> 992 PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 993 994 Not Collective 995 996 Input Parameters: 997 + e - integer associated with the event obtained from PetscLogEventRegister() 998 - o1,o2,o3,o4 - objects associated with the event, or 0 999 1000 Fortran Synopsis: 1001 void PetscLogEventBegin(int e,PetscErrorCode ierr) 1002 1003 Usage: 1004 .vb 1005 PetscLogEvent USER_EVENT; 1006 PetscLogDouble user_event_flops; 1007 PetscLogEventRegister("User event",0,&USER_EVENT); 1008 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1009 [code segment to monitor] 1010 PetscLogFlops(user_event_flops); 1011 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1012 .ve 1013 1014 Developer Note: 1015 `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the 1016 errors that occur in the macro directly because other packages that use this macros have used them in their 1017 own functions or methods that do not return error codes and it would be disruptive to change the current 1018 behavior. 1019 1020 Level: intermediate 1021 1022 .seealso: `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1023 M*/ 1024 1025 /*MC 1026 PetscLogEventEnd - Log the end of a user event. 1027 1028 Synopsis: 1029 #include <petsclog.h> 1030 PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1031 1032 Not Collective 1033 1034 Input Parameters: 1035 + e - integer associated with the event obtained with PetscLogEventRegister() 1036 - o1,o2,o3,o4 - objects associated with the event, or 0 1037 1038 Fortran Synopsis: 1039 void PetscLogEventEnd(int e,PetscErrorCode ierr) 1040 1041 Usage: 1042 .vb 1043 PetscLogEvent USER_EVENT; 1044 PetscLogDouble user_event_flops; 1045 PetscLogEventRegister("User event",0,&USER_EVENT,); 1046 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1047 [code segment to monitor] 1048 PetscLogFlops(user_event_flops); 1049 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1050 .ve 1051 1052 Level: intermediate 1053 1054 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1055 M*/ 1056 1057 /*@C 1058 PetscLogEventGetId - Returns the event id when given the event name. 1059 1060 Not Collective 1061 1062 Input Parameter: 1063 . name - The event name 1064 1065 Output Parameter: 1066 . event - The event, or -1 if no event with that name exists 1067 1068 Level: intermediate 1069 1070 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1071 @*/ 1072 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) { 1073 PetscStageLog stageLog; 1074 1075 PetscFunctionBegin; 1076 PetscCall(PetscLogGetStageLog(&stageLog)); 1077 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1078 PetscFunctionReturn(0); 1079 } 1080 1081 PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event) { 1082 PetscFunctionBegin; 1083 PetscCall(PetscIntStackPush(current_log_event_stack, event)); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 PetscErrorCode PetscLogPopCurrentEvent_Internal(void) { 1088 PetscFunctionBegin; 1089 PetscCall(PetscIntStackPop(current_log_event_stack, NULL)); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event) { 1094 PetscBool empty; 1095 1096 PetscFunctionBegin; 1097 PetscValidIntPointer(event, 1); 1098 *event = PETSC_DECIDE; 1099 PetscCall(PetscIntStackEmpty(current_log_event_stack, &empty)); 1100 if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack, event)); 1101 PetscFunctionReturn(0); 1102 } 1103 1104 PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event) { 1105 PetscFunctionBegin; 1106 if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event, NULL, NULL, NULL, NULL)); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event) { 1111 PetscStageLog stageLog; 1112 PetscEventPerfLog eventLog; 1113 int stage; 1114 1115 PetscFunctionBegin; 1116 if (event == PETSC_DECIDE) PetscFunctionReturn(0); 1117 PetscCall(PetscLogEventBegin(event, NULL, NULL, NULL, NULL)); 1118 PetscCall(PetscLogGetStageLog(&stageLog)); 1119 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1120 PetscCall(PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog)); 1121 eventLog->eventInfo[event].count--; 1122 PetscFunctionReturn(0); 1123 } 1124 1125 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1126 /*@C 1127 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1128 be read by bin/petscview. This program no longer exists. 1129 1130 Collective on `PETSC_COMM_WORLD` 1131 1132 Input Parameter: 1133 . name - an optional file name 1134 1135 Usage: 1136 .vb 1137 PetscInitialize(...); 1138 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1139 ... code ... 1140 PetscLogDump(filename); 1141 PetscFinalize(); 1142 .ve 1143 1144 Note: 1145 The default file name is 1146 $ Log.<rank> 1147 where <rank> is the processor number. If no name is specified, 1148 this file will be used. 1149 1150 Level: advanced 1151 1152 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1153 @*/ 1154 PetscErrorCode PetscLogDump(const char sname[]) { 1155 PetscStageLog stageLog; 1156 PetscEventPerfInfo *eventInfo; 1157 FILE *fd; 1158 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1159 PetscLogDouble flops, _TotalTime; 1160 PetscMPIInt rank; 1161 int action, object, curStage; 1162 PetscLogEvent event; 1163 1164 PetscFunctionBegin; 1165 /* Calculate the total elapsed time */ 1166 PetscTime(&_TotalTime); 1167 _TotalTime -= petsc_BaseTime; 1168 /* Open log file */ 1169 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1170 if (sname && sname[0]) sprintf(file, "%s.%d", sname, rank); 1171 else sprintf(file, "Log.%d", rank); 1172 PetscCall(PetscFixFilename(file, fname)); 1173 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1174 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1175 /* Output totals */ 1176 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1177 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1178 /* Output actions */ 1179 if (petsc_logActions) { 1180 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1181 for (action = 0; action < petsc_numActions; action++) { 1182 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1183 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1184 } 1185 } 1186 /* Output objects */ 1187 if (petsc_logObjects) { 1188 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1189 for (object = 0; object < petsc_numObjects; object++) { 1190 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem)); 1191 if (!petsc_objects[object].name[0]) { 1192 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n")); 1193 } else { 1194 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1195 } 1196 if (petsc_objects[object].info[0] != 0) { 1197 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1198 } else { 1199 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1200 } 1201 } 1202 } 1203 /* Output events */ 1204 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1205 PetscCall(PetscLogGetStageLog(&stageLog)); 1206 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1207 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1208 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1209 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time; 1210 else flops = 0.0; 1211 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops)); 1212 } 1213 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 /* 1218 PetscLogView_Detailed - Each process prints the times for its own events 1219 1220 */ 1221 PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) { 1222 PetscStageLog stageLog; 1223 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1224 PetscLogDouble locTotalTime, numRed, maxMem; 1225 int numStages, numEvents, stage, event; 1226 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1227 PetscMPIInt rank, size; 1228 1229 PetscFunctionBegin; 1230 PetscCallMPI(MPI_Comm_size(comm, &size)); 1231 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1232 /* Must preserve reduction count before we go on */ 1233 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1234 /* Get the total elapsed time */ 1235 PetscTime(&locTotalTime); 1236 locTotalTime -= petsc_BaseTime; 1237 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 1238 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 1239 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 1240 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 1241 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 1242 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 1243 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 1244 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 1245 PetscCall(PetscLogGetStageLog(&stageLog)); 1246 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1247 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1248 for (stage = 0; stage < numStages; stage++) { 1249 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name)); 1250 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name)); 1251 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1252 for (event = 0; event < numEvents; event++) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name)); 1253 } 1254 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1255 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1256 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1257 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1258 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1259 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1260 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1261 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects)); 1262 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1263 PetscCall(PetscViewerFlush(viewer)); 1264 for (stage = 0; stage < numStages; stage++) { 1265 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1266 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, 1267 stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1268 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1269 for (event = 0; event < numEvents; event++) { 1270 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1271 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1272 stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, 1273 eventInfo->flops)); 1274 if (eventInfo->dof[0] >= 0.) { 1275 PetscInt d, e; 1276 1277 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1278 for (d = 0; d < 8; ++d) { 1279 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1280 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1281 } 1282 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1283 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1284 for (e = 0; e < 8; ++e) { 1285 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1286 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1287 } 1288 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1289 } 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1291 } 1292 } 1293 PetscCall(PetscViewerFlush(viewer)); 1294 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 /* 1299 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1300 */ 1301 PetscErrorCode PetscLogView_CSV(PetscViewer viewer) { 1302 PetscStageLog stageLog; 1303 PetscEventPerfInfo *eventInfo = NULL; 1304 PetscLogDouble locTotalTime, maxMem; 1305 int numStages, numEvents, stage, event; 1306 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1307 PetscMPIInt rank, size; 1308 1309 PetscFunctionBegin; 1310 PetscCallMPI(MPI_Comm_size(comm, &size)); 1311 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1312 /* Must preserve reduction count before we go on */ 1313 /* Get the total elapsed time */ 1314 PetscTime(&locTotalTime); 1315 locTotalTime -= petsc_BaseTime; 1316 PetscCall(PetscLogGetStageLog(&stageLog)); 1317 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1318 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1319 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1320 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1321 PetscCall(PetscViewerFlush(viewer)); 1322 for (stage = 0; stage < numStages; stage++) { 1323 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1324 1325 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1326 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1327 for (event = 0; event < numEvents; event++) { 1328 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1329 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, 1330 eventInfo->numReductions, eventInfo->flops)); 1331 if (eventInfo->dof[0] >= 0.) { 1332 PetscInt d, e; 1333 1334 for (d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1335 for (e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1336 } 1337 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1338 } 1339 } 1340 PetscCall(PetscViewerFlush(viewer)); 1341 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1342 PetscFunctionReturn(0); 1343 } 1344 1345 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) { 1346 PetscFunctionBegin; 1347 if (!PetscLogSyncOn) PetscFunctionReturn(0); 1348 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1349 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1350 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1351 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1352 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1353 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1354 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1355 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1356 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1357 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1358 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1359 PetscFunctionReturn(0); 1360 } 1361 1362 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) { 1363 PetscFunctionBegin; 1364 if (PetscDefined(USE_DEBUG)) { 1365 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1366 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1367 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1368 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1369 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1370 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1371 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1372 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1373 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1374 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1375 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1376 } 1377 PetscFunctionReturn(0); 1378 } 1379 1380 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) { 1381 #if defined(PETSC_HAVE_DEVICE) 1382 PetscMPIInt size; 1383 1384 PetscFunctionBegin; 1385 PetscCallMPI(MPI_Comm_size(comm, &size)); 1386 if (use_gpu_aware_mpi || size == 1) PetscFunctionReturn(0); 1387 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1388 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1389 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1390 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1391 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1392 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1393 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1394 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1395 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1396 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1397 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1398 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1399 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1400 PetscFunctionReturn(0); 1401 #else 1402 return 0; 1403 #endif 1404 } 1405 1406 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) { 1407 #if defined(PETSC_HAVE_DEVICE) 1408 1409 PetscFunctionBegin; 1410 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(0); 1411 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1412 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1413 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1414 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1415 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1416 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1417 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1418 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1419 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1420 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1421 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1422 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1423 PetscFunctionReturn(0); 1424 #else 1425 return 0; 1426 #endif 1427 } 1428 1429 PetscErrorCode PetscLogView_Default(PetscViewer viewer) { 1430 FILE *fd; 1431 PetscLogDouble zero = 0.0; 1432 PetscStageLog stageLog; 1433 PetscStageInfo *stageInfo = NULL; 1434 PetscEventPerfInfo *eventInfo = NULL; 1435 PetscClassPerfInfo *classInfo; 1436 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1437 const char *name; 1438 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1439 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1440 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1441 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1442 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1443 PetscLogDouble min, max, tot, ratio, avg, x, y; 1444 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1445 #if defined(PETSC_HAVE_DEVICE) 1446 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1447 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1448 #endif 1449 PetscMPIInt minC, maxC; 1450 PetscMPIInt size, rank; 1451 PetscBool *localStageUsed, *stageUsed; 1452 PetscBool *localStageVisible, *stageVisible; 1453 int numStages, localNumEvents, numEvents; 1454 int stage, oclass; 1455 PetscLogEvent event; 1456 PetscErrorCode ierr = 0; 1457 char version[256]; 1458 MPI_Comm comm; 1459 #if defined(PETSC_HAVE_DEVICE) 1460 PetscLogEvent eventid; 1461 PetscInt64 nas = 0x7FF0000000000002; 1462 #endif 1463 1464 PetscFunctionBegin; 1465 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1466 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1467 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1468 PetscCallMPI(MPI_Comm_size(comm, &size)); 1469 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1470 /* Get the total elapsed time */ 1471 PetscTime(&locTotalTime); 1472 locTotalTime -= petsc_BaseTime; 1473 1474 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1475 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1476 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1477 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1478 PetscCall(PetscLogViewWarnSync(comm, fd)); 1479 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1480 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1481 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1482 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1483 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1484 PetscCall(PetscGetUserName(username, sizeof(username))); 1485 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1486 PetscCall(PetscGetDate(date, sizeof(date))); 1487 PetscCall(PetscGetVersion(version, sizeof(version))); 1488 if (size == 1) { 1489 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1490 } else { 1491 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1492 } 1493 #if defined(PETSC_HAVE_OPENMP) 1494 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1495 #endif 1496 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1497 1498 /* Must preserve reduction count before we go on */ 1499 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1500 1501 /* Calculate summary information */ 1502 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1503 /* Time */ 1504 PetscCallMPI(MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1505 PetscCallMPI(MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1506 PetscCallMPI(MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1507 avg = tot / ((PetscLogDouble)size); 1508 if (min != 0.0) ratio = max / min; 1509 else ratio = 0.0; 1510 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1511 TotalTime = tot; 1512 /* Objects */ 1513 avg = (PetscLogDouble)petsc_numObjects; 1514 PetscCallMPI(MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1515 PetscCallMPI(MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1516 PetscCallMPI(MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1517 avg = tot / ((PetscLogDouble)size); 1518 if (min != 0.0) ratio = max / min; 1519 else ratio = 0.0; 1520 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1521 /* Flops */ 1522 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1523 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1524 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1525 avg = tot / ((PetscLogDouble)size); 1526 if (min != 0.0) ratio = max / min; 1527 else ratio = 0.0; 1528 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1529 TotalFlops = tot; 1530 /* Flops/sec -- Must talk to Barry here */ 1531 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1532 else flops = 0.0; 1533 PetscCallMPI(MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1534 PetscCallMPI(MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1535 PetscCallMPI(MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1536 avg = tot / ((PetscLogDouble)size); 1537 if (min != 0.0) ratio = max / min; 1538 else ratio = 0.0; 1539 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1540 /* Memory */ 1541 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1542 if (mem > 0.0) { 1543 PetscCallMPI(MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1544 PetscCallMPI(MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1545 PetscCallMPI(MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1546 avg = tot / ((PetscLogDouble)size); 1547 if (min != 0.0) ratio = max / min; 1548 else ratio = 0.0; 1549 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1550 } 1551 /* Messages */ 1552 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1553 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1554 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1555 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1556 avg = tot / ((PetscLogDouble)size); 1557 if (min != 0.0) ratio = max / min; 1558 else ratio = 0.0; 1559 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1560 numMessages = tot; 1561 /* Message Lengths */ 1562 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1563 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1564 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1565 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1566 if (numMessages != 0) avg = tot / numMessages; 1567 else avg = 0.0; 1568 if (min != 0.0) ratio = max / min; 1569 else ratio = 0.0; 1570 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1571 messageLength = tot; 1572 /* Reductions */ 1573 PetscCallMPI(MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1574 PetscCallMPI(MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1575 PetscCallMPI(MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1576 if (min != 0.0) ratio = max / min; 1577 else ratio = 0.0; 1578 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1579 numReductions = red; /* wrong because uses count from process zero */ 1580 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1581 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1582 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1583 1584 /* Get total number of stages -- 1585 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1586 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1587 This seems best accomplished by assoicating a communicator with each stage. 1588 */ 1589 PetscCall(PetscLogGetStageLog(&stageLog)); 1590 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1591 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1592 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1593 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1594 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1595 if (numStages > 0) { 1596 stageInfo = stageLog->stageInfo; 1597 for (stage = 0; stage < numStages; stage++) { 1598 if (stage < stageLog->numStages) { 1599 localStageUsed[stage] = stageInfo[stage].used; 1600 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1601 } else { 1602 localStageUsed[stage] = PETSC_FALSE; 1603 localStageVisible[stage] = PETSC_TRUE; 1604 } 1605 } 1606 PetscCallMPI(MPI_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1607 PetscCallMPI(MPI_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1608 for (stage = 0; stage < numStages; stage++) { 1609 if (stageUsed[stage]) { 1610 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1611 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1612 break; 1613 } 1614 } 1615 for (stage = 0; stage < numStages; stage++) { 1616 if (!stageUsed[stage]) continue; 1617 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1618 if (localStageUsed[stage]) { 1619 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1620 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1621 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1622 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1623 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1624 name = stageInfo[stage].name; 1625 } else { 1626 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1627 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1628 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1629 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1630 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1631 name = ""; 1632 } 1633 mess *= 0.5; 1634 messLen *= 0.5; 1635 red /= size; 1636 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1637 else fracTime = 0.0; 1638 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1639 else fracFlops = 0.0; 1640 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1641 if (numMessages != 0.0) fracMessages = mess / numMessages; 1642 else fracMessages = 0.0; 1643 if (mess != 0.0) avgMessLen = messLen / mess; 1644 else avgMessLen = 0.0; 1645 if (messageLength != 0.0) fracLength = messLen / messageLength; 1646 else fracLength = 0.0; 1647 if (numReductions != 0.0) fracReductions = red / numReductions; 1648 else fracReductions = 0.0; 1649 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1650 } 1651 } 1652 1653 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1654 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1655 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1656 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1657 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1658 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1659 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1660 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1661 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1662 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1663 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1664 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1665 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1666 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1667 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1668 if (PetscLogMemory) { 1669 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1670 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1671 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1672 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1673 } 1674 #if defined(PETSC_HAVE_DEVICE) 1675 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1676 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1677 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1678 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1679 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1680 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1681 #endif 1682 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1683 1684 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1685 1686 /* Report events */ 1687 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1688 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1689 #if defined(PETSC_HAVE_DEVICE) 1690 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1691 #endif 1692 PetscCall(PetscFPrintf(comm, fd, "\n")); 1693 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1694 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1695 #if defined(PETSC_HAVE_DEVICE) 1696 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1697 #endif 1698 PetscCall(PetscFPrintf(comm, fd, "\n")); 1699 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1700 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1701 #if defined(PETSC_HAVE_DEVICE) 1702 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1703 #endif 1704 PetscCall(PetscFPrintf(comm, fd, "\n")); 1705 1706 #if defined(PETSC_HAVE_DEVICE) 1707 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1708 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1709 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1710 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1711 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1712 #endif 1713 1714 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1715 for (stage = 0; stage < numStages; stage++) { 1716 if (!stageVisible[stage]) continue; 1717 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1718 if (localStageUsed[stage]) { 1719 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1720 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1721 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1722 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1723 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1724 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1725 } else { 1726 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1727 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1728 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1729 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1730 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1731 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1732 } 1733 mess *= 0.5; 1734 messLen *= 0.5; 1735 red /= size; 1736 1737 /* Get total number of events in this stage -- 1738 Currently, a single processor can register more events than another, but events must all be registered in order, 1739 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1740 on the event ID. This seems best accomplished by associating a communicator with each stage. 1741 1742 Problem: If the event did not happen on proc 1, its name will not be available. 1743 Problem: Event visibility is not implemented 1744 */ 1745 if (localStageUsed[stage]) { 1746 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1747 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1748 } else localNumEvents = 0; 1749 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1750 for (event = 0; event < numEvents; event++) { 1751 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1752 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1753 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; 1754 else flopr = 0.0; 1755 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1756 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1757 PetscCallMPI(MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1758 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1759 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1760 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1761 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1762 PetscCallMPI(MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1763 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1764 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1765 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1766 if (PetscLogMemory) { 1767 PetscCallMPI(MPI_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1768 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1769 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1770 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1771 } 1772 #if defined(PETSC_HAVE_DEVICE) 1773 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1774 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1775 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1776 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1777 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1778 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1779 #endif 1780 name = stageLog->eventLog->eventInfo[event].name; 1781 } else { 1782 flopr = 0.0; 1783 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1784 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1785 PetscCallMPI(MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1786 PetscCallMPI(MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1787 PetscCallMPI(MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1788 PetscCallMPI(MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1789 PetscCallMPI(MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1790 PetscCallMPI(MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1791 PetscCallMPI(MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1792 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1793 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1794 if (PetscLogMemory) { 1795 PetscCallMPI(MPI_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1796 PetscCallMPI(MPI_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1797 PetscCallMPI(MPI_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1798 PetscCallMPI(MPI_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1799 } 1800 #if defined(PETSC_HAVE_DEVICE) 1801 PetscCallMPI(MPI_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1802 PetscCallMPI(MPI_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1803 PetscCallMPI(MPI_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1804 PetscCallMPI(MPI_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1805 PetscCallMPI(MPI_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1806 PetscCallMPI(MPI_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1807 #endif 1808 name = ""; 1809 } 1810 if (mint < 0.0) { 1811 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name)); 1812 mint = 0; 1813 } 1814 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, name); 1815 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1816 #if defined(PETSC_HAVE_DEVICE) 1817 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1818 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 1819 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 1820 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 1821 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 1822 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 1823 } 1824 } 1825 #endif 1826 totm *= 0.5; 1827 totml *= 0.5; 1828 totr /= size; 1829 1830 if (maxC != 0) { 1831 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 1832 else ratC = 0.0; 1833 if (mint != 0.0) ratt = maxt / mint; 1834 else ratt = 0.0; 1835 if (minf != 0.0) ratf = maxf / minf; 1836 else ratf = 0.0; 1837 if (TotalTime != 0.0) fracTime = tott / TotalTime; 1838 else fracTime = 0.0; 1839 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 1840 else fracFlops = 0.0; 1841 if (stageTime != 0.0) fracStageTime = tott / stageTime; 1842 else fracStageTime = 0.0; 1843 if (flops != 0.0) fracStageFlops = totf / flops; 1844 else fracStageFlops = 0.0; 1845 if (numMessages != 0.0) fracMess = totm / numMessages; 1846 else fracMess = 0.0; 1847 if (messageLength != 0.0) fracMessLen = totml / messageLength; 1848 else fracMessLen = 0.0; 1849 if (numReductions != 0.0) fracRed = totr / numReductions; 1850 else fracRed = 0.0; 1851 if (mess != 0.0) fracStageMess = totm / mess; 1852 else fracStageMess = 0.0; 1853 if (messLen != 0.0) fracStageMessLen = totml / messLen; 1854 else fracStageMessLen = 0.0; 1855 if (red != 0.0) fracStageRed = totr / red; 1856 else fracStageRed = 0.0; 1857 if (totm != 0.0) totml /= totm; 1858 else totml = 0.0; 1859 if (maxt != 0.0) flopr = totf / maxt; 1860 else flopr = 0.0; 1861 if (fracStageTime > 1.00) PetscCall(PetscFPrintf(comm, fd, "Warning -- total time of event greater than time of entire stage -- something is wrong with the timer\n")); 1862 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d%4.1f %5.4e%4.1f %3.2e%4.1f %2.1e %2.1e %2.1e%3.0f%3.0f%3.0f%3.0f%3.0f %3.0f%3.0f%3.0f%3.0f%3.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 1863 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 1864 #if defined(PETSC_HAVE_DEVICE) 1865 if (totf != 0.0) fracgflops = gflops / totf; 1866 else fracgflops = 0.0; 1867 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 1868 else gflopr = 0.0; 1869 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e% 3.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 1870 #endif 1871 PetscCall(PetscFPrintf(comm, fd, "\n")); 1872 } 1873 } 1874 } 1875 1876 /* Memory usage and object creation */ 1877 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1878 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1879 #if defined(PETSC_HAVE_DEVICE) 1880 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1881 #endif 1882 PetscCall(PetscFPrintf(comm, fd, "\n")); 1883 PetscCall(PetscFPrintf(comm, fd, "\n")); 1884 1885 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1886 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1887 stats for stages local to processor sets. 1888 */ 1889 /* We should figure out the longest object name here (now 20 characters) */ 1890 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 1891 for (stage = 0; stage < numStages; stage++) { 1892 if (localStageUsed[stage]) { 1893 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 1894 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1895 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 1896 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 1897 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions)); 1898 } 1899 } 1900 } else { 1901 if (!localStageVisible[stage]) continue; 1902 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1903 } 1904 } 1905 1906 PetscCall(PetscFree(localStageUsed)); 1907 PetscCall(PetscFree(stageUsed)); 1908 PetscCall(PetscFree(localStageVisible)); 1909 PetscCall(PetscFree(stageVisible)); 1910 1911 /* Information unrelated to this particular run */ 1912 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 1913 PetscTime(&y); 1914 PetscTime(&x); 1915 PetscTime(&y); 1916 PetscTime(&y); 1917 PetscTime(&y); 1918 PetscTime(&y); 1919 PetscTime(&y); 1920 PetscTime(&y); 1921 PetscTime(&y); 1922 PetscTime(&y); 1923 PetscTime(&y); 1924 PetscTime(&y); 1925 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 1926 /* MPI information */ 1927 if (size > 1) { 1928 MPI_Status status; 1929 PetscMPIInt tag; 1930 MPI_Comm newcomm; 1931 1932 PetscCallMPI(MPI_Barrier(comm)); 1933 PetscTime(&x); 1934 PetscCallMPI(MPI_Barrier(comm)); 1935 PetscCallMPI(MPI_Barrier(comm)); 1936 PetscCallMPI(MPI_Barrier(comm)); 1937 PetscCallMPI(MPI_Barrier(comm)); 1938 PetscCallMPI(MPI_Barrier(comm)); 1939 PetscTime(&y); 1940 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 1941 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 1942 PetscCallMPI(MPI_Barrier(comm)); 1943 if (rank) { 1944 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 1945 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 1946 } else { 1947 PetscTime(&x); 1948 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 1949 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 1950 PetscTime(&y); 1951 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 1952 } 1953 PetscCall(PetscCommDestroy(&newcomm)); 1954 } 1955 PetscCall(PetscOptionsView(NULL, viewer)); 1956 1957 /* Machine and compile information */ 1958 #if defined(PETSC_USE_FORTRAN_KERNELS) 1959 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 1960 #else 1961 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 1962 #endif 1963 #if defined(PETSC_USE_64BIT_INDICES) 1964 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64 bit PetscInt\n")); 1965 #elif defined(PETSC_USE___FLOAT128) 1966 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32 bit PetscInt\n")); 1967 #endif 1968 #if defined(PETSC_USE_REAL_SINGLE) 1969 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 1970 #elif defined(PETSC_USE___FLOAT128) 1971 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 1972 #endif 1973 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1974 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 1975 #else 1976 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 1977 #endif 1978 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 1979 1980 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 1981 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 1982 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 1983 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 1984 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 1985 1986 /* Cleanup */ 1987 PetscCall(PetscFPrintf(comm, fd, "\n")); 1988 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1989 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1990 PetscCall(PetscFPTrapPop()); 1991 PetscFunctionReturn(0); 1992 } 1993 1994 /*@C 1995 PetscLogView - Prints a summary of the logging. 1996 1997 Collective over MPI_Comm 1998 1999 Input Parameter: 2000 . viewer - an ASCII viewer 2001 2002 Options Database Keys: 2003 + -log_view [:filename] - Prints summary of log information 2004 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2005 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2006 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2007 . -log_view_memory - Also display memory usage in each event 2008 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2009 . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation 2010 - -log_trace [filename] - Displays a trace of what each process is doing 2011 2012 Notes: 2013 It is possible to control the logging programatically but we recommend using the options database approach whenever possible 2014 By default the summary is printed to stdout. 2015 2016 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2017 2018 If PETSc is configured with --with-logging=0 then this functionality is not available 2019 2020 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2021 directory then open filename.xml with your browser. Specific notes for certain browsers 2022 $ Firefox and Internet explorer - simply open the file 2023 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2024 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2025 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2026 your browser. 2027 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2028 window and render the XML log file contents. 2029 2030 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2031 2032 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2033 or using speedscope (https://www.speedscope.app). 2034 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2035 2036 Level: beginner 2037 2038 .seealso: `PetscLogDefaultBegin()`, `PetscLogDump()` 2039 @*/ 2040 PetscErrorCode PetscLogView(PetscViewer viewer) { 2041 PetscBool isascii; 2042 PetscViewerFormat format; 2043 int stage, lastStage; 2044 PetscStageLog stageLog; 2045 2046 PetscFunctionBegin; 2047 PetscCheck(PetscLogPLB, PETSC_COMM_SELF, PETSC_ERR_SUP, "Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2048 /* Pop off any stages the user forgot to remove */ 2049 lastStage = 0; 2050 PetscCall(PetscLogGetStageLog(&stageLog)); 2051 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2052 while (stage >= 0) { 2053 lastStage = stage; 2054 PetscCall(PetscStageLogPop(stageLog)); 2055 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2056 } 2057 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2058 PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Currently can only view logging to ASCII"); 2059 PetscCall(PetscViewerGetFormat(viewer, &format)); 2060 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2061 PetscCall(PetscLogView_Default(viewer)); 2062 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2063 PetscCall(PetscLogView_Detailed(viewer)); 2064 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2065 PetscCall(PetscLogView_CSV(viewer)); 2066 } else if (format == PETSC_VIEWER_ASCII_XML) { 2067 PetscCall(PetscLogView_Nested(viewer)); 2068 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2069 PetscCall(PetscLogView_Flamegraph(viewer)); 2070 } 2071 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2072 PetscFunctionReturn(0); 2073 } 2074 2075 /*@C 2076 PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed. 2077 2078 Collective on `PETSC_COMM_WORLD` 2079 2080 Level: developer 2081 2082 .seealso: `PetscLogView()` 2083 @*/ 2084 PetscErrorCode PetscLogViewFromOptions(void) { 2085 PetscViewer viewer; 2086 PetscBool flg; 2087 PetscViewerFormat format; 2088 2089 PetscFunctionBegin; 2090 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg)); 2091 if (flg) { 2092 PetscCall(PetscViewerPushFormat(viewer, format)); 2093 PetscCall(PetscLogView(viewer)); 2094 PetscCall(PetscViewerPopFormat(viewer)); 2095 PetscCall(PetscViewerDestroy(&viewer)); 2096 } 2097 PetscFunctionReturn(0); 2098 } 2099 2100 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2101 /*@C 2102 PetscGetFlops - Returns the number of flops used on this processor 2103 since the program began. 2104 2105 Not Collective 2106 2107 Output Parameter: 2108 flops - number of floating point operations 2109 2110 Notes: 2111 A global counter logs all PETSc flop counts. The user can use 2112 `PetscLogFlops()` to increment this counter to include flops for the 2113 application code. 2114 2115 A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank 2116 2117 Level: intermediate 2118 2119 .seealso: `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()` 2120 @*/ 2121 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) { 2122 PetscFunctionBegin; 2123 *flops = petsc_TotalFlops; 2124 PetscFunctionReturn(0); 2125 } 2126 2127 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) { 2128 size_t fullLength; 2129 va_list Argp; 2130 2131 PetscFunctionBegin; 2132 if (!petsc_logObjects) PetscFunctionReturn(0); 2133 va_start(Argp, format); 2134 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp)); 2135 va_end(Argp); 2136 PetscFunctionReturn(0); 2137 } 2138 2139 /*MC 2140 PetscLogFlops - Adds floating point operations to the global counter. 2141 2142 Synopsis: 2143 #include <petsclog.h> 2144 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2145 2146 Not Collective 2147 2148 Input Parameter: 2149 . f - flop counter 2150 2151 Usage: 2152 .vb 2153 PetscLogEvent USER_EVENT; 2154 PetscLogEventRegister("User event",0,&USER_EVENT); 2155 PetscLogEventBegin(USER_EVENT,0,0,0,0); 2156 [code segment to monitor] 2157 PetscLogFlops(user_flops) 2158 PetscLogEventEnd(USER_EVENT,0,0,0,0); 2159 .ve 2160 2161 Note: 2162 A global counter logs all PETSc flop counts. The user can use 2163 PetscLogFlops() to increment this counter to include flops for the 2164 application code. 2165 2166 Level: intermediate 2167 2168 .seealso: `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2169 M*/ 2170 2171 /*MC 2172 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) 2173 to get accurate timings 2174 2175 Synopsis: 2176 #include <petsclog.h> 2177 void PetscPreLoadBegin(PetscBool flag,char *name); 2178 2179 Not Collective 2180 2181 Input Parameters: 2182 + flag - PETSC_TRUE to run twice, `PETSC_FALSE` to run once, may be overridden 2183 with command line option -preload true or -preload false 2184 - name - name of first stage (lines of code timed separately with -log_view) to 2185 be preloaded 2186 2187 Usage: 2188 .vb 2189 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2190 lines of code 2191 PetscPreLoadStage("second stage"); 2192 lines of code 2193 PetscPreLoadEnd(); 2194 .ve 2195 2196 Note: 2197 Only works in C/C++, not Fortran 2198 2199 Flags available within the macro. 2200 + PetscPreLoadingUsed - true if we are or have done preloading 2201 . PetscPreLoadingOn - true if it is CURRENTLY doing preload 2202 . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second 2203 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on 2204 The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin() 2205 and PetscPreLoadEnd() 2206 2207 Level: intermediate 2208 2209 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2210 M*/ 2211 2212 /*MC 2213 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) 2214 to get accurate timings 2215 2216 Synopsis: 2217 #include <petsclog.h> 2218 void PetscPreLoadEnd(void); 2219 2220 Not Collective 2221 2222 Usage: 2223 .vb 2224 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2225 lines of code 2226 PetscPreLoadStage("second stage"); 2227 lines of code 2228 PetscPreLoadEnd(); 2229 .ve 2230 2231 Note: 2232 Only works in C/C++ not fortran 2233 2234 Level: intermediate 2235 2236 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2237 M*/ 2238 2239 /*MC 2240 PetscPreLoadStage - Start a new segment of code to be timed separately. 2241 to get accurate timings 2242 2243 Synopsis: 2244 #include <petsclog.h> 2245 void PetscPreLoadStage(char *name); 2246 2247 Not Collective 2248 2249 Usage: 2250 .vb 2251 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2252 lines of code 2253 PetscPreLoadStage("second stage"); 2254 lines of code 2255 PetscPreLoadEnd(); 2256 .ve 2257 2258 Note: 2259 Only works in C/C++ not fortran 2260 2261 Level: intermediate 2262 2263 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2264 M*/ 2265 2266 #if PetscDefined(HAVE_DEVICE) 2267 #include <petsc/private/deviceimpl.h> 2268 2269 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2270 2271 /* 2272 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2273 because it will result in timing results that cannot be interpreted. 2274 */ 2275 static PetscErrorCode PetscLogGpuTime_Off(void) { 2276 PetscLogGpuTimeFlag = PETSC_FALSE; 2277 return 0; 2278 } 2279 2280 /*@C 2281 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2282 2283 Options Database Key: 2284 . -log_view_gpu_time - provide the GPU times in the -log_view output 2285 2286 Notes: 2287 Turning on the timing of the 2288 GPU kernels can slow down the entire computation and should only be used when studying the performance 2289 of operations on GPU such as vector operations and matrix-vector operations. 2290 2291 This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off. 2292 2293 Level: advanced 2294 2295 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2296 @*/ 2297 PetscErrorCode PetscLogGpuTime(void) { 2298 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2299 PetscLogGpuTimeFlag = PETSC_TRUE; 2300 return 0; 2301 } 2302 2303 /*@C 2304 PetscLogGpuTimeBegin - Start timer for device 2305 2306 Notes: 2307 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times). 2308 2309 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times). 2310 2311 There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()` 2312 2313 This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space. 2314 2315 The regular logging captures the time for data transfers and any CPU activites during the event 2316 2317 It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel. 2318 2319 Developer Notes: 2320 The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`. 2321 2322 `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the 2323 event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution, 2324 but not continued GPU excution, until the timer event is recorded. 2325 2326 Level: intermediate 2327 2328 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2329 @*/ 2330 PetscErrorCode PetscLogGpuTimeBegin(void) { 2331 PetscFunctionBegin; 2332 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2333 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2334 PetscDeviceContext dctx; 2335 2336 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2337 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2338 } else { 2339 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2340 } 2341 PetscFunctionReturn(0); 2342 } 2343 2344 /*@C 2345 PetscLogGpuTimeEnd - Stop timer for device 2346 2347 Level: intermediate 2348 2349 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2350 @*/ 2351 PetscErrorCode PetscLogGpuTimeEnd(void) { 2352 PetscFunctionBegin; 2353 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2354 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2355 PetscDeviceContext dctx; 2356 PetscLogDouble elapsed; 2357 2358 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2359 PetscCall(PetscDeviceContextEndTimer_Internal(dctx, &elapsed)); 2360 petsc_gtime += (elapsed / 1000.0); 2361 } else { 2362 PetscCall(PetscTimeAdd(&petsc_gtime)); 2363 } 2364 PetscFunctionReturn(0); 2365 } 2366 #endif /* end of PETSC_HAVE_DEVICE */ 2367 2368 #else /* end of -DPETSC_USE_LOG section */ 2369 2370 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) { 2371 PetscFunctionBegin; 2372 PetscFunctionReturn(0); 2373 } 2374 2375 #endif /* PETSC_USE_LOG*/ 2376 2377 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2378 PetscClassId PETSC_OBJECT_CLASSID = 0; 2379 2380 /*@C 2381 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2382 2383 Not Collective 2384 2385 Input Parameter: 2386 . name - The class name 2387 2388 Output Parameter: 2389 . oclass - The class id or classid 2390 2391 Level: developer 2392 2393 .seealso: `PetscLogEventRegister()` 2394 @*/ 2395 PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass) { 2396 #if defined(PETSC_USE_LOG) 2397 PetscStageLog stageLog; 2398 PetscInt stage; 2399 #endif 2400 2401 PetscFunctionBegin; 2402 *oclass = ++PETSC_LARGEST_CLASSID; 2403 #if defined(PETSC_USE_LOG) 2404 PetscCall(PetscLogGetStageLog(&stageLog)); 2405 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2406 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2407 #endif 2408 PetscFunctionReturn(0); 2409 } 2410 2411 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2412 #include <mpe.h> 2413 2414 PetscBool PetscBeganMPE = PETSC_FALSE; 2415 2416 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2417 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2418 2419 /*@C 2420 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files 2421 and slows the program down. 2422 2423 Collective over `PETSC_COMM_WORLD` 2424 2425 Options Database Key: 2426 . -log_mpe - Prints extensive log information 2427 2428 Note: 2429 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 2430 intended for production runs since it logs only flop rates and object 2431 creation (and should not significantly slow the programs). 2432 2433 Level: advanced 2434 2435 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2436 `PetscLogEventDeactivate()` 2437 @*/ 2438 PetscErrorCode PetscLogMPEBegin(void) { 2439 PetscFunctionBegin; 2440 /* Do MPE initialization */ 2441 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2442 PetscCall(PetscInfo(0, "Initializing MPE.\n")); 2443 PetscCall(MPE_Init_log()); 2444 2445 PetscBeganMPE = PETSC_TRUE; 2446 } else { 2447 PetscCall(PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n")); 2448 } 2449 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2450 PetscFunctionReturn(0); 2451 } 2452 2453 /*@C 2454 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2455 2456 Collective over `PETSC_COMM_WORLD` 2457 2458 Level: advanced 2459 2460 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2461 @*/ 2462 PetscErrorCode PetscLogMPEDump(const char sname[]) { 2463 char name[PETSC_MAX_PATH_LEN]; 2464 2465 PetscFunctionBegin; 2466 if (PetscBeganMPE) { 2467 PetscCall(PetscInfo(0, "Finalizing MPE.\n")); 2468 if (sname) { 2469 PetscCall(PetscStrcpy(name, sname)); 2470 } else { 2471 PetscCall(PetscGetProgramName(name, sizeof(name))); 2472 } 2473 PetscCall(MPE_Finish_log(name)); 2474 } else { 2475 PetscCall(PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n")); 2476 } 2477 PetscFunctionReturn(0); 2478 } 2479 2480 #define PETSC_RGB_COLORS_MAX 39 2481 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ", 2482 "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ", 2483 "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ", 2484 "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ", 2485 "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "}; 2486 2487 /*@C 2488 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()` 2489 2490 Not collective. Maybe it should be? 2491 2492 Output Parameter: 2493 . str - character string representing the color 2494 2495 Level: developer 2496 2497 .seealso: `PetscLogEventRegister()` 2498 @*/ 2499 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) { 2500 static int idx = 0; 2501 2502 PetscFunctionBegin; 2503 *str = PetscLogMPERGBColors[idx]; 2504 idx = (idx + 1) % PETSC_RGB_COLORS_MAX; 2505 PetscFunctionReturn(0); 2506 } 2507 2508 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2509