1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 15 /* this is not consistently used and is difficult to keep correct if done manually, possibly it should be removed */ 16 PetscErrorCode PetscLogObjectParent(PetscObject p, PetscObject c) { 17 if (!c || !p) return 0; 18 c->parent = p; 19 c->parentid = p->id; 20 return 0; 21 } 22 23 /*@C 24 PetscLogObjectMemory - Adds to an object a count of additional amount of memory that is used by the object. 25 26 Not collective. 27 28 Input Parameters: 29 + obj - the PETSc object 30 - mem - the amount of memory that is being added to the object 31 32 Level: developer 33 34 Developer Note: 35 This is not used consistently. It is very difficult to manually track the memory usage per object so this should 36 likely be removed and replaced with an automated system. 37 38 .seealso: `PetscFinalize()`, `PetscInitializeFortran()`, `PetscGetArgs()`, `PetscInitializeNoArguments()` 39 @*/ 40 PetscErrorCode PetscLogObjectMemory(PetscObject p, PetscLogDouble m) { 41 if (!p) return 0; 42 p->mem += m; 43 return 0; 44 } 45 46 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 47 48 #if defined(PETSC_USE_LOG) 49 #include <petscmachineinfo.h> 50 #include <petscconfiginfo.h> 51 52 /* used in the MPI_XXX() count macros in petsclog.h */ 53 54 /* Action and object logging variables */ 55 Action *petsc_actions = NULL; 56 Object *petsc_objects = NULL; 57 PetscBool petsc_logActions = PETSC_FALSE; 58 PetscBool petsc_logObjects = PETSC_FALSE; 59 int petsc_numActions = 0, petsc_maxActions = 100; 60 int petsc_numObjects = 0, petsc_maxObjects = 100; 61 int petsc_numObjectsDestroyed = 0; 62 63 /* Global counters */ 64 PetscLogDouble petsc_BaseTime = 0.0; 65 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 66 PetscLogDouble petsc_tmp_flops = 0.0; /* The incremental number of flops */ 67 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 68 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 69 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 70 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 71 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 72 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 73 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 74 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 75 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 76 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 77 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 78 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 79 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 80 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 81 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 82 #if defined(PETSC_HAVE_DEVICE) 83 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 84 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 85 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 86 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 87 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 88 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 89 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 90 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 91 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 92 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 93 #endif 94 95 /* Logging functions */ 96 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 97 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 98 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 99 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 100 101 /* Tracing event logging variables */ 102 FILE *petsc_tracefile = NULL; 103 int petsc_tracelevel = 0; 104 const char *petsc_traceblanks = " "; 105 char petsc_tracespace[128] = " "; 106 PetscLogDouble petsc_tracetime = 0.0; 107 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 108 109 static PetscIntStack current_log_event_stack = NULL; 110 111 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) { 112 int stage; 113 PetscBool opt; 114 115 PetscFunctionBegin; 116 if (PetscLogInitializeCalled) PetscFunctionReturn(0); 117 PetscLogInitializeCalled = PETSC_TRUE; 118 119 PetscCall(PetscIntStackCreate(¤t_log_event_stack)); 120 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt)); 121 if (opt) petsc_logActions = PETSC_FALSE; 122 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt)); 123 if (opt) petsc_logObjects = PETSC_FALSE; 124 if (petsc_logActions) PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 125 if (petsc_logObjects) PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 126 PetscLogPHC = PetscLogObjCreateDefault; 127 PetscLogPHD = PetscLogObjDestroyDefault; 128 /* Setup default logging structures */ 129 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 130 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 131 132 /* All processors sync here for more consistent logging */ 133 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 134 PetscTime(&petsc_BaseTime); 135 PetscCall(PetscLogStagePush(stage)); 136 PetscFunctionReturn(0); 137 } 138 139 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) { 140 PetscStageLog stageLog; 141 142 PetscFunctionBegin; 143 PetscCall(PetscFree(petsc_actions)); 144 PetscCall(PetscFree(petsc_objects)); 145 PetscCall(PetscLogNestedEnd()); 146 PetscCall(PetscLogSet(NULL, NULL)); 147 148 /* Resetting phase */ 149 PetscCall(PetscLogGetStageLog(&stageLog)); 150 PetscCall(PetscStageLogDestroy(stageLog)); 151 PetscCall(PetscIntStackDestroy(current_log_event_stack)); 152 current_log_event_stack = NULL; 153 154 petsc_TotalFlops = 0.0; 155 petsc_numActions = 0; 156 petsc_numObjects = 0; 157 petsc_numObjectsDestroyed = 0; 158 petsc_maxActions = 100; 159 petsc_maxObjects = 100; 160 petsc_actions = NULL; 161 petsc_objects = NULL; 162 petsc_logActions = PETSC_FALSE; 163 petsc_logObjects = PETSC_FALSE; 164 petsc_BaseTime = 0.0; 165 petsc_TotalFlops = 0.0; 166 petsc_tmp_flops = 0.0; 167 petsc_send_ct = 0.0; 168 petsc_recv_ct = 0.0; 169 petsc_send_len = 0.0; 170 petsc_recv_len = 0.0; 171 petsc_isend_ct = 0.0; 172 petsc_irecv_ct = 0.0; 173 petsc_isend_len = 0.0; 174 petsc_irecv_len = 0.0; 175 petsc_wait_ct = 0.0; 176 petsc_wait_any_ct = 0.0; 177 petsc_wait_all_ct = 0.0; 178 petsc_sum_of_waits_ct = 0.0; 179 petsc_allreduce_ct = 0.0; 180 petsc_gather_ct = 0.0; 181 petsc_scatter_ct = 0.0; 182 #if defined(PETSC_HAVE_DEVICE) 183 petsc_ctog_ct = 0.0; 184 petsc_gtoc_ct = 0.0; 185 petsc_ctog_sz = 0.0; 186 petsc_gtoc_sz = 0.0; 187 petsc_gflops = 0.0; 188 petsc_gtime = 0.0; 189 #endif 190 PETSC_LARGEST_EVENT = PETSC_EVENT; 191 PetscLogPHC = NULL; 192 PetscLogPHD = NULL; 193 petsc_tracefile = NULL; 194 petsc_tracelevel = 0; 195 petsc_traceblanks = " "; 196 petsc_tracespace[0] = ' '; 197 petsc_tracespace[1] = 0; 198 petsc_tracetime = 0.0; 199 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 200 PETSC_OBJECT_CLASSID = 0; 201 petsc_stageLog = NULL; 202 PetscLogInitializeCalled = PETSC_FALSE; 203 PetscFunctionReturn(0); 204 } 205 206 /*@C 207 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 208 209 Not Collective 210 211 Input Parameters: 212 + b - The function called at beginning of event 213 - e - The function called at end of event 214 215 Level: developer 216 217 Developer Note: 218 The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. 219 220 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()` 221 @*/ 222 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) { 223 PetscFunctionBegin; 224 PetscLogPLB = b; 225 PetscLogPLE = e; 226 PetscFunctionReturn(0); 227 } 228 229 /*@C 230 PetscLogIsActive - Check if logging is currently in progress. 231 232 Not Collective 233 234 Output Parameter: 235 . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise 236 237 Level: beginner 238 239 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 240 @*/ 241 PetscErrorCode PetscLogIsActive(PetscBool *isActive) { 242 PetscFunctionBegin; 243 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 244 PetscFunctionReturn(0); 245 } 246 247 /*@C 248 PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop 249 rates and object creation and should not slow programs down too much. 250 This routine may be called more than once. 251 252 Logically Collective over `PETSC_COMM_WORLD` 253 254 Options Database Key: 255 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 256 screen (for code configured with --with-log=1 (which is the default)) 257 258 Usage: 259 .vb 260 PetscInitialize(...); 261 PetscLogDefaultBegin(); 262 ... code ... 263 PetscLogView(viewer); or PetscLogDump(); 264 PetscFinalize(); 265 .ve 266 267 Note: 268 `PetscLogView()` or `PetscLogDump()` actually cause the printing of 269 the logging information. 270 271 Level: advanced 272 273 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 274 @*/ 275 PetscErrorCode PetscLogDefaultBegin(void) { 276 PetscFunctionBegin; 277 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 278 PetscFunctionReturn(0); 279 } 280 281 /*@C 282 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 283 all events. This creates large log files and slows the program down. 284 285 Logically Collective on `PETSC_COMM_WORLD` 286 287 Options Database Key: 288 . -log_all - Prints extensive log information 289 290 Usage: 291 .vb 292 PetscInitialize(...); 293 PetscLogAllBegin(); 294 ... code ... 295 PetscLogDump(filename); 296 PetscFinalize(); 297 .ve 298 299 Note: 300 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 301 intended for production runs since it logs only flop rates and object 302 creation (and shouldn't significantly slow the programs). 303 304 Level: advanced 305 306 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 307 @*/ 308 PetscErrorCode PetscLogAllBegin(void) { 309 PetscFunctionBegin; 310 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 311 PetscFunctionReturn(0); 312 } 313 314 /*@C 315 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 316 begins or ends, the event name is printed. 317 318 Logically Collective on `PETSC_COMM_WORLD` 319 320 Input Parameter: 321 . file - The file to print trace in (e.g. stdout) 322 323 Options Database Key: 324 . -log_trace [filename] - Activates `PetscLogTraceBegin()` 325 326 Notes: 327 `PetscLogTraceBegin()` prints the processor number, the execution time (sec), 328 then "Event begin:" or "Event end:" followed by the event name. 329 330 `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful 331 to determine where a program is hanging without running in the 332 debugger. Can be used in conjunction with the -info option. 333 334 Level: intermediate 335 336 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 337 @*/ 338 PetscErrorCode PetscLogTraceBegin(FILE *file) { 339 PetscFunctionBegin; 340 petsc_tracefile = file; 341 342 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 343 PetscFunctionReturn(0); 344 } 345 346 /*@ 347 PetscLogActions - Determines whether actions are logged for the graphical viewer. 348 349 Not Collective 350 351 Input Parameter: 352 . flag - `PETSC_TRUE` if actions are to be logged 353 354 Options Database Key: 355 . -log_exclude_actions - Turns off actions logging 356 357 Level: intermediate 358 359 Note: 360 Logging of actions continues to consume more memory as the program 361 runs. Long running programs should consider turning this feature off. 362 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 363 @*/ 364 PetscErrorCode PetscLogActions(PetscBool flag) { 365 PetscFunctionBegin; 366 petsc_logActions = flag; 367 PetscFunctionReturn(0); 368 } 369 370 /*@ 371 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 372 373 Not Collective 374 375 Input Parameter: 376 . flag - `PETSC_TRUE` if objects are to be logged 377 378 Options Database Key: 379 . -log_exclude_objects - Turns off objects logging 380 381 Level: intermediate 382 383 Note: 384 Logging of objects continues to consume more memory as the program 385 runs. Long running programs should consider turning this feature off. 386 387 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 388 @*/ 389 PetscErrorCode PetscLogObjects(PetscBool flag) { 390 PetscFunctionBegin; 391 petsc_logObjects = flag; 392 PetscFunctionReturn(0); 393 } 394 395 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 396 /*@C 397 PetscLogStageRegister - Attaches a character string name to a logging stage. 398 399 Not Collective 400 401 Input Parameter: 402 . sname - The name to associate with that stage 403 404 Output Parameter: 405 . stage - The stage number 406 407 Level: intermediate 408 409 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 410 @*/ 411 PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage) { 412 PetscStageLog stageLog; 413 PetscLogEvent event; 414 415 PetscFunctionBegin; 416 PetscCall(PetscLogGetStageLog(&stageLog)); 417 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 418 /* Copy events already changed in the main stage, this sucks */ 419 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 420 for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 421 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 422 PetscFunctionReturn(0); 423 } 424 425 /*@C 426 PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage 427 428 Not Collective 429 430 Input Parameter: 431 . stage - The stage on which to log 432 433 Usage: 434 If the option -log_view is used to run the program containing the 435 following code, then 2 sets of summary data will be printed during 436 PetscFinalize(). 437 .vb 438 PetscInitialize(int *argc,char ***args,0,0); 439 [stage 0 of code] 440 PetscLogStagePush(1); 441 [stage 1 of code] 442 PetscLogStagePop(); 443 PetscBarrier(...); 444 [more stage 0 of code] 445 PetscFinalize(); 446 .ve 447 448 Note: 449 Use `PetscLogStageRegister()` to register a stage. 450 451 Level: intermediate 452 453 .seealso: `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 454 @*/ 455 PetscErrorCode PetscLogStagePush(PetscLogStage stage) { 456 PetscStageLog stageLog; 457 458 PetscFunctionBegin; 459 PetscCall(PetscLogGetStageLog(&stageLog)); 460 PetscCall(PetscStageLogPush(stageLog, stage)); 461 PetscFunctionReturn(0); 462 } 463 464 /*@C 465 PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()` 466 467 Not Collective 468 469 Usage: 470 If the option -log_view is used to run the program containing the 471 following code, then 2 sets of summary data will be printed during 472 PetscFinalize(). 473 .vb 474 PetscInitialize(int *argc,char ***args,0,0); 475 [stage 0 of code] 476 PetscLogStagePush(1); 477 [stage 1 of code] 478 PetscLogStagePop(); 479 PetscBarrier(...); 480 [more stage 0 of code] 481 PetscFinalize(); 482 .ve 483 484 Level: intermediate 485 486 .seealso: `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 487 @*/ 488 PetscErrorCode PetscLogStagePop(void) { 489 PetscStageLog stageLog; 490 491 PetscFunctionBegin; 492 PetscCall(PetscLogGetStageLog(&stageLog)); 493 PetscCall(PetscStageLogPop(stageLog)); 494 PetscFunctionReturn(0); 495 } 496 497 /*@ 498 PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 499 500 Not Collective 501 502 Input Parameters: 503 + stage - The stage 504 - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 505 506 Level: intermediate 507 508 Note: 509 If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist 510 511 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 512 @*/ 513 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) { 514 PetscStageLog stageLog; 515 516 PetscFunctionBegin; 517 PetscCall(PetscLogGetStageLog(&stageLog)); 518 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 519 PetscFunctionReturn(0); 520 } 521 522 /*@ 523 PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 524 525 Not Collective 526 527 Input Parameter: 528 . stage - The stage 529 530 Output Parameter: 531 . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 532 533 Level: intermediate 534 535 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 536 @*/ 537 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) { 538 PetscStageLog stageLog; 539 540 PetscFunctionBegin; 541 PetscCall(PetscLogGetStageLog(&stageLog)); 542 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 543 PetscFunctionReturn(0); 544 } 545 546 /*@ 547 PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()` 548 549 Not Collective 550 551 Input Parameters: 552 + stage - The stage 553 - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 554 555 Level: intermediate 556 557 Developer Note: 558 What does visible mean, needs to be documented. 559 560 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 561 @*/ 562 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) { 563 PetscStageLog stageLog; 564 565 PetscFunctionBegin; 566 PetscCall(PetscLogGetStageLog(&stageLog)); 567 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 568 PetscFunctionReturn(0); 569 } 570 571 /*@ 572 PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()` 573 574 Not Collective 575 576 Input Parameter: 577 . stage - The stage 578 579 Output Parameter: 580 . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 581 582 Level: intermediate 583 584 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 585 @*/ 586 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) { 587 PetscStageLog stageLog; 588 589 PetscFunctionBegin; 590 PetscCall(PetscLogGetStageLog(&stageLog)); 591 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 592 PetscFunctionReturn(0); 593 } 594 595 /*@C 596 PetscLogStageGetId - Returns the stage id when given the stage name. 597 598 Not Collective 599 600 Input Parameter: 601 . name - The stage name 602 603 Output Parameter: 604 . stage - The stage, , or -1 if no stage with that name exists 605 606 Level: intermediate 607 608 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 609 @*/ 610 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) { 611 PetscStageLog stageLog; 612 613 PetscFunctionBegin; 614 PetscCall(PetscLogGetStageLog(&stageLog)); 615 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 616 PetscFunctionReturn(0); 617 } 618 619 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 620 621 /*@C 622 PetscLogEventRegister - Registers an event name for logging operations 623 624 Not Collective 625 626 Input Parameters: 627 + name - The name associated with the event 628 - classid - The classid associated to the class for this event, obtain either with 629 `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones 630 are only available in C code 631 632 Output Parameter: 633 . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`. 634 635 Example of Usage: 636 .vb 637 PetscLogEvent USER_EVENT; 638 PetscClassId classid; 639 PetscLogDouble user_event_flops; 640 PetscClassIdRegister("class name",&classid); 641 PetscLogEventRegister("User event name",classid,&USER_EVENT); 642 PetscLogEventBegin(USER_EVENT,0,0,0,0); 643 [code segment to monitor] 644 PetscLogFlops(user_event_flops); 645 PetscLogEventEnd(USER_EVENT,0,0,0,0); 646 .ve 647 648 Notes: 649 PETSc automatically logs library events if the code has been 650 configured with --with-log (which is the default) and 651 -log_view or -log_all is specified. `PetscLogEventRegister()` is 652 intended for logging user events to supplement this PETSc 653 information. 654 655 PETSc can gather data for use with the utilities Jumpshot 656 (part of the MPICH distribution). If PETSc has been compiled 657 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 658 MPICH), the user can employ another command line option, -log_mpe, 659 to create a logfile, "mpe.log", which can be visualized 660 Jumpshot. 661 662 The classid is associated with each event so that classes of events 663 can be disabled simultaneously, such as all matrix events. The user 664 can either use an existing classid, such as `MAT_CLASSID`, or create 665 their own as shown in the example. 666 667 If an existing event with the same name exists, its event handle is 668 returned instead of creating a new event. 669 670 Level: intermediate 671 672 .seealso: `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 673 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 674 @*/ 675 PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event) { 676 PetscStageLog stageLog; 677 int stage; 678 679 PetscFunctionBegin; 680 *event = PETSC_DECIDE; 681 PetscCall(PetscLogGetStageLog(&stageLog)); 682 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 683 if (*event > 0) PetscFunctionReturn(0); 684 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 685 for (stage = 0; stage < stageLog->numStages; stage++) { 686 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 687 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 688 } 689 PetscFunctionReturn(0); 690 } 691 692 /*@ 693 PetscLogEventSetCollective - Indicates that a particular event is collective. 694 695 Not Collective 696 697 Input Parameters: 698 + event - The event id 699 - collective - Bolean flag indicating whether a particular event is collective 700 701 Notes: 702 New events returned from `PetscLogEventRegister()` are collective by default. 703 704 Collective events are handled specially if the -log_sync is used. In that case the logging saves information about 705 two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication 706 to be performed. This option is useful to debug imbalance within the computations or communications 707 708 Level: developer 709 710 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()` 711 @*/ 712 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective) { 713 PetscStageLog stageLog; 714 PetscEventRegLog eventRegLog; 715 716 PetscFunctionBegin; 717 PetscCall(PetscLogGetStageLog(&stageLog)); 718 PetscCall(PetscStageLogGetEventRegLog(stageLog, &eventRegLog)); 719 PetscCheck(event >= 0 && event <= eventRegLog->numEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid event id"); 720 eventRegLog->eventInfo[event].collective = collective; 721 PetscFunctionReturn(0); 722 } 723 724 /*@ 725 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 726 727 Not Collective 728 729 Input Parameter: 730 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 731 732 Level: developer 733 734 .seealso: `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 735 @*/ 736 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) { 737 PetscStageLog stageLog; 738 int stage; 739 740 PetscFunctionBegin; 741 PetscCall(PetscLogGetStageLog(&stageLog)); 742 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 743 PetscFunctionReturn(0); 744 } 745 746 /*@ 747 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 748 749 Not Collective 750 751 Input Parameter: 752 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 753 754 Level: developer 755 756 Note: 757 If a class is excluded then events associated with that class are not logged. 758 759 .seealso: `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 760 @*/ 761 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) { 762 PetscStageLog stageLog; 763 int stage; 764 765 PetscFunctionBegin; 766 PetscCall(PetscLogGetStageLog(&stageLog)); 767 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 768 PetscFunctionReturn(0); 769 } 770 771 /*@ 772 PetscLogEventActivate - Indicates that a particular event should be logged. 773 774 Not Collective 775 776 Input Parameter: 777 . event - The event id 778 779 Usage: 780 .vb 781 PetscLogEventDeactivate(VEC_SetValues); 782 [code where you do not want to log VecSetValues()] 783 PetscLogEventActivate(VEC_SetValues); 784 [code where you do want to log VecSetValues()] 785 .ve 786 787 Note: 788 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 789 or an event number obtained with `PetscLogEventRegister()`. 790 791 Level: advanced 792 793 .seealso: `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 794 @*/ 795 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) { 796 PetscStageLog stageLog; 797 int stage; 798 799 PetscFunctionBegin; 800 PetscCall(PetscLogGetStageLog(&stageLog)); 801 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 802 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 803 PetscFunctionReturn(0); 804 } 805 806 /*@ 807 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 808 809 Not Collective 810 811 Input Parameter: 812 . event - The event id 813 814 Usage: 815 .vb 816 PetscLogEventDeactivate(VEC_SetValues); 817 [code where you do not want to log VecSetValues()] 818 PetscLogEventActivate(VEC_SetValues); 819 [code where you do want to log VecSetValues()] 820 .ve 821 822 Note: 823 The event may be either a pre-defined PETSc event (found in 824 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 825 826 Level: advanced 827 828 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 829 @*/ 830 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) { 831 PetscStageLog stageLog; 832 int stage; 833 834 PetscFunctionBegin; 835 PetscCall(PetscLogGetStageLog(&stageLog)); 836 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 837 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 838 PetscFunctionReturn(0); 839 } 840 841 /*@ 842 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called 843 844 Not Collective 845 846 Input Parameter: 847 . event - The event id 848 849 Usage: 850 .vb 851 PetscLogEventDeactivatePush(VEC_SetValues); 852 [code where you do not want to log VecSetValues()] 853 PetscLogEventDeactivatePop(VEC_SetValues); 854 [code where you do want to log VecSetValues()] 855 .ve 856 857 Note: 858 The event may be either a pre-defined PETSc event (found in 859 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 860 861 Level: advanced 862 863 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()` 864 @*/ 865 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) { 866 PetscStageLog stageLog; 867 int stage; 868 869 PetscFunctionBegin; 870 PetscCall(PetscLogGetStageLog(&stageLog)); 871 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 872 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 873 PetscFunctionReturn(0); 874 } 875 876 /*@ 877 PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()` 878 879 Not Collective 880 881 Input Parameter: 882 . event - The event id 883 884 Usage: 885 .vb 886 PetscLogEventDeactivatePush(VEC_SetValues); 887 [code where you do not want to log VecSetValues()] 888 PetscLogEventDeactivatePop(VEC_SetValues); 889 [code where you do want to log VecSetValues()] 890 .ve 891 892 Note: 893 The event may be either a pre-defined PETSc event (found in 894 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 895 896 Level: advanced 897 898 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 899 @*/ 900 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) { 901 PetscStageLog stageLog; 902 int stage; 903 904 PetscFunctionBegin; 905 PetscCall(PetscLogGetStageLog(&stageLog)); 906 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 907 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 908 PetscFunctionReturn(0); 909 } 910 911 /*@ 912 PetscLogEventSetActiveAll - Turns on logging of all events 913 914 Not Collective 915 916 Input Parameters: 917 + event - The event id 918 - isActive - The activity flag determining whether the event is logged 919 920 Level: advanced 921 922 .seealso: `PlogEventActivate()`, `PlogEventDeactivate()` 923 @*/ 924 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) { 925 PetscStageLog stageLog; 926 int stage; 927 928 PetscFunctionBegin; 929 PetscCall(PetscLogGetStageLog(&stageLog)); 930 for (stage = 0; stage < stageLog->numStages; stage++) { 931 if (isActive) { 932 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 933 } else { 934 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 935 } 936 } 937 PetscFunctionReturn(0); 938 } 939 940 /*@ 941 PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage 942 943 Not Collective 944 945 Input Parameter: 946 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 947 948 Level: developer 949 950 .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 951 @*/ 952 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) { 953 PetscStageLog stageLog; 954 int stage; 955 956 PetscFunctionBegin; 957 PetscCall(PetscLogGetStageLog(&stageLog)); 958 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 959 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 960 PetscFunctionReturn(0); 961 } 962 963 /*@ 964 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage 965 966 Not Collective 967 968 Input Parameter: 969 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 970 971 Level: developer 972 973 .seealso: `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`,`PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 974 @*/ 975 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) { 976 PetscStageLog stageLog; 977 int stage; 978 979 PetscFunctionBegin; 980 PetscCall(PetscLogGetStageLog(&stageLog)); 981 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 982 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 983 PetscFunctionReturn(0); 984 } 985 986 /*MC 987 PetscLogEventSync - Synchronizes the beginning of a user event. 988 989 Synopsis: 990 #include <petsclog.h> 991 PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm) 992 993 Collective 994 995 Input Parameters: 996 + e - integer associated with the event obtained from PetscLogEventRegister() 997 - comm - an MPI communicator 998 999 Usage: 1000 .vb 1001 PetscLogEvent USER_EVENT; 1002 PetscLogEventRegister("User event",0,&USER_EVENT); 1003 PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD); 1004 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1005 [code segment to monitor] 1006 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1007 .ve 1008 1009 Note: 1010 This routine should be called only if there is not a 1011 `PetscObject` available to pass to `PetscLogEventBegin()`. 1012 1013 Level: developer 1014 1015 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1016 M*/ 1017 1018 /*MC 1019 PetscLogEventBegin - Logs the beginning of a user event. 1020 1021 Synopsis: 1022 #include <petsclog.h> 1023 PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1024 1025 Not Collective 1026 1027 Input Parameters: 1028 + e - integer associated with the event obtained from PetscLogEventRegister() 1029 - o1,o2,o3,o4 - objects associated with the event, or 0 1030 1031 Fortran Synopsis: 1032 void PetscLogEventBegin(int e,PetscErrorCode ierr) 1033 1034 Usage: 1035 .vb 1036 PetscLogEvent USER_EVENT; 1037 PetscLogDouble user_event_flops; 1038 PetscLogEventRegister("User event",0,&USER_EVENT); 1039 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1040 [code segment to monitor] 1041 PetscLogFlops(user_event_flops); 1042 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1043 .ve 1044 1045 Developer Note: 1046 `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the 1047 errors that occur in the macro directly because other packages that use this macros have used them in their 1048 own functions or methods that do not return error codes and it would be disruptive to change the current 1049 behavior. 1050 1051 Level: intermediate 1052 1053 .seealso: `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1054 M*/ 1055 1056 /*MC 1057 PetscLogEventEnd - Log the end of a user event. 1058 1059 Synopsis: 1060 #include <petsclog.h> 1061 PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1062 1063 Not Collective 1064 1065 Input Parameters: 1066 + e - integer associated with the event obtained with PetscLogEventRegister() 1067 - o1,o2,o3,o4 - objects associated with the event, or 0 1068 1069 Fortran Synopsis: 1070 void PetscLogEventEnd(int e,PetscErrorCode ierr) 1071 1072 Usage: 1073 .vb 1074 PetscLogEvent USER_EVENT; 1075 PetscLogDouble user_event_flops; 1076 PetscLogEventRegister("User event",0,&USER_EVENT,); 1077 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1078 [code segment to monitor] 1079 PetscLogFlops(user_event_flops); 1080 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1081 .ve 1082 1083 Level: intermediate 1084 1085 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1086 M*/ 1087 1088 /*@C 1089 PetscLogEventGetId - Returns the event id when given the event name. 1090 1091 Not Collective 1092 1093 Input Parameter: 1094 . name - The event name 1095 1096 Output Parameter: 1097 . event - The event, or -1 if no event with that name exists 1098 1099 Level: intermediate 1100 1101 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1102 @*/ 1103 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) { 1104 PetscStageLog stageLog; 1105 1106 PetscFunctionBegin; 1107 PetscCall(PetscLogGetStageLog(&stageLog)); 1108 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1109 PetscFunctionReturn(0); 1110 } 1111 1112 PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event) { 1113 PetscFunctionBegin; 1114 PetscCall(PetscIntStackPush(current_log_event_stack, event)); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 PetscErrorCode PetscLogPopCurrentEvent_Internal(void) { 1119 PetscFunctionBegin; 1120 PetscCall(PetscIntStackPop(current_log_event_stack, NULL)); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event) { 1125 PetscBool empty; 1126 1127 PetscFunctionBegin; 1128 PetscValidIntPointer(event, 1); 1129 *event = PETSC_DECIDE; 1130 PetscCall(PetscIntStackEmpty(current_log_event_stack, &empty)); 1131 if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack, event)); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event) { 1136 PetscFunctionBegin; 1137 if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event, NULL, NULL, NULL, NULL)); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event) { 1142 PetscStageLog stageLog; 1143 PetscEventPerfLog eventLog; 1144 int stage; 1145 1146 PetscFunctionBegin; 1147 if (event == PETSC_DECIDE) PetscFunctionReturn(0); 1148 PetscCall(PetscLogEventBegin(event, NULL, NULL, NULL, NULL)); 1149 PetscCall(PetscLogGetStageLog(&stageLog)); 1150 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1151 PetscCall(PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog)); 1152 eventLog->eventInfo[event].count--; 1153 PetscFunctionReturn(0); 1154 } 1155 1156 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1157 /*@C 1158 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1159 be read by bin/petscview. This program no longer exists. 1160 1161 Collective on `PETSC_COMM_WORLD` 1162 1163 Input Parameter: 1164 . name - an optional file name 1165 1166 Usage: 1167 .vb 1168 PetscInitialize(...); 1169 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1170 ... code ... 1171 PetscLogDump(filename); 1172 PetscFinalize(); 1173 .ve 1174 1175 Note: 1176 The default file name is 1177 $ Log.<rank> 1178 where <rank> is the processor number. If no name is specified, 1179 this file will be used. 1180 1181 Level: advanced 1182 1183 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1184 @*/ 1185 PetscErrorCode PetscLogDump(const char sname[]) { 1186 PetscStageLog stageLog; 1187 PetscEventPerfInfo *eventInfo; 1188 FILE *fd; 1189 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1190 PetscLogDouble flops, _TotalTime; 1191 PetscMPIInt rank; 1192 int action, object, curStage; 1193 PetscLogEvent event; 1194 1195 PetscFunctionBegin; 1196 /* Calculate the total elapsed time */ 1197 PetscTime(&_TotalTime); 1198 _TotalTime -= petsc_BaseTime; 1199 /* Open log file */ 1200 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1201 if (sname && sname[0]) sprintf(file, "%s.%d", sname, rank); 1202 else sprintf(file, "Log.%d", rank); 1203 PetscCall(PetscFixFilename(file, fname)); 1204 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1205 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1206 /* Output totals */ 1207 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1208 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1209 /* Output actions */ 1210 if (petsc_logActions) { 1211 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1212 for (action = 0; action < petsc_numActions; action++) { 1213 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1214 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1215 } 1216 } 1217 /* Output objects */ 1218 if (petsc_logObjects) { 1219 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1220 for (object = 0; object < petsc_numObjects; object++) { 1221 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem)); 1222 if (!petsc_objects[object].name[0]) { 1223 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n")); 1224 } else { 1225 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1226 } 1227 if (petsc_objects[object].info[0] != 0) { 1228 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1229 } else { 1230 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1231 } 1232 } 1233 } 1234 /* Output events */ 1235 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1236 PetscCall(PetscLogGetStageLog(&stageLog)); 1237 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1238 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1239 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1240 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time; 1241 else flops = 0.0; 1242 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops)); 1243 } 1244 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 /* 1249 PetscLogView_Detailed - Each process prints the times for its own events 1250 1251 */ 1252 PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) { 1253 PetscStageLog stageLog; 1254 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1255 PetscLogDouble locTotalTime, numRed, maxMem; 1256 int numStages, numEvents, stage, event; 1257 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1258 PetscMPIInt rank, size; 1259 1260 PetscFunctionBegin; 1261 PetscCallMPI(MPI_Comm_size(comm, &size)); 1262 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1263 /* Must preserve reduction count before we go on */ 1264 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1265 /* Get the total elapsed time */ 1266 PetscTime(&locTotalTime); 1267 locTotalTime -= petsc_BaseTime; 1268 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 1269 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 1270 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 1271 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 1272 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 1273 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 1274 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 1275 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 1276 PetscCall(PetscLogGetStageLog(&stageLog)); 1277 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1278 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1279 for (stage = 0; stage < numStages; stage++) { 1280 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name)); 1281 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name)); 1282 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1283 for (event = 0; event < numEvents; event++) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name)); 1284 } 1285 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1286 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1287 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1288 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1289 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1291 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1292 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects)); 1293 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1294 PetscCall(PetscViewerFlush(viewer)); 1295 for (stage = 0; stage < numStages; stage++) { 1296 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1297 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, 1298 stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1299 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1300 for (event = 0; event < numEvents; event++) { 1301 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1302 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1303 stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, 1304 eventInfo->flops)); 1305 if (eventInfo->dof[0] >= 0.) { 1306 PetscInt d, e; 1307 1308 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1309 for (d = 0; d < 8; ++d) { 1310 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1311 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1312 } 1313 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1314 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1315 for (e = 0; e < 8; ++e) { 1316 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1317 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1318 } 1319 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1320 } 1321 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1322 } 1323 } 1324 PetscCall(PetscViewerFlush(viewer)); 1325 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1326 PetscFunctionReturn(0); 1327 } 1328 1329 /* 1330 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1331 */ 1332 PetscErrorCode PetscLogView_CSV(PetscViewer viewer) { 1333 PetscStageLog stageLog; 1334 PetscEventPerfInfo *eventInfo = NULL; 1335 PetscLogDouble locTotalTime, maxMem; 1336 int numStages, numEvents, stage, event; 1337 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1338 PetscMPIInt rank, size; 1339 1340 PetscFunctionBegin; 1341 PetscCallMPI(MPI_Comm_size(comm, &size)); 1342 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1343 /* Must preserve reduction count before we go on */ 1344 /* Get the total elapsed time */ 1345 PetscTime(&locTotalTime); 1346 locTotalTime -= petsc_BaseTime; 1347 PetscCall(PetscLogGetStageLog(&stageLog)); 1348 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1349 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1350 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1351 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1352 PetscCall(PetscViewerFlush(viewer)); 1353 for (stage = 0; stage < numStages; stage++) { 1354 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1355 1356 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1357 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1358 for (event = 0; event < numEvents; event++) { 1359 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1360 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, 1361 eventInfo->numReductions, eventInfo->flops)); 1362 if (eventInfo->dof[0] >= 0.) { 1363 PetscInt d, e; 1364 1365 for (d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1366 for (e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1367 } 1368 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1369 } 1370 } 1371 PetscCall(PetscViewerFlush(viewer)); 1372 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) { 1377 PetscFunctionBegin; 1378 if (!PetscLogSyncOn) PetscFunctionReturn(0); 1379 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1380 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1381 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1382 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1383 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1384 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1385 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1386 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1387 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1388 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1389 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1390 PetscFunctionReturn(0); 1391 } 1392 1393 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) { 1394 PetscFunctionBegin; 1395 if (PetscDefined(USE_DEBUG)) { 1396 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1397 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1398 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1399 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1400 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1401 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1402 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1403 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1404 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1405 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1406 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1407 } 1408 PetscFunctionReturn(0); 1409 } 1410 1411 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) { 1412 #if defined(PETSC_HAVE_DEVICE) 1413 PetscMPIInt size; 1414 1415 PetscFunctionBegin; 1416 PetscCallMPI(MPI_Comm_size(comm, &size)); 1417 if (use_gpu_aware_mpi || size == 1) PetscFunctionReturn(0); 1418 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1419 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1420 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1421 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1422 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1423 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1424 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1425 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1426 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1427 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1428 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1429 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1430 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1431 PetscFunctionReturn(0); 1432 #else 1433 return 0; 1434 #endif 1435 } 1436 1437 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) { 1438 #if defined(PETSC_HAVE_DEVICE) 1439 1440 PetscFunctionBegin; 1441 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(0); 1442 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1443 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1444 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1445 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1446 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1447 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1448 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1449 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1450 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1451 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1452 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1453 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1454 PetscFunctionReturn(0); 1455 #else 1456 return 0; 1457 #endif 1458 } 1459 1460 PetscErrorCode PetscLogView_Default(PetscViewer viewer) { 1461 FILE *fd; 1462 PetscLogDouble zero = 0.0; 1463 PetscStageLog stageLog; 1464 PetscStageInfo *stageInfo = NULL; 1465 PetscEventPerfInfo *eventInfo = NULL; 1466 PetscClassPerfInfo *classInfo; 1467 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1468 const char *name; 1469 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1470 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1471 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1472 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1473 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1474 PetscLogDouble min, max, tot, ratio, avg, x, y; 1475 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1476 #if defined(PETSC_HAVE_DEVICE) 1477 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1478 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1479 #endif 1480 PetscMPIInt minC, maxC; 1481 PetscMPIInt size, rank; 1482 PetscBool *localStageUsed, *stageUsed; 1483 PetscBool *localStageVisible, *stageVisible; 1484 int numStages, localNumEvents, numEvents; 1485 int stage, oclass; 1486 PetscLogEvent event; 1487 PetscErrorCode ierr = 0; 1488 char version[256]; 1489 MPI_Comm comm; 1490 #if defined(PETSC_HAVE_DEVICE) 1491 PetscLogEvent eventid; 1492 PetscInt64 nas = 0x7FF0000000000002; 1493 #endif 1494 1495 PetscFunctionBegin; 1496 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1497 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1498 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1499 PetscCallMPI(MPI_Comm_size(comm, &size)); 1500 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1501 /* Get the total elapsed time */ 1502 PetscTime(&locTotalTime); 1503 locTotalTime -= petsc_BaseTime; 1504 1505 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1506 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1507 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1508 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1509 PetscCall(PetscLogViewWarnSync(comm, fd)); 1510 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1511 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1512 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1513 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1514 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1515 PetscCall(PetscGetUserName(username, sizeof(username))); 1516 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1517 PetscCall(PetscGetDate(date, sizeof(date))); 1518 PetscCall(PetscGetVersion(version, sizeof(version))); 1519 if (size == 1) { 1520 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1521 } else { 1522 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1523 } 1524 #if defined(PETSC_HAVE_OPENMP) 1525 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1526 #endif 1527 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1528 1529 /* Must preserve reduction count before we go on */ 1530 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1531 1532 /* Calculate summary information */ 1533 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1534 /* Time */ 1535 PetscCallMPI(MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1536 PetscCallMPI(MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1537 PetscCallMPI(MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1538 avg = tot / ((PetscLogDouble)size); 1539 if (min != 0.0) ratio = max / min; 1540 else ratio = 0.0; 1541 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1542 TotalTime = tot; 1543 /* Objects */ 1544 avg = (PetscLogDouble)petsc_numObjects; 1545 PetscCallMPI(MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1546 PetscCallMPI(MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1547 PetscCallMPI(MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1548 avg = tot / ((PetscLogDouble)size); 1549 if (min != 0.0) ratio = max / min; 1550 else ratio = 0.0; 1551 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1552 /* Flops */ 1553 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1554 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1555 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1556 avg = tot / ((PetscLogDouble)size); 1557 if (min != 0.0) ratio = max / min; 1558 else ratio = 0.0; 1559 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1560 TotalFlops = tot; 1561 /* Flops/sec -- Must talk to Barry here */ 1562 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1563 else flops = 0.0; 1564 PetscCallMPI(MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1565 PetscCallMPI(MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1566 PetscCallMPI(MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1567 avg = tot / ((PetscLogDouble)size); 1568 if (min != 0.0) ratio = max / min; 1569 else ratio = 0.0; 1570 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1571 /* Memory */ 1572 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1573 if (mem > 0.0) { 1574 PetscCallMPI(MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1575 PetscCallMPI(MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1576 PetscCallMPI(MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1577 avg = tot / ((PetscLogDouble)size); 1578 if (min != 0.0) ratio = max / min; 1579 else ratio = 0.0; 1580 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1581 } 1582 /* Messages */ 1583 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1584 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1585 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1586 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1587 avg = tot / ((PetscLogDouble)size); 1588 if (min != 0.0) ratio = max / min; 1589 else ratio = 0.0; 1590 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1591 numMessages = tot; 1592 /* Message Lengths */ 1593 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1594 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1595 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1596 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1597 if (numMessages != 0) avg = tot / numMessages; 1598 else avg = 0.0; 1599 if (min != 0.0) ratio = max / min; 1600 else ratio = 0.0; 1601 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1602 messageLength = tot; 1603 /* Reductions */ 1604 PetscCallMPI(MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1605 PetscCallMPI(MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1606 PetscCallMPI(MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1607 if (min != 0.0) ratio = max / min; 1608 else ratio = 0.0; 1609 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1610 numReductions = red; /* wrong because uses count from process zero */ 1611 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1612 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1613 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1614 1615 /* Get total number of stages -- 1616 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1617 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1618 This seems best accomplished by assoicating a communicator with each stage. 1619 */ 1620 PetscCall(PetscLogGetStageLog(&stageLog)); 1621 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1622 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1623 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1624 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1625 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1626 if (numStages > 0) { 1627 stageInfo = stageLog->stageInfo; 1628 for (stage = 0; stage < numStages; stage++) { 1629 if (stage < stageLog->numStages) { 1630 localStageUsed[stage] = stageInfo[stage].used; 1631 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1632 } else { 1633 localStageUsed[stage] = PETSC_FALSE; 1634 localStageVisible[stage] = PETSC_TRUE; 1635 } 1636 } 1637 PetscCallMPI(MPI_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1638 PetscCallMPI(MPI_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1639 for (stage = 0; stage < numStages; stage++) { 1640 if (stageUsed[stage]) { 1641 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1642 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1643 break; 1644 } 1645 } 1646 for (stage = 0; stage < numStages; stage++) { 1647 if (!stageUsed[stage]) continue; 1648 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1649 if (localStageUsed[stage]) { 1650 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1651 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1652 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1653 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1654 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1655 name = stageInfo[stage].name; 1656 } else { 1657 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1658 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1659 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1660 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1661 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1662 name = ""; 1663 } 1664 mess *= 0.5; 1665 messLen *= 0.5; 1666 red /= size; 1667 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1668 else fracTime = 0.0; 1669 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1670 else fracFlops = 0.0; 1671 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1672 if (numMessages != 0.0) fracMessages = mess / numMessages; 1673 else fracMessages = 0.0; 1674 if (mess != 0.0) avgMessLen = messLen / mess; 1675 else avgMessLen = 0.0; 1676 if (messageLength != 0.0) fracLength = messLen / messageLength; 1677 else fracLength = 0.0; 1678 if (numReductions != 0.0) fracReductions = red / numReductions; 1679 else fracReductions = 0.0; 1680 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1681 } 1682 } 1683 1684 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1685 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1686 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1687 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1688 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1689 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1690 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1691 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1692 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1693 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1694 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1695 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1696 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1697 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1698 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1699 if (PetscLogMemory) { 1700 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1701 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1702 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1703 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1704 } 1705 #if defined(PETSC_HAVE_DEVICE) 1706 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1707 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1708 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1709 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1710 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1711 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1712 #endif 1713 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1714 1715 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1716 1717 /* Report events */ 1718 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1719 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1720 #if defined(PETSC_HAVE_DEVICE) 1721 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1722 #endif 1723 PetscCall(PetscFPrintf(comm, fd, "\n")); 1724 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1725 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1726 #if defined(PETSC_HAVE_DEVICE) 1727 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1728 #endif 1729 PetscCall(PetscFPrintf(comm, fd, "\n")); 1730 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1731 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1732 #if defined(PETSC_HAVE_DEVICE) 1733 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1734 #endif 1735 PetscCall(PetscFPrintf(comm, fd, "\n")); 1736 1737 #if defined(PETSC_HAVE_DEVICE) 1738 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1739 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1740 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1741 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1742 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1743 #endif 1744 1745 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1746 for (stage = 0; stage < numStages; stage++) { 1747 if (!stageVisible[stage]) continue; 1748 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1749 if (localStageUsed[stage]) { 1750 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1751 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1752 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1753 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1754 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1755 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1756 } else { 1757 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1758 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1759 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1760 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1761 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1762 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1763 } 1764 mess *= 0.5; 1765 messLen *= 0.5; 1766 red /= size; 1767 1768 /* Get total number of events in this stage -- 1769 Currently, a single processor can register more events than another, but events must all be registered in order, 1770 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1771 on the event ID. This seems best accomplished by associating a communicator with each stage. 1772 1773 Problem: If the event did not happen on proc 1, its name will not be available. 1774 Problem: Event visibility is not implemented 1775 */ 1776 if (localStageUsed[stage]) { 1777 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1778 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1779 } else localNumEvents = 0; 1780 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1781 for (event = 0; event < numEvents; event++) { 1782 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1783 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1784 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; 1785 else flopr = 0.0; 1786 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1787 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1788 PetscCallMPI(MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1789 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1790 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1791 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1792 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1793 PetscCallMPI(MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1794 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1795 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1796 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1797 if (PetscLogMemory) { 1798 PetscCallMPI(MPI_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1799 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1800 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1801 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1802 } 1803 #if defined(PETSC_HAVE_DEVICE) 1804 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1805 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1806 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1807 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1808 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1809 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1810 #endif 1811 name = stageLog->eventLog->eventInfo[event].name; 1812 } else { 1813 flopr = 0.0; 1814 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1815 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1816 PetscCallMPI(MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1817 PetscCallMPI(MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1818 PetscCallMPI(MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1819 PetscCallMPI(MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1820 PetscCallMPI(MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1821 PetscCallMPI(MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1822 PetscCallMPI(MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1823 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1824 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1825 if (PetscLogMemory) { 1826 PetscCallMPI(MPI_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1827 PetscCallMPI(MPI_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1828 PetscCallMPI(MPI_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1829 PetscCallMPI(MPI_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1830 } 1831 #if defined(PETSC_HAVE_DEVICE) 1832 PetscCallMPI(MPI_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1833 PetscCallMPI(MPI_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1834 PetscCallMPI(MPI_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1835 PetscCallMPI(MPI_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1836 PetscCallMPI(MPI_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1837 PetscCallMPI(MPI_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1838 #endif 1839 name = ""; 1840 } 1841 if (mint < 0.0) { 1842 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name)); 1843 mint = 0; 1844 } 1845 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, name); 1846 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1847 #if defined(PETSC_HAVE_DEVICE) 1848 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1849 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 1850 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 1851 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 1852 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 1853 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 1854 } 1855 } 1856 #endif 1857 totm *= 0.5; 1858 totml *= 0.5; 1859 totr /= size; 1860 1861 if (maxC != 0) { 1862 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 1863 else ratC = 0.0; 1864 if (mint != 0.0) ratt = maxt / mint; 1865 else ratt = 0.0; 1866 if (minf != 0.0) ratf = maxf / minf; 1867 else ratf = 0.0; 1868 if (TotalTime != 0.0) fracTime = tott / TotalTime; 1869 else fracTime = 0.0; 1870 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 1871 else fracFlops = 0.0; 1872 if (stageTime != 0.0) fracStageTime = tott / stageTime; 1873 else fracStageTime = 0.0; 1874 if (flops != 0.0) fracStageFlops = totf / flops; 1875 else fracStageFlops = 0.0; 1876 if (numMessages != 0.0) fracMess = totm / numMessages; 1877 else fracMess = 0.0; 1878 if (messageLength != 0.0) fracMessLen = totml / messageLength; 1879 else fracMessLen = 0.0; 1880 if (numReductions != 0.0) fracRed = totr / numReductions; 1881 else fracRed = 0.0; 1882 if (mess != 0.0) fracStageMess = totm / mess; 1883 else fracStageMess = 0.0; 1884 if (messLen != 0.0) fracStageMessLen = totml / messLen; 1885 else fracStageMessLen = 0.0; 1886 if (red != 0.0) fracStageRed = totr / red; 1887 else fracStageRed = 0.0; 1888 if (totm != 0.0) totml /= totm; 1889 else totml = 0.0; 1890 if (maxt != 0.0) flopr = totf / maxt; 1891 else flopr = 0.0; 1892 if (fracStageTime > 1.00) PetscCall(PetscFPrintf(comm, fd, "Warning -- total time of event greater than time of entire stage -- something is wrong with the timer\n")); 1893 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d%4.1f %5.4e%4.1f %3.2e%4.1f %2.1e %2.1e %2.1e%3.0f%3.0f%3.0f%3.0f%3.0f %3.0f%3.0f%3.0f%3.0f%3.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 1894 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 1895 #if defined(PETSC_HAVE_DEVICE) 1896 if (totf != 0.0) fracgflops = gflops / totf; 1897 else fracgflops = 0.0; 1898 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 1899 else gflopr = 0.0; 1900 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e% 3.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 1901 #endif 1902 PetscCall(PetscFPrintf(comm, fd, "\n")); 1903 } 1904 } 1905 } 1906 1907 /* Memory usage and object creation */ 1908 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1909 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1910 #if defined(PETSC_HAVE_DEVICE) 1911 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1912 #endif 1913 PetscCall(PetscFPrintf(comm, fd, "\n")); 1914 PetscCall(PetscFPrintf(comm, fd, "\n")); 1915 1916 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1917 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1918 stats for stages local to processor sets. 1919 */ 1920 /* We should figure out the longest object name here (now 20 characters) */ 1921 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 1922 for (stage = 0; stage < numStages; stage++) { 1923 if (localStageUsed[stage]) { 1924 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 1925 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1926 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 1927 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 1928 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions)); 1929 } 1930 } 1931 } else { 1932 if (!localStageVisible[stage]) continue; 1933 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1934 } 1935 } 1936 1937 PetscCall(PetscFree(localStageUsed)); 1938 PetscCall(PetscFree(stageUsed)); 1939 PetscCall(PetscFree(localStageVisible)); 1940 PetscCall(PetscFree(stageVisible)); 1941 1942 /* Information unrelated to this particular run */ 1943 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 1944 PetscTime(&y); 1945 PetscTime(&x); 1946 PetscTime(&y); 1947 PetscTime(&y); 1948 PetscTime(&y); 1949 PetscTime(&y); 1950 PetscTime(&y); 1951 PetscTime(&y); 1952 PetscTime(&y); 1953 PetscTime(&y); 1954 PetscTime(&y); 1955 PetscTime(&y); 1956 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 1957 /* MPI information */ 1958 if (size > 1) { 1959 MPI_Status status; 1960 PetscMPIInt tag; 1961 MPI_Comm newcomm; 1962 1963 PetscCallMPI(MPI_Barrier(comm)); 1964 PetscTime(&x); 1965 PetscCallMPI(MPI_Barrier(comm)); 1966 PetscCallMPI(MPI_Barrier(comm)); 1967 PetscCallMPI(MPI_Barrier(comm)); 1968 PetscCallMPI(MPI_Barrier(comm)); 1969 PetscCallMPI(MPI_Barrier(comm)); 1970 PetscTime(&y); 1971 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 1972 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 1973 PetscCallMPI(MPI_Barrier(comm)); 1974 if (rank) { 1975 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 1976 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 1977 } else { 1978 PetscTime(&x); 1979 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 1980 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 1981 PetscTime(&y); 1982 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 1983 } 1984 PetscCall(PetscCommDestroy(&newcomm)); 1985 } 1986 PetscCall(PetscOptionsView(NULL, viewer)); 1987 1988 /* Machine and compile information */ 1989 #if defined(PETSC_USE_FORTRAN_KERNELS) 1990 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 1991 #else 1992 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 1993 #endif 1994 #if defined(PETSC_USE_64BIT_INDICES) 1995 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64 bit PetscInt\n")); 1996 #elif defined(PETSC_USE___FLOAT128) 1997 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32 bit PetscInt\n")); 1998 #endif 1999 #if defined(PETSC_USE_REAL_SINGLE) 2000 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 2001 #elif defined(PETSC_USE___FLOAT128) 2002 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 2003 #endif 2004 #if defined(PETSC_USE_REAL_MAT_SINGLE) 2005 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 2006 #else 2007 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 2008 #endif 2009 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 2010 2011 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 2012 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 2013 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 2014 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 2015 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 2016 2017 /* Cleanup */ 2018 PetscCall(PetscFPrintf(comm, fd, "\n")); 2019 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 2020 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 2021 PetscCall(PetscFPTrapPop()); 2022 PetscFunctionReturn(0); 2023 } 2024 2025 /*@C 2026 PetscLogView - Prints a summary of the logging. 2027 2028 Collective over MPI_Comm 2029 2030 Input Parameter: 2031 . viewer - an ASCII viewer 2032 2033 Options Database Keys: 2034 + -log_view [:filename] - Prints summary of log information 2035 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2036 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2037 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2038 . -log_view_memory - Also display memory usage in each event 2039 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2040 . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation 2041 - -log_trace [filename] - Displays a trace of what each process is doing 2042 2043 Notes: 2044 It is possible to control the logging programatically but we recommend using the options database approach whenever possible 2045 By default the summary is printed to stdout. 2046 2047 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2048 2049 If PETSc is configured with --with-logging=0 then this functionality is not available 2050 2051 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2052 directory then open filename.xml with your browser. Specific notes for certain browsers 2053 $ Firefox and Internet explorer - simply open the file 2054 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2055 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2056 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2057 your browser. 2058 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2059 window and render the XML log file contents. 2060 2061 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2062 2063 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2064 or using speedscope (https://www.speedscope.app). 2065 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2066 2067 Level: beginner 2068 2069 .seealso: `PetscLogDefaultBegin()`, `PetscLogDump()` 2070 @*/ 2071 PetscErrorCode PetscLogView(PetscViewer viewer) { 2072 PetscBool isascii; 2073 PetscViewerFormat format; 2074 int stage, lastStage; 2075 PetscStageLog stageLog; 2076 2077 PetscFunctionBegin; 2078 PetscCheck(PetscLogPLB, PETSC_COMM_SELF, PETSC_ERR_SUP, "Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2079 /* Pop off any stages the user forgot to remove */ 2080 lastStage = 0; 2081 PetscCall(PetscLogGetStageLog(&stageLog)); 2082 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2083 while (stage >= 0) { 2084 lastStage = stage; 2085 PetscCall(PetscStageLogPop(stageLog)); 2086 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2087 } 2088 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2089 PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Currently can only view logging to ASCII"); 2090 PetscCall(PetscViewerGetFormat(viewer, &format)); 2091 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2092 PetscCall(PetscLogView_Default(viewer)); 2093 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2094 PetscCall(PetscLogView_Detailed(viewer)); 2095 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2096 PetscCall(PetscLogView_CSV(viewer)); 2097 } else if (format == PETSC_VIEWER_ASCII_XML) { 2098 PetscCall(PetscLogView_Nested(viewer)); 2099 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2100 PetscCall(PetscLogView_Flamegraph(viewer)); 2101 } 2102 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2103 PetscFunctionReturn(0); 2104 } 2105 2106 /*@C 2107 PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed. 2108 2109 Collective on `PETSC_COMM_WORLD` 2110 2111 Level: developer 2112 2113 .seealso: `PetscLogView()` 2114 @*/ 2115 PetscErrorCode PetscLogViewFromOptions(void) { 2116 PetscViewer viewer; 2117 PetscBool flg; 2118 PetscViewerFormat format; 2119 2120 PetscFunctionBegin; 2121 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg)); 2122 if (flg) { 2123 PetscCall(PetscViewerPushFormat(viewer, format)); 2124 PetscCall(PetscLogView(viewer)); 2125 PetscCall(PetscViewerPopFormat(viewer)); 2126 PetscCall(PetscViewerDestroy(&viewer)); 2127 } 2128 PetscFunctionReturn(0); 2129 } 2130 2131 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2132 /*@C 2133 PetscGetFlops - Returns the number of flops used on this processor 2134 since the program began. 2135 2136 Not Collective 2137 2138 Output Parameter: 2139 flops - number of floating point operations 2140 2141 Notes: 2142 A global counter logs all PETSc flop counts. The user can use 2143 `PetscLogFlops()` to increment this counter to include flops for the 2144 application code. 2145 2146 A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank 2147 2148 Level: intermediate 2149 2150 .seealso: `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()` 2151 @*/ 2152 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) { 2153 PetscFunctionBegin; 2154 *flops = petsc_TotalFlops; 2155 PetscFunctionReturn(0); 2156 } 2157 2158 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) { 2159 size_t fullLength; 2160 va_list Argp; 2161 2162 PetscFunctionBegin; 2163 if (!petsc_logObjects) PetscFunctionReturn(0); 2164 va_start(Argp, format); 2165 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp)); 2166 va_end(Argp); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 /*MC 2171 PetscLogFlops - Adds floating point operations to the global counter. 2172 2173 Synopsis: 2174 #include <petsclog.h> 2175 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2176 2177 Not Collective 2178 2179 Input Parameter: 2180 . f - flop counter 2181 2182 Usage: 2183 .vb 2184 PetscLogEvent USER_EVENT; 2185 PetscLogEventRegister("User event",0,&USER_EVENT); 2186 PetscLogEventBegin(USER_EVENT,0,0,0,0); 2187 [code segment to monitor] 2188 PetscLogFlops(user_flops) 2189 PetscLogEventEnd(USER_EVENT,0,0,0,0); 2190 .ve 2191 2192 Note: 2193 A global counter logs all PETSc flop counts. The user can use 2194 PetscLogFlops() to increment this counter to include flops for the 2195 application code. 2196 2197 Level: intermediate 2198 2199 .seealso: `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2200 M*/ 2201 2202 /*MC 2203 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) 2204 to get accurate timings 2205 2206 Synopsis: 2207 #include <petsclog.h> 2208 void PetscPreLoadBegin(PetscBool flag,char *name); 2209 2210 Not Collective 2211 2212 Input Parameters: 2213 + flag - PETSC_TRUE to run twice, `PETSC_FALSE` to run once, may be overridden 2214 with command line option -preload true or -preload false 2215 - name - name of first stage (lines of code timed separately with -log_view) to 2216 be preloaded 2217 2218 Usage: 2219 .vb 2220 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2221 lines of code 2222 PetscPreLoadStage("second stage"); 2223 lines of code 2224 PetscPreLoadEnd(); 2225 .ve 2226 2227 Note: 2228 Only works in C/C++, not Fortran 2229 2230 Flags available within the macro. 2231 + PetscPreLoadingUsed - true if we are or have done preloading 2232 . PetscPreLoadingOn - true if it is CURRENTLY doing preload 2233 . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second 2234 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on 2235 The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin() 2236 and PetscPreLoadEnd() 2237 2238 Level: intermediate 2239 2240 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2241 M*/ 2242 2243 /*MC 2244 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) 2245 to get accurate timings 2246 2247 Synopsis: 2248 #include <petsclog.h> 2249 void PetscPreLoadEnd(void); 2250 2251 Not Collective 2252 2253 Usage: 2254 .vb 2255 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2256 lines of code 2257 PetscPreLoadStage("second stage"); 2258 lines of code 2259 PetscPreLoadEnd(); 2260 .ve 2261 2262 Note: 2263 Only works in C/C++ not fortran 2264 2265 Level: intermediate 2266 2267 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2268 M*/ 2269 2270 /*MC 2271 PetscPreLoadStage - Start a new segment of code to be timed separately. 2272 to get accurate timings 2273 2274 Synopsis: 2275 #include <petsclog.h> 2276 void PetscPreLoadStage(char *name); 2277 2278 Not Collective 2279 2280 Usage: 2281 .vb 2282 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2283 lines of code 2284 PetscPreLoadStage("second stage"); 2285 lines of code 2286 PetscPreLoadEnd(); 2287 .ve 2288 2289 Note: 2290 Only works in C/C++ not fortran 2291 2292 Level: intermediate 2293 2294 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2295 M*/ 2296 2297 #if PetscDefined(HAVE_DEVICE) 2298 #include <petsc/private/deviceimpl.h> 2299 2300 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2301 2302 /* 2303 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2304 because it will result in timing results that cannot be interpreted. 2305 */ 2306 static PetscErrorCode PetscLogGpuTime_Off(void) { 2307 PetscLogGpuTimeFlag = PETSC_FALSE; 2308 return 0; 2309 } 2310 2311 /*@C 2312 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2313 2314 Options Database Key: 2315 . -log_view_gpu_time - provide the GPU times in the -log_view output 2316 2317 Notes: 2318 Turning on the timing of the 2319 GPU kernels can slow down the entire computation and should only be used when studying the performance 2320 of operations on GPU such as vector operations and matrix-vector operations. 2321 2322 This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off. 2323 2324 Level: advanced 2325 2326 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2327 @*/ 2328 PetscErrorCode PetscLogGpuTime(void) { 2329 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2330 PetscLogGpuTimeFlag = PETSC_TRUE; 2331 return 0; 2332 } 2333 2334 /*@C 2335 PetscLogGpuTimeBegin - Start timer for device 2336 2337 Notes: 2338 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times). 2339 2340 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times). 2341 2342 There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()` 2343 2344 This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space. 2345 2346 The regular logging captures the time for data transfers and any CPU activites during the event 2347 2348 It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel. 2349 2350 Developer Notes: 2351 The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`. 2352 2353 `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the 2354 event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution, 2355 but not continued GPU excution, until the timer event is recorded. 2356 2357 Level: intermediate 2358 2359 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2360 @*/ 2361 PetscErrorCode PetscLogGpuTimeBegin(void) { 2362 PetscFunctionBegin; 2363 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2364 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2365 PetscDeviceContext dctx; 2366 2367 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2368 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2369 } else { 2370 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2371 } 2372 PetscFunctionReturn(0); 2373 } 2374 2375 /*@C 2376 PetscLogGpuTimeEnd - Stop timer for device 2377 2378 Level: intermediate 2379 2380 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2381 @*/ 2382 PetscErrorCode PetscLogGpuTimeEnd(void) { 2383 PetscFunctionBegin; 2384 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2385 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2386 PetscDeviceContext dctx; 2387 PetscLogDouble elapsed; 2388 2389 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2390 PetscCall(PetscDeviceContextEndTimer_Internal(dctx, &elapsed)); 2391 petsc_gtime += (elapsed / 1000.0); 2392 } else { 2393 PetscCall(PetscTimeAdd(&petsc_gtime)); 2394 } 2395 PetscFunctionReturn(0); 2396 } 2397 #endif /* end of PETSC_HAVE_DEVICE */ 2398 2399 #else /* end of -DPETSC_USE_LOG section */ 2400 2401 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) { 2402 PetscFunctionBegin; 2403 PetscFunctionReturn(0); 2404 } 2405 2406 #endif /* PETSC_USE_LOG*/ 2407 2408 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2409 PetscClassId PETSC_OBJECT_CLASSID = 0; 2410 2411 /*@C 2412 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2413 2414 Not Collective 2415 2416 Input Parameter: 2417 . name - The class name 2418 2419 Output Parameter: 2420 . oclass - The class id or classid 2421 2422 Level: developer 2423 2424 .seealso: `PetscLogEventRegister()` 2425 @*/ 2426 PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass) { 2427 #if defined(PETSC_USE_LOG) 2428 PetscStageLog stageLog; 2429 PetscInt stage; 2430 #endif 2431 2432 PetscFunctionBegin; 2433 *oclass = ++PETSC_LARGEST_CLASSID; 2434 #if defined(PETSC_USE_LOG) 2435 PetscCall(PetscLogGetStageLog(&stageLog)); 2436 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2437 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2438 #endif 2439 PetscFunctionReturn(0); 2440 } 2441 2442 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2443 #include <mpe.h> 2444 2445 PetscBool PetscBeganMPE = PETSC_FALSE; 2446 2447 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2448 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2449 2450 /*@C 2451 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files 2452 and slows the program down. 2453 2454 Collective over `PETSC_COMM_WORLD` 2455 2456 Options Database Key: 2457 . -log_mpe - Prints extensive log information 2458 2459 Note: 2460 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 2461 intended for production runs since it logs only flop rates and object 2462 creation (and should not significantly slow the programs). 2463 2464 Level: advanced 2465 2466 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2467 `PetscLogEventDeactivate()` 2468 @*/ 2469 PetscErrorCode PetscLogMPEBegin(void) { 2470 PetscFunctionBegin; 2471 /* Do MPE initialization */ 2472 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2473 PetscCall(PetscInfo(0, "Initializing MPE.\n")); 2474 PetscCall(MPE_Init_log()); 2475 2476 PetscBeganMPE = PETSC_TRUE; 2477 } else { 2478 PetscCall(PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n")); 2479 } 2480 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2481 PetscFunctionReturn(0); 2482 } 2483 2484 /*@C 2485 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2486 2487 Collective over `PETSC_COMM_WORLD` 2488 2489 Level: advanced 2490 2491 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2492 @*/ 2493 PetscErrorCode PetscLogMPEDump(const char sname[]) { 2494 char name[PETSC_MAX_PATH_LEN]; 2495 2496 PetscFunctionBegin; 2497 if (PetscBeganMPE) { 2498 PetscCall(PetscInfo(0, "Finalizing MPE.\n")); 2499 if (sname) { 2500 PetscCall(PetscStrcpy(name, sname)); 2501 } else { 2502 PetscCall(PetscGetProgramName(name, sizeof(name))); 2503 } 2504 PetscCall(MPE_Finish_log(name)); 2505 } else { 2506 PetscCall(PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n")); 2507 } 2508 PetscFunctionReturn(0); 2509 } 2510 2511 #define PETSC_RGB_COLORS_MAX 39 2512 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ", 2513 "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ", 2514 "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ", 2515 "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ", 2516 "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "}; 2517 2518 /*@C 2519 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()` 2520 2521 Not collective. Maybe it should be? 2522 2523 Output Parameter: 2524 . str - character string representing the color 2525 2526 Level: developer 2527 2528 .seealso: `PetscLogEventRegister()` 2529 @*/ 2530 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) { 2531 static int idx = 0; 2532 2533 PetscFunctionBegin; 2534 *str = PetscLogMPERGBColors[idx]; 2535 idx = (idx + 1) % PETSC_RGB_COLORS_MAX; 2536 PetscFunctionReturn(0); 2537 } 2538 2539 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2540