1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 15 PetscErrorCode PetscLogObjectParent(PetscObject p,PetscObject c) 16 { 17 if (!c || !p) return 0; 18 c->parent = p; 19 c->parentid = p->id; 20 return 0; 21 } 22 23 /*@C 24 PetscLogObjectMemory - Adds to an object a count of additional amount of memory that is used by the object. 25 26 Not collective. 27 28 Input Parameters: 29 + obj - the PETSc object 30 - mem - the amount of memory that is being added to the object 31 32 Level: developer 33 34 Developer Notes: 35 Currently we do not always do a good job of associating all memory allocations with an object. 36 37 .seealso: `PetscFinalize()`, `PetscInitializeFortran()`, `PetscGetArgs()`, `PetscInitializeNoArguments()` 38 39 @*/ 40 PetscErrorCode PetscLogObjectMemory(PetscObject p,PetscLogDouble m) 41 { 42 if (!p) return 0; 43 p->mem += m; 44 return 0; 45 } 46 47 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 48 49 #if defined(PETSC_USE_LOG) 50 #include <petscmachineinfo.h> 51 #include <petscconfiginfo.h> 52 53 /* used in the MPI_XXX() count macros in petsclog.h */ 54 55 /* Action and object logging variables */ 56 Action *petsc_actions = NULL; 57 Object *petsc_objects = NULL; 58 PetscBool petsc_logActions = PETSC_FALSE; 59 PetscBool petsc_logObjects = PETSC_FALSE; 60 int petsc_numActions = 0, petsc_maxActions = 100; 61 int petsc_numObjects = 0, petsc_maxObjects = 100; 62 int petsc_numObjectsDestroyed = 0; 63 64 /* Global counters */ 65 PetscLogDouble petsc_BaseTime = 0.0; 66 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 67 PetscLogDouble petsc_tmp_flops = 0.0; /* The incremental number of flops */ 68 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 69 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 70 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 71 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 72 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 73 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 74 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 75 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 76 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 77 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 78 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 79 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 80 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 81 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 82 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 83 #if defined(PETSC_HAVE_DEVICE) 84 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 85 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 86 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 87 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 88 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 89 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 90 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 91 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 92 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 93 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 94 #endif 95 96 /* Logging functions */ 97 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 98 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 99 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 100 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 101 102 /* Tracing event logging variables */ 103 FILE *petsc_tracefile = NULL; 104 int petsc_tracelevel = 0; 105 const char *petsc_traceblanks = " "; 106 char petsc_tracespace[128] = " "; 107 PetscLogDouble petsc_tracetime = 0.0; 108 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 109 110 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) 111 { 112 int stage; 113 PetscBool opt; 114 115 PetscFunctionBegin; 116 if (PetscLogInitializeCalled) PetscFunctionReturn(0); 117 PetscLogInitializeCalled = PETSC_TRUE; 118 119 PetscCall(PetscOptionsHasName(NULL,NULL, "-log_exclude_actions", &opt)); 120 if (opt) petsc_logActions = PETSC_FALSE; 121 PetscCall(PetscOptionsHasName(NULL,NULL, "-log_exclude_objects", &opt)); 122 if (opt) petsc_logObjects = PETSC_FALSE; 123 if (petsc_logActions) { 124 PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 125 } 126 if (petsc_logObjects) { 127 PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 128 } 129 PetscLogPHC = PetscLogObjCreateDefault; 130 PetscLogPHD = PetscLogObjDestroyDefault; 131 /* Setup default logging structures */ 132 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 133 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 134 135 /* All processors sync here for more consistent logging */ 136 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 137 PetscTime(&petsc_BaseTime); 138 PetscCall(PetscLogStagePush(stage)); 139 PetscFunctionReturn(0); 140 } 141 142 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) 143 { 144 PetscStageLog stageLog; 145 146 PetscFunctionBegin; 147 PetscCall(PetscFree(petsc_actions)); 148 PetscCall(PetscFree(petsc_objects)); 149 PetscCall(PetscLogNestedEnd()); 150 PetscCall(PetscLogSet(NULL, NULL)); 151 152 /* Resetting phase */ 153 PetscCall(PetscLogGetStageLog(&stageLog)); 154 PetscCall(PetscStageLogDestroy(stageLog)); 155 156 petsc_TotalFlops = 0.0; 157 petsc_numActions = 0; 158 petsc_numObjects = 0; 159 petsc_numObjectsDestroyed = 0; 160 petsc_maxActions = 100; 161 petsc_maxObjects = 100; 162 petsc_actions = NULL; 163 petsc_objects = NULL; 164 petsc_logActions = PETSC_FALSE; 165 petsc_logObjects = PETSC_FALSE; 166 petsc_BaseTime = 0.0; 167 petsc_TotalFlops = 0.0; 168 petsc_tmp_flops = 0.0; 169 petsc_send_ct = 0.0; 170 petsc_recv_ct = 0.0; 171 petsc_send_len = 0.0; 172 petsc_recv_len = 0.0; 173 petsc_isend_ct = 0.0; 174 petsc_irecv_ct = 0.0; 175 petsc_isend_len = 0.0; 176 petsc_irecv_len = 0.0; 177 petsc_wait_ct = 0.0; 178 petsc_wait_any_ct = 0.0; 179 petsc_wait_all_ct = 0.0; 180 petsc_sum_of_waits_ct = 0.0; 181 petsc_allreduce_ct = 0.0; 182 petsc_gather_ct = 0.0; 183 petsc_scatter_ct = 0.0; 184 #if defined(PETSC_HAVE_DEVICE) 185 petsc_ctog_ct = 0.0; 186 petsc_gtoc_ct = 0.0; 187 petsc_ctog_sz = 0.0; 188 petsc_gtoc_sz = 0.0; 189 petsc_gflops = 0.0; 190 petsc_gtime = 0.0; 191 #endif 192 PETSC_LARGEST_EVENT = PETSC_EVENT; 193 PetscLogPHC = NULL; 194 PetscLogPHD = NULL; 195 petsc_tracefile = NULL; 196 petsc_tracelevel = 0; 197 petsc_traceblanks = " "; 198 petsc_tracespace[0] = ' '; petsc_tracespace[1] = 0; 199 petsc_tracetime = 0.0; 200 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 201 PETSC_OBJECT_CLASSID = 0; 202 petsc_stageLog = NULL; 203 PetscLogInitializeCalled = PETSC_FALSE; 204 PetscFunctionReturn(0); 205 } 206 207 /*@C 208 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 209 210 Not Collective 211 212 Input Parameters: 213 + b - The function called at beginning of event 214 - e - The function called at end of event 215 216 Level: developer 217 218 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()` 219 @*/ 220 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), 221 PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) 222 { 223 PetscFunctionBegin; 224 PetscLogPLB = b; 225 PetscLogPLE = e; 226 PetscFunctionReturn(0); 227 } 228 229 /*@C 230 PetscLogIsActive - Check if logging is currently in progress. 231 232 Not Collective 233 234 Output Parameter: 235 . isActive - PETSC_TRUE if logging is in progress, PETSC_FALSE otherwise 236 237 Level: beginner 238 239 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 240 @*/ 241 PetscErrorCode PetscLogIsActive(PetscBool *isActive) 242 { 243 PetscFunctionBegin; 244 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 245 PetscFunctionReturn(0); 246 } 247 248 /*@C 249 PetscLogDefaultBegin - Turns on logging of objects and events. This logs flop 250 rates and object creation and should not slow programs down too much. 251 This routine may be called more than once. 252 253 Logically Collective over PETSC_COMM_WORLD 254 255 Options Database Keys: 256 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 257 screen (for code configured with --with-log=1 (which is the default)) 258 259 Usage: 260 .vb 261 PetscInitialize(...); 262 PetscLogDefaultBegin(); 263 ... code ... 264 PetscLogView(viewer); or PetscLogDump(); 265 PetscFinalize(); 266 .ve 267 268 Notes: 269 PetscLogView(viewer) or PetscLogDump() actually cause the printing of 270 the logging information. 271 272 Level: advanced 273 274 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 275 @*/ 276 PetscErrorCode PetscLogDefaultBegin(void) 277 { 278 PetscFunctionBegin; 279 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 280 PetscFunctionReturn(0); 281 } 282 283 /*@C 284 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 285 all events. This creates large log files and slows the program down. 286 287 Logically Collective on PETSC_COMM_WORLD 288 289 Options Database Keys: 290 . -log_all - Prints extensive log information 291 292 Usage: 293 .vb 294 PetscInitialize(...); 295 PetscLogAllBegin(); 296 ... code ... 297 PetscLogDump(filename); 298 PetscFinalize(); 299 .ve 300 301 Notes: 302 A related routine is PetscLogDefaultBegin() (with the options key -log), which is 303 intended for production runs since it logs only flop rates and object 304 creation (and shouldn't significantly slow the programs). 305 306 Level: advanced 307 308 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 309 @*/ 310 PetscErrorCode PetscLogAllBegin(void) 311 { 312 PetscFunctionBegin; 313 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 314 PetscFunctionReturn(0); 315 } 316 317 /*@C 318 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 319 begins or ends, the event name is printed. 320 321 Logically Collective on PETSC_COMM_WORLD 322 323 Input Parameter: 324 . file - The file to print trace in (e.g. stdout) 325 326 Options Database Key: 327 . -log_trace [filename] - Activates PetscLogTraceBegin() 328 329 Notes: 330 PetscLogTraceBegin() prints the processor number, the execution time (sec), 331 then "Event begin:" or "Event end:" followed by the event name. 332 333 PetscLogTraceBegin() allows tracing of all PETSc calls, which is useful 334 to determine where a program is hanging without running in the 335 debugger. Can be used in conjunction with the -info option. 336 337 Level: intermediate 338 339 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 340 @*/ 341 PetscErrorCode PetscLogTraceBegin(FILE *file) 342 { 343 PetscFunctionBegin; 344 petsc_tracefile = file; 345 346 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 347 PetscFunctionReturn(0); 348 } 349 350 /*@ 351 PetscLogActions - Determines whether actions are logged for the graphical viewer. 352 353 Not Collective 354 355 Input Parameter: 356 . flag - PETSC_TRUE if actions are to be logged 357 358 Level: intermediate 359 360 Note: Logging of actions continues to consume more memory as the program 361 runs. Long running programs should consider turning this feature off. 362 363 Options Database Keys: 364 . -log_exclude_actions - Turns off actions logging 365 366 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 367 @*/ 368 PetscErrorCode PetscLogActions(PetscBool flag) 369 { 370 PetscFunctionBegin; 371 petsc_logActions = flag; 372 PetscFunctionReturn(0); 373 } 374 375 /*@ 376 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 377 378 Not Collective 379 380 Input Parameter: 381 . flag - PETSC_TRUE if objects are to be logged 382 383 Level: intermediate 384 385 Note: Logging of objects continues to consume more memory as the program 386 runs. Long running programs should consider turning this feature off. 387 388 Options Database Keys: 389 . -log_exclude_objects - Turns off objects logging 390 391 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 392 @*/ 393 PetscErrorCode PetscLogObjects(PetscBool flag) 394 { 395 PetscFunctionBegin; 396 petsc_logObjects = flag; 397 PetscFunctionReturn(0); 398 } 399 400 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 401 /*@C 402 PetscLogStageRegister - Attaches a character string name to a logging stage. 403 404 Not Collective 405 406 Input Parameter: 407 . sname - The name to associate with that stage 408 409 Output Parameter: 410 . stage - The stage number 411 412 Level: intermediate 413 414 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 415 @*/ 416 PetscErrorCode PetscLogStageRegister(const char sname[],PetscLogStage *stage) 417 { 418 PetscStageLog stageLog; 419 PetscLogEvent event; 420 421 PetscFunctionBegin; 422 PetscCall(PetscLogGetStageLog(&stageLog)); 423 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 424 /* Copy events already changed in the main stage, this sucks */ 425 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 426 for (event = 0; event < stageLog->eventLog->numEvents; event++) { 427 PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event],&stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 428 } 429 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 430 PetscFunctionReturn(0); 431 } 432 433 /*@C 434 PetscLogStagePush - This function pushes a stage on the stack. 435 436 Not Collective 437 438 Input Parameter: 439 . stage - The stage on which to log 440 441 Usage: 442 If the option -log_sumary is used to run the program containing the 443 following code, then 2 sets of summary data will be printed during 444 PetscFinalize(). 445 .vb 446 PetscInitialize(int *argc,char ***args,0,0); 447 [stage 0 of code] 448 PetscLogStagePush(1); 449 [stage 1 of code] 450 PetscLogStagePop(); 451 PetscBarrier(...); 452 [more stage 0 of code] 453 PetscFinalize(); 454 .ve 455 456 Notes: 457 Use PetscLogStageRegister() to register a stage. 458 459 Level: intermediate 460 461 .seealso: `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 462 @*/ 463 PetscErrorCode PetscLogStagePush(PetscLogStage stage) 464 { 465 PetscStageLog stageLog; 466 467 PetscFunctionBegin; 468 PetscCall(PetscLogGetStageLog(&stageLog)); 469 PetscCall(PetscStageLogPush(stageLog, stage)); 470 PetscFunctionReturn(0); 471 } 472 473 /*@C 474 PetscLogStagePop - This function pops a stage from the stack. 475 476 Not Collective 477 478 Usage: 479 If the option -log_sumary is used to run the program containing the 480 following code, then 2 sets of summary data will be printed during 481 PetscFinalize(). 482 .vb 483 PetscInitialize(int *argc,char ***args,0,0); 484 [stage 0 of code] 485 PetscLogStagePush(1); 486 [stage 1 of code] 487 PetscLogStagePop(); 488 PetscBarrier(...); 489 [more stage 0 of code] 490 PetscFinalize(); 491 .ve 492 493 Notes: 494 Use PetscLogStageRegister() to register a stage. 495 496 Level: intermediate 497 498 .seealso: `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 499 @*/ 500 PetscErrorCode PetscLogStagePop(void) 501 { 502 PetscStageLog stageLog; 503 504 PetscFunctionBegin; 505 PetscCall(PetscLogGetStageLog(&stageLog)); 506 PetscCall(PetscStageLogPop(stageLog)); 507 PetscFunctionReturn(0); 508 } 509 510 /*@ 511 PetscLogStageSetActive - Determines stage activity for PetscLogEventBegin() and PetscLogEventEnd(). 512 513 Not Collective 514 515 Input Parameters: 516 + stage - The stage 517 - isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE) 518 519 Level: intermediate 520 521 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 522 @*/ 523 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) 524 { 525 PetscStageLog stageLog; 526 527 PetscFunctionBegin; 528 PetscCall(PetscLogGetStageLog(&stageLog)); 529 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 530 PetscFunctionReturn(0); 531 } 532 533 /*@ 534 PetscLogStageGetActive - Returns stage activity for PetscLogEventBegin() and PetscLogEventEnd(). 535 536 Not Collective 537 538 Input Parameter: 539 . stage - The stage 540 541 Output Parameter: 542 . isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE) 543 544 Level: intermediate 545 546 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 547 @*/ 548 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) 549 { 550 PetscStageLog stageLog; 551 552 PetscFunctionBegin; 553 PetscCall(PetscLogGetStageLog(&stageLog)); 554 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 555 PetscFunctionReturn(0); 556 } 557 558 /*@ 559 PetscLogStageSetVisible - Determines stage visibility in PetscLogView() 560 561 Not Collective 562 563 Input Parameters: 564 + stage - The stage 565 - isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE) 566 567 Level: intermediate 568 569 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 570 @*/ 571 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) 572 { 573 PetscStageLog stageLog; 574 575 PetscFunctionBegin; 576 PetscCall(PetscLogGetStageLog(&stageLog)); 577 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 578 PetscFunctionReturn(0); 579 } 580 581 /*@ 582 PetscLogStageGetVisible - Returns stage visibility in PetscLogView() 583 584 Not Collective 585 586 Input Parameter: 587 . stage - The stage 588 589 Output Parameter: 590 . isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE) 591 592 Level: intermediate 593 594 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 595 @*/ 596 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) 597 { 598 PetscStageLog stageLog; 599 600 PetscFunctionBegin; 601 PetscCall(PetscLogGetStageLog(&stageLog)); 602 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 603 PetscFunctionReturn(0); 604 } 605 606 /*@C 607 PetscLogStageGetId - Returns the stage id when given the stage name. 608 609 Not Collective 610 611 Input Parameter: 612 . name - The stage name 613 614 Output Parameter: 615 . stage - The stage, , or -1 if no stage with that name exists 616 617 Level: intermediate 618 619 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 620 @*/ 621 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) 622 { 623 PetscStageLog stageLog; 624 625 PetscFunctionBegin; 626 PetscCall(PetscLogGetStageLog(&stageLog)); 627 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 628 PetscFunctionReturn(0); 629 } 630 631 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 632 /*@C 633 PetscLogEventRegister - Registers an event name for logging operations in an application code. 634 635 Not Collective 636 637 Input Parameters: 638 + name - The name associated with the event 639 - classid - The classid associated to the class for this event, obtain either with 640 PetscClassIdRegister() or use a predefined one such as KSP_CLASSID, SNES_CLASSID, the predefined ones 641 are only available in C code 642 643 Output Parameter: 644 . event - The event id for use with PetscLogEventBegin() and PetscLogEventEnd(). 645 646 Example of Usage: 647 .vb 648 PetscLogEvent USER_EVENT; 649 PetscClassId classid; 650 PetscLogDouble user_event_flops; 651 PetscClassIdRegister("class name",&classid); 652 PetscLogEventRegister("User event name",classid,&USER_EVENT); 653 PetscLogEventBegin(USER_EVENT,0,0,0,0); 654 [code segment to monitor] 655 PetscLogFlops(user_event_flops); 656 PetscLogEventEnd(USER_EVENT,0,0,0,0); 657 .ve 658 659 Notes: 660 PETSc automatically logs library events if the code has been 661 configured with --with-log (which is the default) and 662 -log_view or -log_all is specified. PetscLogEventRegister() is 663 intended for logging user events to supplement this PETSc 664 information. 665 666 PETSc can gather data for use with the utilities Jumpshot 667 (part of the MPICH distribution). If PETSc has been compiled 668 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 669 MPICH), the user can employ another command line option, -log_mpe, 670 to create a logfile, "mpe.log", which can be visualized 671 Jumpshot. 672 673 The classid is associated with each event so that classes of events 674 can be disabled simultaneously, such as all matrix events. The user 675 can either use an existing classid, such as MAT_CLASSID, or create 676 their own as shown in the example. 677 678 If an existing event with the same name exists, its event handle is 679 returned instead of creating a new event. 680 681 Level: intermediate 682 683 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 684 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 685 @*/ 686 PetscErrorCode PetscLogEventRegister(const char name[],PetscClassId classid,PetscLogEvent *event) 687 { 688 PetscStageLog stageLog; 689 int stage; 690 691 PetscFunctionBegin; 692 *event = PETSC_DECIDE; 693 PetscCall(PetscLogGetStageLog(&stageLog)); 694 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 695 if (*event > 0) PetscFunctionReturn(0); 696 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 697 for (stage = 0; stage < stageLog->numStages; stage++) { 698 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 699 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 700 } 701 PetscFunctionReturn(0); 702 } 703 704 /*@ 705 PetscLogEventSetCollective - Indicates that a particular event is collective. 706 707 Not Collective 708 709 Input Parameters: 710 + event - The event id 711 - collective - Bolean flag indicating whether a particular event is collective 712 713 Note: 714 New events returned from PetscLogEventRegister() are collective by default. 715 716 Level: developer 717 718 .seealso: `PetscLogEventRegister()` 719 @*/ 720 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event,PetscBool collective) 721 { 722 PetscStageLog stageLog; 723 PetscEventRegLog eventRegLog; 724 725 PetscFunctionBegin; 726 PetscCall(PetscLogGetStageLog(&stageLog)); 727 PetscCall(PetscStageLogGetEventRegLog(stageLog,&eventRegLog)); 728 PetscCheck(event >= 0 && event <= eventRegLog->numEvents,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid event id"); 729 eventRegLog->eventInfo[event].collective = collective; 730 PetscFunctionReturn(0); 731 } 732 733 /*@ 734 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 735 736 Not Collective 737 738 Input Parameter: 739 . classid - The object class, for example MAT_CLASSID, SNES_CLASSID, etc. 740 741 Level: developer 742 743 .seealso: `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 744 @*/ 745 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) 746 { 747 PetscStageLog stageLog; 748 int stage; 749 750 PetscFunctionBegin; 751 PetscCall(PetscLogGetStageLog(&stageLog)); 752 for (stage = 0; stage < stageLog->numStages; stage++) { 753 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 754 } 755 PetscFunctionReturn(0); 756 } 757 758 /*@ 759 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 760 761 Not Collective 762 763 Input Parameter: 764 . classid - The object class, for example MAT_CLASSID, SNES_CLASSID, etc. 765 766 Level: developer 767 768 .seealso: `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 769 @*/ 770 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) 771 { 772 PetscStageLog stageLog; 773 int stage; 774 775 PetscFunctionBegin; 776 PetscCall(PetscLogGetStageLog(&stageLog)); 777 for (stage = 0; stage < stageLog->numStages; stage++) { 778 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 779 } 780 PetscFunctionReturn(0); 781 } 782 783 /*@ 784 PetscLogEventActivate - Indicates that a particular event should be logged. 785 786 Not Collective 787 788 Input Parameter: 789 . event - The event id 790 791 Usage: 792 .vb 793 PetscLogEventDeactivate(VEC_SetValues); 794 [code where you do not want to log VecSetValues()] 795 PetscLogEventActivate(VEC_SetValues); 796 [code where you do want to log VecSetValues()] 797 .ve 798 799 Note: 800 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 801 or an event number obtained with PetscLogEventRegister(). 802 803 Level: advanced 804 805 .seealso: `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 806 @*/ 807 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) 808 { 809 PetscStageLog stageLog; 810 int stage; 811 812 PetscFunctionBegin; 813 PetscCall(PetscLogGetStageLog(&stageLog)); 814 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 815 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 816 PetscFunctionReturn(0); 817 } 818 819 /*@ 820 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 821 822 Not Collective 823 824 Input Parameter: 825 . event - The event id 826 827 Usage: 828 .vb 829 PetscLogEventDeactivate(VEC_SetValues); 830 [code where you do not want to log VecSetValues()] 831 PetscLogEventActivate(VEC_SetValues); 832 [code where you do want to log VecSetValues()] 833 .ve 834 835 Note: 836 The event may be either a pre-defined PETSc event (found in 837 include/petsclog.h) or an event number obtained with PetscLogEventRegister()). 838 839 Level: advanced 840 841 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 842 @*/ 843 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) 844 { 845 PetscStageLog stageLog; 846 int stage; 847 848 PetscFunctionBegin; 849 PetscCall(PetscLogGetStageLog(&stageLog)); 850 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 851 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 852 PetscFunctionReturn(0); 853 } 854 855 /*@ 856 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged. 857 858 Not Collective 859 860 Input Parameter: 861 . event - The event id 862 863 Usage: 864 .vb 865 PetscLogEventDeactivatePush(VEC_SetValues); 866 [code where you do not want to log VecSetValues()] 867 PetscLogEventDeactivatePop(VEC_SetValues); 868 [code where you do want to log VecSetValues()] 869 .ve 870 871 Note: 872 The event may be either a pre-defined PETSc event (found in 873 include/petsclog.h) or an event number obtained with PetscLogEventRegister()). 874 875 Level: advanced 876 877 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()` 878 @*/ 879 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) 880 { 881 PetscStageLog stageLog; 882 int stage; 883 884 PetscFunctionBegin; 885 PetscCall(PetscLogGetStageLog(&stageLog)); 886 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 887 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 888 PetscFunctionReturn(0); 889 } 890 891 /*@ 892 PetscLogEventDeactivatePop - Indicates that a particular event shouldbe logged. 893 894 Not Collective 895 896 Input Parameter: 897 . event - The event id 898 899 Usage: 900 .vb 901 PetscLogEventDeactivatePush(VEC_SetValues); 902 [code where you do not want to log VecSetValues()] 903 PetscLogEventDeactivatePop(VEC_SetValues); 904 [code where you do want to log VecSetValues()] 905 .ve 906 907 Note: 908 The event may be either a pre-defined PETSc event (found in 909 include/petsclog.h) or an event number obtained with PetscLogEventRegister()). 910 911 Level: advanced 912 913 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 914 @*/ 915 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) 916 { 917 PetscStageLog stageLog; 918 int stage; 919 920 PetscFunctionBegin; 921 PetscCall(PetscLogGetStageLog(&stageLog)); 922 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 923 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 924 PetscFunctionReturn(0); 925 } 926 927 /*@ 928 PetscLogEventSetActiveAll - Sets the event activity in every stage. 929 930 Not Collective 931 932 Input Parameters: 933 + event - The event id 934 - isActive - The activity flag determining whether the event is logged 935 936 Level: advanced 937 938 .seealso: `PlogEventActivate()`, `PlogEventDeactivate()` 939 @*/ 940 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) 941 { 942 PetscStageLog stageLog; 943 int stage; 944 945 PetscFunctionBegin; 946 PetscCall(PetscLogGetStageLog(&stageLog)); 947 for (stage = 0; stage < stageLog->numStages; stage++) { 948 if (isActive) { 949 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 950 } else { 951 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 952 } 953 } 954 PetscFunctionReturn(0); 955 } 956 957 /*@ 958 PetscLogEventActivateClass - Activates event logging for a PETSc object class. 959 960 Not Collective 961 962 Input Parameter: 963 . classid - The event class, for example MAT_CLASSID, SNES_CLASSID, etc. 964 965 Level: developer 966 967 .seealso: `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 968 @*/ 969 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) 970 { 971 PetscStageLog stageLog; 972 int stage; 973 974 PetscFunctionBegin; 975 PetscCall(PetscLogGetStageLog(&stageLog)); 976 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 977 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 978 PetscFunctionReturn(0); 979 } 980 981 /*@ 982 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class. 983 984 Not Collective 985 986 Input Parameter: 987 . classid - The event class, for example MAT_CLASSID, SNES_CLASSID, etc. 988 989 Level: developer 990 991 .seealso: `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 992 @*/ 993 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) 994 { 995 PetscStageLog stageLog; 996 int stage; 997 998 PetscFunctionBegin; 999 PetscCall(PetscLogGetStageLog(&stageLog)); 1000 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1001 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 /*MC 1006 PetscLogEventSync - Synchronizes the beginning of a user event. 1007 1008 Synopsis: 1009 #include <petsclog.h> 1010 PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm) 1011 1012 Collective 1013 1014 Input Parameters: 1015 + e - integer associated with the event obtained from PetscLogEventRegister() 1016 - comm - an MPI communicator 1017 1018 Usage: 1019 .vb 1020 PetscLogEvent USER_EVENT; 1021 PetscLogEventRegister("User event",0,&USER_EVENT); 1022 PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD); 1023 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1024 [code segment to monitor] 1025 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1026 .ve 1027 1028 Notes: 1029 This routine should be called only if there is not a 1030 PetscObject available to pass to PetscLogEventBegin(). 1031 1032 Level: developer 1033 1034 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1035 1036 M*/ 1037 1038 /*MC 1039 PetscLogEventBegin - Logs the beginning of a user event. 1040 1041 Synopsis: 1042 #include <petsclog.h> 1043 PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1044 1045 Not Collective 1046 1047 Input Parameters: 1048 + e - integer associated with the event obtained from PetscLogEventRegister() 1049 - o1,o2,o3,o4 - objects associated with the event, or 0 1050 1051 Fortran Synopsis: 1052 void PetscLogEventBegin(int e,PetscErrorCode ierr) 1053 1054 Usage: 1055 .vb 1056 PetscLogEvent USER_EVENT; 1057 PetscLogDouble user_event_flops; 1058 PetscLogEventRegister("User event",0,&USER_EVENT); 1059 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1060 [code segment to monitor] 1061 PetscLogFlops(user_event_flops); 1062 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1063 .ve 1064 1065 Notes: 1066 You need to register each integer event with the command 1067 PetscLogEventRegister(). 1068 1069 Developer Notes: 1070 PetscLogEventBegin() and PetscLogEventBegin() return error codes instead of explicitly handling the 1071 errors that occur in the macro directly because other packages that use this macros have used them in their 1072 own functions or methods that do not return error codes and it would be disruptive to change the current 1073 behavior. 1074 1075 Level: intermediate 1076 1077 .seealso: `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1078 1079 M*/ 1080 1081 /*MC 1082 PetscLogEventEnd - Log the end of a user event. 1083 1084 Synopsis: 1085 #include <petsclog.h> 1086 PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1087 1088 Not Collective 1089 1090 Input Parameters: 1091 + e - integer associated with the event obtained with PetscLogEventRegister() 1092 - o1,o2,o3,o4 - objects associated with the event, or 0 1093 1094 Fortran Synopsis: 1095 void PetscLogEventEnd(int e,PetscErrorCode ierr) 1096 1097 Usage: 1098 .vb 1099 PetscLogEvent USER_EVENT; 1100 PetscLogDouble user_event_flops; 1101 PetscLogEventRegister("User event",0,&USER_EVENT,); 1102 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1103 [code segment to monitor] 1104 PetscLogFlops(user_event_flops); 1105 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1106 .ve 1107 1108 Notes: 1109 You should also register each additional integer event with the command 1110 PetscLogEventRegister(). 1111 1112 Level: intermediate 1113 1114 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1115 1116 M*/ 1117 1118 /*@C 1119 PetscLogEventGetId - Returns the event id when given the event name. 1120 1121 Not Collective 1122 1123 Input Parameter: 1124 . name - The event name 1125 1126 Output Parameter: 1127 . event - The event, or -1 if no event with that name exists 1128 1129 Level: intermediate 1130 1131 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1132 @*/ 1133 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) 1134 { 1135 PetscStageLog stageLog; 1136 1137 PetscFunctionBegin; 1138 PetscCall(PetscLogGetStageLog(&stageLog)); 1139 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1140 PetscFunctionReturn(0); 1141 } 1142 1143 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1144 /*@C 1145 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1146 be read by bin/petscview. This program no longer exists. 1147 1148 Collective on PETSC_COMM_WORLD 1149 1150 Input Parameter: 1151 . name - an optional file name 1152 1153 Usage: 1154 .vb 1155 PetscInitialize(...); 1156 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1157 ... code ... 1158 PetscLogDump(filename); 1159 PetscFinalize(); 1160 .ve 1161 1162 Notes: 1163 The default file name is 1164 $ Log.<rank> 1165 where <rank> is the processor number. If no name is specified, 1166 this file will be used. 1167 1168 Level: advanced 1169 1170 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1171 @*/ 1172 PetscErrorCode PetscLogDump(const char sname[]) 1173 { 1174 PetscStageLog stageLog; 1175 PetscEventPerfInfo *eventInfo; 1176 FILE *fd; 1177 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1178 PetscLogDouble flops, _TotalTime; 1179 PetscMPIInt rank; 1180 int action, object, curStage; 1181 PetscLogEvent event; 1182 1183 PetscFunctionBegin; 1184 /* Calculate the total elapsed time */ 1185 PetscTime(&_TotalTime); 1186 _TotalTime -= petsc_BaseTime; 1187 /* Open log file */ 1188 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1189 if (sname && sname[0]) sprintf(file, "%s.%d", sname, rank); 1190 else sprintf(file, "Log.%d", rank); 1191 PetscCall(PetscFixFilename(file, fname)); 1192 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1193 PetscCheck(!(rank == 0) || !(!fd),PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1194 /* Output totals */ 1195 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1196 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1197 /* Output actions */ 1198 if (petsc_logActions) { 1199 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1200 for (action = 0; action < petsc_numActions; action++) { 1201 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", 1202 petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1203 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1204 } 1205 } 1206 /* Output objects */ 1207 if (petsc_logObjects) { 1208 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1209 for (object = 0; object < petsc_numObjects; object++) { 1210 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int) petsc_objects[object].mem)); 1211 if (!petsc_objects[object].name[0]) { 1212 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd,"No Name\n")); 1213 } else { 1214 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1215 } 1216 if (petsc_objects[object].info[0] != 0) { 1217 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1218 } else { 1219 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1220 } 1221 } 1222 } 1223 /* Output events */ 1224 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1225 PetscCall(PetscLogGetStageLog(&stageLog)); 1226 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1227 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1228 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1229 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops/eventInfo[event].time; 1230 else flops = 0.0; 1231 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count,eventInfo[event].flops, eventInfo[event].time, flops)); 1232 } 1233 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 /* 1238 PetscLogView_Detailed - Each process prints the times for its own events 1239 1240 */ 1241 PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) 1242 { 1243 PetscStageLog stageLog; 1244 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1245 PetscLogDouble locTotalTime, numRed, maxMem; 1246 int numStages,numEvents,stage,event; 1247 MPI_Comm comm = PetscObjectComm((PetscObject) viewer); 1248 PetscMPIInt rank,size; 1249 1250 PetscFunctionBegin; 1251 PetscCallMPI(MPI_Comm_size(comm, &size)); 1252 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1253 /* Must preserve reduction count before we go on */ 1254 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1255 /* Get the total elapsed time */ 1256 PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime; 1257 PetscCall(PetscViewerASCIIPrintf(viewer,"size = %d\n",size)); 1258 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalTimes = {}\n")); 1259 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalMessages = {}\n")); 1260 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalMessageLens = {}\n")); 1261 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalReductions = {}\n")); 1262 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalFlop = {}\n")); 1263 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalObjects = {}\n")); 1264 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalMemory = {}\n")); 1265 PetscCall(PetscLogGetStageLog(&stageLog)); 1266 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1267 PetscCall(PetscViewerASCIIPrintf(viewer,"Stages = {}\n")); 1268 for (stage=0; stage<numStages; stage++) { 1269 PetscCall(PetscViewerASCIIPrintf(viewer,"Stages[\"%s\"] = {}\n",stageLog->stageInfo[stage].name)); 1270 PetscCall(PetscViewerASCIIPrintf(viewer,"Stages[\"%s\"][\"summary\"] = {}\n",stageLog->stageInfo[stage].name)); 1271 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1272 for (event = 0; event < numEvents; event++) { 1273 PetscCall(PetscViewerASCIIPrintf(viewer,"Stages[\"%s\"][\"%s\"] = {}\n",stageLog->stageInfo[stage].name,stageLog->eventLog->eventInfo[event].name)); 1274 } 1275 } 1276 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1277 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1278 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalTimes[%d] = %g\n",rank,locTotalTime)); 1279 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalMessages[%d] = %g\n",rank,(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1280 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalMessageLens[%d] = %g\n",rank,(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1281 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalReductions[%d] = %g\n",rank,numRed)); 1282 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalFlop[%d] = %g\n",rank,petsc_TotalFlops)); 1283 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalObjects[%d] = %d\n",rank,petsc_numObjects)); 1284 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalMemory[%d] = %g\n",rank,maxMem)); 1285 PetscCall(PetscViewerFlush(viewer)); 1286 for (stage=0; stage<numStages; stage++) { 1287 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1288 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", 1289 stageLog->stageInfo[stage].name,rank, 1290 stageInfo->time,stageInfo->numMessages,stageInfo->messageLength,stageInfo->numReductions,stageInfo->flops)); 1291 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1292 for (event = 0; event < numEvents; event++) { 1293 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1294 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1295 stageLog->stageInfo[stage].name,stageLog->eventLog->eventInfo[event].name,rank, 1296 eventInfo->count,eventInfo->time,eventInfo->syncTime,eventInfo->numMessages,eventInfo->messageLength,eventInfo->numReductions,eventInfo->flops)); 1297 if (eventInfo->dof[0] >= 0.) { 1298 PetscInt d, e; 1299 1300 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1301 for (d = 0; d < 8; ++d) { 1302 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1303 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1304 } 1305 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1306 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1307 for (e = 0; e < 8; ++e) { 1308 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1309 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1310 } 1311 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1312 } 1313 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"}\n")); 1314 } 1315 } 1316 PetscCall(PetscViewerFlush(viewer)); 1317 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 /* 1322 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1323 */ 1324 PetscErrorCode PetscLogView_CSV(PetscViewer viewer) 1325 { 1326 PetscStageLog stageLog; 1327 PetscEventPerfInfo *eventInfo = NULL; 1328 PetscLogDouble locTotalTime, maxMem; 1329 int numStages,numEvents,stage,event; 1330 MPI_Comm comm = PetscObjectComm((PetscObject) viewer); 1331 PetscMPIInt rank,size; 1332 1333 PetscFunctionBegin; 1334 PetscCallMPI(MPI_Comm_size(comm, &size)); 1335 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1336 /* Must preserve reduction count before we go on */ 1337 /* Get the total elapsed time */ 1338 PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime; 1339 PetscCall(PetscLogGetStageLog(&stageLog)); 1340 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1341 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1342 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1343 PetscCall(PetscViewerASCIIPrintf(viewer,"Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1344 PetscCall(PetscViewerFlush(viewer)); 1345 for (stage=0; stage<numStages; stage++) { 1346 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1347 1348 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"%s,summary,%d,1,%g,%g,%g,%g,%g\n", 1349 stageLog->stageInfo[stage].name,rank,stageInfo->time,stageInfo->numMessages,stageInfo->messageLength,stageInfo->numReductions,stageInfo->flops)); 1350 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1351 for (event = 0; event < numEvents; event++) { 1352 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1353 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"%s,%s,%d,%d,%g,%g,%g,%g,%g",stageLog->stageInfo[stage].name, 1354 stageLog->eventLog->eventInfo[event].name,rank,eventInfo->count,eventInfo->time,eventInfo->numMessages, 1355 eventInfo->messageLength,eventInfo->numReductions,eventInfo->flops)); 1356 if (eventInfo->dof[0] >= 0.) { 1357 PetscInt d, e; 1358 1359 for (d = 0; d < 8; ++d) { 1360 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1361 } 1362 for (e = 0; e < 8; ++e) { 1363 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1364 } 1365 } 1366 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"\n")); 1367 } 1368 } 1369 PetscCall(PetscViewerFlush(viewer)); 1370 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm,FILE *fd) 1375 { 1376 PetscFunctionBegin; 1377 if (!PetscLogSyncOn) PetscFunctionReturn(0); 1378 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1379 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1380 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1381 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1382 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1383 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1384 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1385 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1386 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1387 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1388 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm,FILE *fd) 1393 { 1394 PetscFunctionBegin; 1395 if (PetscDefined(USE_DEBUG)) { 1396 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1397 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1398 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1399 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1400 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1401 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1402 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1403 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1404 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1405 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1406 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1407 } 1408 PetscFunctionReturn(0); 1409 } 1410 1411 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm,FILE *fd) 1412 { 1413 #if defined(PETSC_HAVE_DEVICE) 1414 PetscMPIInt size; 1415 1416 PetscFunctionBegin; 1417 PetscCallMPI(MPI_Comm_size(comm, &size)); 1418 if (use_gpu_aware_mpi || size == 1) PetscFunctionReturn(0); 1419 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1420 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1421 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1422 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1423 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1424 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1425 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1426 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1427 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1428 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1429 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1430 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1431 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1432 PetscFunctionReturn(0); 1433 #else 1434 return 0; 1435 #endif 1436 } 1437 1438 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm,FILE *fd) 1439 { 1440 #if defined(PETSC_HAVE_DEVICE) 1441 1442 PetscFunctionBegin; 1443 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(0); 1444 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1445 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1446 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1447 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1448 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1449 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1450 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1451 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1452 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1453 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1454 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1455 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1456 PetscFunctionReturn(0); 1457 #else 1458 return 0; 1459 #endif 1460 } 1461 1462 PetscErrorCode PetscLogView_Default(PetscViewer viewer) 1463 { 1464 FILE *fd; 1465 PetscLogDouble zero = 0.0; 1466 PetscStageLog stageLog; 1467 PetscStageInfo *stageInfo = NULL; 1468 PetscEventPerfInfo *eventInfo = NULL; 1469 PetscClassPerfInfo *classInfo; 1470 char arch[128],hostname[128],username[128],pname[PETSC_MAX_PATH_LEN],date[128]; 1471 const char *name; 1472 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1473 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1474 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1475 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1476 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1477 PetscLogDouble min, max, tot, ratio, avg, x, y; 1478 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1479 #if defined(PETSC_HAVE_DEVICE) 1480 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1481 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1482 #endif 1483 PetscMPIInt minC, maxC; 1484 PetscMPIInt size, rank; 1485 PetscBool *localStageUsed, *stageUsed; 1486 PetscBool *localStageVisible, *stageVisible; 1487 int numStages, localNumEvents, numEvents; 1488 int stage, oclass; 1489 PetscLogEvent event; 1490 PetscErrorCode ierr = 0; 1491 char version[256]; 1492 MPI_Comm comm; 1493 #if defined(PETSC_HAVE_DEVICE) 1494 PetscLogEvent eventid; 1495 PetscInt64 nas = 0x7FF0000000000002; 1496 #endif 1497 1498 PetscFunctionBegin; 1499 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1500 PetscCall(PetscObjectGetComm((PetscObject)viewer,&comm)); 1501 PetscCall(PetscViewerASCIIGetPointer(viewer,&fd)); 1502 PetscCallMPI(MPI_Comm_size(comm, &size)); 1503 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1504 /* Get the total elapsed time */ 1505 PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime; 1506 1507 PetscCall(PetscFPrintf(comm, fd, "**************************************** ***********************************************************************************************************************\n")); 1508 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1509 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1510 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: -------------------------------------------------------------------\n\n")); 1511 PetscCall(PetscLogViewWarnSync(comm,fd)); 1512 PetscCall(PetscLogViewWarnDebugging(comm,fd)); 1513 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm,fd)); 1514 PetscCall(PetscLogViewWarnGpuTime(comm,fd)); 1515 PetscCall(PetscGetArchType(arch,sizeof(arch))); 1516 PetscCall(PetscGetHostName(hostname,sizeof(hostname))); 1517 PetscCall(PetscGetUserName(username,sizeof(username))); 1518 PetscCall(PetscGetProgramName(pname,sizeof(pname))); 1519 PetscCall(PetscGetDate(date,sizeof(date))); 1520 PetscCall(PetscGetVersion(version,sizeof(version))); 1521 if (size == 1) { 1522 PetscCall(PetscFPrintf(comm,fd,"%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1523 } else { 1524 PetscCall(PetscFPrintf(comm,fd,"%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1525 } 1526 #if defined(PETSC_HAVE_OPENMP) 1527 PetscCall(PetscFPrintf(comm,fd,"Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1528 #endif 1529 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1530 1531 /* Must preserve reduction count before we go on */ 1532 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1533 1534 /* Calculate summary information */ 1535 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1536 /* Time */ 1537 PetscCallMPI(MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1538 PetscCallMPI(MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1539 PetscCallMPI(MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1540 avg = tot/((PetscLogDouble) size); 1541 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1542 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1543 TotalTime = tot; 1544 /* Objects */ 1545 avg = (PetscLogDouble) petsc_numObjects; 1546 PetscCallMPI(MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1547 PetscCallMPI(MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1548 PetscCallMPI(MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1549 avg = tot/((PetscLogDouble) size); 1550 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1551 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1552 /* Flops */ 1553 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1554 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1555 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1556 avg = tot/((PetscLogDouble) size); 1557 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1558 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1559 TotalFlops = tot; 1560 /* Flops/sec -- Must talk to Barry here */ 1561 if (locTotalTime != 0.0) flops = petsc_TotalFlops/locTotalTime; else flops = 0.0; 1562 PetscCallMPI(MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1563 PetscCallMPI(MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1564 PetscCallMPI(MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1565 avg = tot/((PetscLogDouble) size); 1566 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1567 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1568 /* Memory */ 1569 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1570 if (mem > 0.0) { 1571 PetscCallMPI(MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1572 PetscCallMPI(MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1573 PetscCallMPI(MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1574 avg = tot/((PetscLogDouble) size); 1575 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1576 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1577 } 1578 /* Messages */ 1579 mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1580 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1581 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1582 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1583 avg = tot/((PetscLogDouble) size); 1584 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1585 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1586 numMessages = tot; 1587 /* Message Lengths */ 1588 mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1589 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1590 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1591 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1592 if (numMessages != 0) avg = tot/numMessages; else avg = 0.0; 1593 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1594 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1595 messageLength = tot; 1596 /* Reductions */ 1597 PetscCallMPI(MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1598 PetscCallMPI(MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1599 PetscCallMPI(MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1600 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1601 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1602 numReductions = red; /* wrong because uses count from process zero */ 1603 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1604 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1605 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1606 1607 /* Get total number of stages -- 1608 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1609 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1610 This seems best accomplished by assoicating a communicator with each stage. 1611 */ 1612 PetscCall(PetscLogGetStageLog(&stageLog)); 1613 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1614 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1615 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1616 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1617 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1618 if (numStages > 0) { 1619 stageInfo = stageLog->stageInfo; 1620 for (stage = 0; stage < numStages; stage++) { 1621 if (stage < stageLog->numStages) { 1622 localStageUsed[stage] = stageInfo[stage].used; 1623 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1624 } else { 1625 localStageUsed[stage] = PETSC_FALSE; 1626 localStageVisible[stage] = PETSC_TRUE; 1627 } 1628 } 1629 PetscCallMPI(MPI_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1630 PetscCallMPI(MPI_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1631 for (stage = 0; stage < numStages; stage++) { 1632 if (stageUsed[stage]) { 1633 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1634 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1635 break; 1636 } 1637 } 1638 for (stage = 0; stage < numStages; stage++) { 1639 if (!stageUsed[stage]) continue; 1640 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1641 if (localStageUsed[stage]) { 1642 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1643 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1644 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1645 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1646 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1647 name = stageInfo[stage].name; 1648 } else { 1649 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1650 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1651 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1652 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1653 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1654 name = ""; 1655 } 1656 mess *= 0.5; messLen *= 0.5; red /= size; 1657 if (TotalTime != 0.0) fracTime = stageTime/TotalTime; else fracTime = 0.0; 1658 if (TotalFlops != 0.0) fracFlops = flops/TotalFlops; else fracFlops = 0.0; 1659 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1660 if (numMessages != 0.0) fracMessages = mess/numMessages; else fracMessages = 0.0; 1661 if (mess != 0.0) avgMessLen = messLen/mess; else avgMessLen = 0.0; 1662 if (messageLength != 0.0) fracLength = messLen/messageLength; else fracLength = 0.0; 1663 if (numReductions != 0.0) fracReductions = red/numReductions; else fracReductions = 0.0; 1664 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", 1665 stage, name, stageTime/size, 100.0*fracTime, flops, 100.0*fracFlops, 1666 mess, 100.0*fracMessages, avgMessLen, 100.0*fracLength, red, 100.0*fracReductions)); 1667 } 1668 } 1669 1670 PetscCall(PetscFPrintf(comm, fd,"\n------------------------------------------------------------------------------------------------------------------------\n")); 1671 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1672 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1673 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1674 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1675 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1676 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1677 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1678 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1679 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1680 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1681 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1682 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1683 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1684 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1685 if (PetscLogMemory) { 1686 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event)\n")); 1687 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events)\n")); 1688 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event)\n")); 1689 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1690 } 1691 #if defined(PETSC_HAVE_DEVICE) 1692 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1693 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1694 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1695 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1696 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1697 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1698 #endif 1699 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1700 1701 PetscCall(PetscLogViewWarnDebugging(comm,fd)); 1702 1703 /* Report events */ 1704 PetscCall(PetscFPrintf(comm, fd,"Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1705 if (PetscLogMemory) { 1706 PetscCall(PetscFPrintf(comm, fd," Malloc EMalloc MMalloc RMI")); 1707 } 1708 #if defined(PETSC_HAVE_DEVICE) 1709 PetscCall(PetscFPrintf(comm, fd," GPU - CpuToGpu - - GpuToCpu - GPU")); 1710 #endif 1711 PetscCall(PetscFPrintf(comm, fd,"\n")); 1712 PetscCall(PetscFPrintf(comm, fd," Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1713 if (PetscLogMemory) { 1714 PetscCall(PetscFPrintf(comm, fd," Mbytes Mbytes Mbytes Mbytes")); 1715 } 1716 #if defined(PETSC_HAVE_DEVICE) 1717 PetscCall(PetscFPrintf(comm, fd," Mflop/s Count Size Count Size %%F")); 1718 #endif 1719 PetscCall(PetscFPrintf(comm, fd,"\n")); 1720 PetscCall(PetscFPrintf(comm, fd,"------------------------------------------------------------------------------------------------------------------------")); 1721 if (PetscLogMemory) { 1722 PetscCall(PetscFPrintf(comm, fd,"-----------------------------")); 1723 } 1724 #if defined(PETSC_HAVE_DEVICE) 1725 PetscCall(PetscFPrintf(comm, fd,"---------------------------------------")); 1726 #endif 1727 PetscCall(PetscFPrintf(comm, fd,"\n")); 1728 1729 #if defined(PETSC_HAVE_DEVICE) 1730 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1731 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1732 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1733 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1734 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1735 #endif 1736 1737 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1738 for (stage = 0; stage < numStages; stage++) { 1739 if (!stageVisible[stage]) continue; 1740 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1741 if (localStageUsed[stage]) { 1742 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1743 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1744 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1745 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1746 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1747 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1748 } else { 1749 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1750 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1751 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1752 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1753 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1754 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1755 } 1756 mess *= 0.5; messLen *= 0.5; red /= size; 1757 1758 /* Get total number of events in this stage -- 1759 Currently, a single processor can register more events than another, but events must all be registered in order, 1760 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1761 on the event ID. This seems best accomplished by associating a communicator with each stage. 1762 1763 Problem: If the event did not happen on proc 1, its name will not be available. 1764 Problem: Event visibility is not implemented 1765 */ 1766 if (localStageUsed[stage]) { 1767 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1768 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1769 } else localNumEvents = 0; 1770 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1771 for (event = 0; event < numEvents; event++) { 1772 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1773 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1774 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; else flopr = 0.0; 1775 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1776 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1777 PetscCallMPI(MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1778 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1779 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1780 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1781 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1782 PetscCallMPI(MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1783 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1784 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1785 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1786 if (PetscLogMemory) { 1787 PetscCallMPI(MPI_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1788 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1789 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncrease, &malmax,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1790 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1791 } 1792 #if defined(PETSC_HAVE_DEVICE) 1793 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1794 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1795 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1796 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1797 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuFlops, &gflops,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1798 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuTime, &gmaxt ,1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1799 #endif 1800 name = stageLog->eventLog->eventInfo[event].name; 1801 } else { 1802 flopr = 0.0; 1803 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1804 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1805 PetscCallMPI(MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1806 PetscCallMPI(MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1807 PetscCallMPI(MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1808 PetscCallMPI(MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1809 PetscCallMPI(MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1810 PetscCallMPI(MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1811 PetscCallMPI(MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1812 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1813 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1814 if (PetscLogMemory) { 1815 PetscCallMPI(MPI_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1816 PetscCallMPI(MPI_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1817 PetscCallMPI(MPI_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1818 PetscCallMPI(MPI_Allreduce(&zero, &emalmax,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1819 } 1820 #if defined(PETSC_HAVE_DEVICE) 1821 PetscCallMPI(MPI_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1822 PetscCallMPI(MPI_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1823 PetscCallMPI(MPI_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1824 PetscCallMPI(MPI_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1825 PetscCallMPI(MPI_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1826 PetscCallMPI(MPI_Allreduce(&zero, &gmaxt , 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1827 #endif 1828 name = ""; 1829 } 1830 if (mint < 0.0) { 1831 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n",mint,name)); 1832 mint = 0; 1833 } 1834 PetscCheck(minf >= 0.0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Minimum flop %g over all processors for %s is negative! Not possible!",minf,name); 1835 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1836 #if defined(PETSC_HAVE_DEVICE) 1837 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1838 memcpy(&gmaxt,&nas,sizeof(PetscLogDouble)); 1839 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 1840 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 1841 memcpy(&mint,&nas,sizeof(PetscLogDouble)); 1842 memcpy(&maxt,&nas,sizeof(PetscLogDouble)); 1843 } 1844 } 1845 #endif 1846 totm *= 0.5; totml *= 0.5; totr /= size; 1847 1848 if (maxC != 0) { 1849 if (minC != 0) ratC = ((PetscLogDouble)maxC)/minC;else ratC = 0.0; 1850 if (mint != 0.0) ratt = maxt/mint; else ratt = 0.0; 1851 if (minf != 0.0) ratf = maxf/minf; else ratf = 0.0; 1852 if (TotalTime != 0.0) fracTime = tott/TotalTime; else fracTime = 0.0; 1853 if (TotalFlops != 0.0) fracFlops = totf/TotalFlops; else fracFlops = 0.0; 1854 if (stageTime != 0.0) fracStageTime = tott/stageTime; else fracStageTime = 0.0; 1855 if (flops != 0.0) fracStageFlops = totf/flops; else fracStageFlops = 0.0; 1856 if (numMessages != 0.0) fracMess = totm/numMessages; else fracMess = 0.0; 1857 if (messageLength != 0.0) fracMessLen = totml/messageLength; else fracMessLen = 0.0; 1858 if (numReductions != 0.0) fracRed = totr/numReductions; else fracRed = 0.0; 1859 if (mess != 0.0) fracStageMess = totm/mess; else fracStageMess = 0.0; 1860 if (messLen != 0.0) fracStageMessLen = totml/messLen; else fracStageMessLen = 0.0; 1861 if (red != 0.0) fracStageRed = totr/red; else fracStageRed = 0.0; 1862 if (totm != 0.0) totml /= totm; else totml = 0.0; 1863 if (maxt != 0.0) flopr = totf/maxt; else flopr = 0.0; 1864 if (fracStageTime > 1.00) PetscCall(PetscFPrintf(comm, fd,"Warning -- total time of event greater than time of entire stage -- something is wrong with the timer\n")); 1865 PetscCall(PetscFPrintf(comm, fd, 1866 "%-16s %7d%4.1f %5.4e%4.1f %3.2e%4.1f %2.1e %2.1e %2.1e%3.0f%3.0f%3.0f%3.0f%3.0f %3.0f%3.0f%3.0f%3.0f%3.0f %5.0f", 1867 name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 1868 100.0*fracTime, 100.0*fracFlops, 100.0*fracMess, 100.0*fracMessLen, 100.0*fracRed, 1869 100.0*fracStageTime, 100.0*fracStageFlops, 100.0*fracStageMess, 100.0*fracStageMessLen, 100.0*fracStageRed, 1870 PetscAbs(flopr)/1.0e6)); 1871 if (PetscLogMemory) { 1872 PetscCall(PetscFPrintf(comm, fd," %5.0f %5.0f %5.0f %5.0f",mal/1.0e6,emalmax/1.0e6,malmax/1.0e6,mem/1.0e6)); 1873 } 1874 #if defined(PETSC_HAVE_DEVICE) 1875 if (totf != 0.0) fracgflops = gflops/totf; else fracgflops = 0.0; 1876 if (gmaxt != 0.0) gflopr = gflops/gmaxt; else gflopr = 0.0; 1877 PetscCall(PetscFPrintf(comm, fd," %5.0f %4.0f %3.2e %4.0f %3.2e% 3.0f",PetscAbs(gflopr)/1.0e6,cct/size,csz/(1.0e6*size),gct/size,gsz/(1.0e6*size),100.0*fracgflops)); 1878 #endif 1879 PetscCall(PetscFPrintf(comm, fd,"\n")); 1880 } 1881 } 1882 } 1883 1884 /* Memory usage and object creation */ 1885 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1886 if (PetscLogMemory) { 1887 PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1888 } 1889 #if defined(PETSC_HAVE_DEVICE) 1890 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1891 #endif 1892 PetscCall(PetscFPrintf(comm, fd, "\n")); 1893 PetscCall(PetscFPrintf(comm, fd, "\n")); 1894 PetscCall(PetscFPrintf(comm, fd, "Memory usage is given in bytes:\n\n")); 1895 1896 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1897 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1898 stats for stages local to processor sets. 1899 */ 1900 /* We should figure out the longest object name here (now 20 characters) */ 1901 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions Memory Descendants' Mem.\n")); 1902 PetscCall(PetscFPrintf(comm, fd, "Reports information only for process 0.\n")); 1903 for (stage = 0; stage < numStages; stage++) { 1904 if (localStageUsed[stage]) { 1905 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 1906 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1907 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 1908 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 1909 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d %11.0f %g\n", stageLog->classLog->classInfo[oclass].name, 1910 classInfo[oclass].creations, classInfo[oclass].destructions, classInfo[oclass].mem, 1911 classInfo[oclass].descMem)); 1912 } 1913 } 1914 } else { 1915 if (!localStageVisible[stage]) continue; 1916 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1917 } 1918 } 1919 1920 PetscCall(PetscFree(localStageUsed)); 1921 PetscCall(PetscFree(stageUsed)); 1922 PetscCall(PetscFree(localStageVisible)); 1923 PetscCall(PetscFree(stageVisible)); 1924 1925 /* Information unrelated to this particular run */ 1926 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 1927 PetscTime(&y); 1928 PetscTime(&x); 1929 PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y); 1930 PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y); 1931 PetscCall(PetscFPrintf(comm,fd,"Average time to get PetscTime(): %g\n", (y-x)/10.0)); 1932 /* MPI information */ 1933 if (size > 1) { 1934 MPI_Status status; 1935 PetscMPIInt tag; 1936 MPI_Comm newcomm; 1937 1938 PetscCallMPI(MPI_Barrier(comm)); 1939 PetscTime(&x); 1940 PetscCallMPI(MPI_Barrier(comm)); 1941 PetscCallMPI(MPI_Barrier(comm)); 1942 PetscCallMPI(MPI_Barrier(comm)); 1943 PetscCallMPI(MPI_Barrier(comm)); 1944 PetscCallMPI(MPI_Barrier(comm)); 1945 PetscTime(&y); 1946 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y-x)/5.0)); 1947 PetscCall(PetscCommDuplicate(comm,&newcomm, &tag)); 1948 PetscCallMPI(MPI_Barrier(comm)); 1949 if (rank) { 1950 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank-1, tag, newcomm, &status)); 1951 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank+1)%size, tag, newcomm)); 1952 } else { 1953 PetscTime(&x); 1954 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 1955 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size-1, tag, newcomm, &status)); 1956 PetscTime(&y); 1957 PetscCall(PetscFPrintf(comm,fd,"Average time for zero size MPI_Send(): %g\n", (y-x)/size)); 1958 } 1959 PetscCall(PetscCommDestroy(&newcomm)); 1960 } 1961 PetscCall(PetscOptionsView(NULL,viewer)); 1962 1963 /* Machine and compile information */ 1964 #if defined(PETSC_USE_FORTRAN_KERNELS) 1965 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 1966 #else 1967 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 1968 #endif 1969 #if defined(PETSC_USE_64BIT_INDICES) 1970 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64 bit PetscInt\n")); 1971 #elif defined(PETSC_USE___FLOAT128) 1972 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32 bit PetscInt\n")); 1973 #endif 1974 #if defined(PETSC_USE_REAL_SINGLE) 1975 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 1976 #elif defined(PETSC_USE___FLOAT128) 1977 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 1978 #endif 1979 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1980 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 1981 #else 1982 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 1983 #endif 1984 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", 1985 (int) sizeof(short), (int) sizeof(int), (int) sizeof(long), (int) sizeof(void*),(int) sizeof(PetscScalar),(int) sizeof(PetscInt))); 1986 1987 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s",petscconfigureoptions)); 1988 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 1989 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 1990 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 1991 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 1992 1993 /* Cleanup */ 1994 PetscCall(PetscFPrintf(comm, fd, "\n")); 1995 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm,fd)); 1996 PetscCall(PetscLogViewWarnDebugging(comm,fd)); 1997 PetscCall(PetscFPTrapPop()); 1998 PetscFunctionReturn(0); 1999 } 2000 2001 /*@C 2002 PetscLogView - Prints a summary of the logging. 2003 2004 Collective over MPI_Comm 2005 2006 Input Parameter: 2007 . viewer - an ASCII viewer 2008 2009 Options Database Keys: 2010 + -log_view [:filename] - Prints summary of log information 2011 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2012 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2013 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2014 . -log_view_memory - Also display memory usage in each event 2015 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2016 . -log_all - Saves a file Log.rank for each MPI process with details of each step of the computation 2017 - -log_trace [filename] - Displays a trace of what each process is doing 2018 2019 Notes: 2020 It is possible to control the logging programatically but we recommend using the options database approach whenever possible 2021 By default the summary is printed to stdout. 2022 2023 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2024 2025 If PETSc is configured with --with-logging=0 then this functionality is not available 2026 2027 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2028 directory then open filename.xml with your browser. Specific notes for certain browsers 2029 $ Firefox and Internet explorer - simply open the file 2030 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2031 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2032 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2033 your browser. 2034 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2035 window and render the XML log file contents. 2036 2037 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2038 2039 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2040 or using speedscope (https://www.speedscope.app). 2041 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2042 2043 Level: beginner 2044 2045 .seealso: `PetscLogDefaultBegin()`, `PetscLogDump()` 2046 @*/ 2047 PetscErrorCode PetscLogView(PetscViewer viewer) 2048 { 2049 PetscBool isascii; 2050 PetscViewerFormat format; 2051 int stage, lastStage; 2052 PetscStageLog stageLog; 2053 2054 PetscFunctionBegin; 2055 PetscCheck(PetscLogPLB,PETSC_COMM_SELF,PETSC_ERR_SUP,"Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2056 /* Pop off any stages the user forgot to remove */ 2057 lastStage = 0; 2058 PetscCall(PetscLogGetStageLog(&stageLog)); 2059 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2060 while (stage >= 0) { 2061 lastStage = stage; 2062 PetscCall(PetscStageLogPop(stageLog)); 2063 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2064 } 2065 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii)); 2066 PetscCheck(isascii,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Currently can only view logging to ASCII"); 2067 PetscCall(PetscViewerGetFormat(viewer,&format)); 2068 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2069 PetscCall(PetscLogView_Default(viewer)); 2070 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2071 PetscCall(PetscLogView_Detailed(viewer)); 2072 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2073 PetscCall(PetscLogView_CSV(viewer)); 2074 } else if (format == PETSC_VIEWER_ASCII_XML) { 2075 PetscCall(PetscLogView_Nested(viewer)); 2076 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2077 PetscCall(PetscLogView_Flamegraph(viewer)); 2078 } 2079 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2080 PetscFunctionReturn(0); 2081 } 2082 2083 /*@C 2084 PetscLogViewFromOptions - Processes command line options to determine if/how a PetscLog is to be viewed. 2085 2086 Collective on PETSC_COMM_WORLD 2087 2088 Not normally called by user 2089 2090 Level: intermediate 2091 2092 @*/ 2093 PetscErrorCode PetscLogViewFromOptions(void) 2094 { 2095 PetscViewer viewer; 2096 PetscBool flg; 2097 PetscViewerFormat format; 2098 2099 PetscFunctionBegin; 2100 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD,NULL,NULL,"-log_view",&viewer,&format,&flg)); 2101 if (flg) { 2102 PetscCall(PetscViewerPushFormat(viewer,format)); 2103 PetscCall(PetscLogView(viewer)); 2104 PetscCall(PetscViewerPopFormat(viewer)); 2105 PetscCall(PetscViewerDestroy(&viewer)); 2106 } 2107 PetscFunctionReturn(0); 2108 } 2109 2110 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2111 /*@C 2112 PetscGetFlops - Returns the number of flops used on this processor 2113 since the program began. 2114 2115 Not Collective 2116 2117 Output Parameter: 2118 flops - number of floating point operations 2119 2120 Notes: 2121 A global counter logs all PETSc flop counts. The user can use 2122 PetscLogFlops() to increment this counter to include flops for the 2123 application code. 2124 2125 Level: intermediate 2126 2127 .seealso: `PetscTime()`, `PetscLogFlops()` 2128 @*/ 2129 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) 2130 { 2131 PetscFunctionBegin; 2132 *flops = petsc_TotalFlops; 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2137 { 2138 size_t fullLength; 2139 va_list Argp; 2140 2141 PetscFunctionBegin; 2142 if (!petsc_logObjects) PetscFunctionReturn(0); 2143 va_start(Argp, format); 2144 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64,format,&fullLength, Argp)); 2145 va_end(Argp); 2146 PetscFunctionReturn(0); 2147 } 2148 2149 /*MC 2150 PetscLogFlops - Adds floating point operations to the global counter. 2151 2152 Synopsis: 2153 #include <petsclog.h> 2154 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2155 2156 Not Collective 2157 2158 Input Parameter: 2159 . f - flop counter 2160 2161 Usage: 2162 .vb 2163 PetscLogEvent USER_EVENT; 2164 PetscLogEventRegister("User event",0,&USER_EVENT); 2165 PetscLogEventBegin(USER_EVENT,0,0,0,0); 2166 [code segment to monitor] 2167 PetscLogFlops(user_flops) 2168 PetscLogEventEnd(USER_EVENT,0,0,0,0); 2169 .ve 2170 2171 Notes: 2172 A global counter logs all PETSc flop counts. The user can use 2173 PetscLogFlops() to increment this counter to include flops for the 2174 application code. 2175 2176 Level: intermediate 2177 2178 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2179 2180 M*/ 2181 2182 /*MC 2183 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) 2184 to get accurate timings 2185 2186 Synopsis: 2187 #include <petsclog.h> 2188 void PetscPreLoadBegin(PetscBool flag,char *name); 2189 2190 Not Collective 2191 2192 Input Parameters: 2193 + flag - PETSC_TRUE to run twice, PETSC_FALSE to run once, may be overridden 2194 with command line option -preload true or -preload false 2195 - name - name of first stage (lines of code timed separately with -log_view) to 2196 be preloaded 2197 2198 Usage: 2199 .vb 2200 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2201 lines of code 2202 PetscPreLoadStage("second stage"); 2203 lines of code 2204 PetscPreLoadEnd(); 2205 .ve 2206 2207 Notes: 2208 Only works in C/C++, not Fortran 2209 2210 Flags available within the macro. 2211 + PetscPreLoadingUsed - true if we are or have done preloading 2212 . PetscPreLoadingOn - true if it is CURRENTLY doing preload 2213 . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second 2214 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on 2215 The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin() 2216 and PetscPreLoadEnd() 2217 2218 Level: intermediate 2219 2220 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2221 2222 M*/ 2223 2224 /*MC 2225 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) 2226 to get accurate timings 2227 2228 Synopsis: 2229 #include <petsclog.h> 2230 void PetscPreLoadEnd(void); 2231 2232 Not Collective 2233 2234 Usage: 2235 .vb 2236 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2237 lines of code 2238 PetscPreLoadStage("second stage"); 2239 lines of code 2240 PetscPreLoadEnd(); 2241 .ve 2242 2243 Notes: 2244 only works in C/C++ not fortran 2245 2246 Level: intermediate 2247 2248 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2249 2250 M*/ 2251 2252 /*MC 2253 PetscPreLoadStage - Start a new segment of code to be timed separately. 2254 to get accurate timings 2255 2256 Synopsis: 2257 #include <petsclog.h> 2258 void PetscPreLoadStage(char *name); 2259 2260 Not Collective 2261 2262 Usage: 2263 .vb 2264 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2265 lines of code 2266 PetscPreLoadStage("second stage"); 2267 lines of code 2268 PetscPreLoadEnd(); 2269 .ve 2270 2271 Notes: 2272 only works in C/C++ not fortran 2273 2274 Level: intermediate 2275 2276 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2277 2278 M*/ 2279 2280 #if PetscDefined(HAVE_DEVICE) 2281 #include <petsc/private/deviceimpl.h> 2282 2283 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2284 2285 /* 2286 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2287 because it will result in timing results that cannot be interpreted. 2288 */ 2289 static PetscErrorCode PetscLogGpuTime_Off(void) 2290 { 2291 PetscLogGpuTimeFlag = PETSC_FALSE; 2292 return 0; 2293 } 2294 2295 /*@C 2296 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2297 2298 Options Database: 2299 . -log_view_gpu_time - provide the GPU times in the -log_view output 2300 2301 Notes: 2302 Because the logging of GPU time requires blocking the CPU execution for each kernel, turning on the timing of the 2303 GPU kernels can slow down the entire computation and should only be used when studying the performance 2304 of operations on GPU such as vector operations and matrix-vector operations. 2305 2306 This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off. 2307 2308 Level: advanced 2309 2310 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2311 @*/ 2312 PetscErrorCode PetscLogGpuTime(void) 2313 { 2314 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2315 PetscLogGpuTimeFlag = PETSC_TRUE; 2316 return 0; 2317 } 2318 2319 /*@C 2320 PetscLogGpuTimeBegin - Start timer for device 2321 2322 Notes: 2323 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times). 2324 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times). 2325 There is no need to call WaitForCUDA() or WaitForHIP() between PetscLogGpuTimeBegin and PetscLogGpuTimeEnd 2326 This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space. 2327 The regular logging captures the time for data transfers and any CPU activites during the event 2328 It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel. 2329 2330 Developer Notes: 2331 The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between PetscLogGpuTimeBegin() and PetsLogGpuTimeEnd(). 2332 PetscLogGpuTimeBegin() and PetsLogGpuTimeEnd() insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the event when it reaches that event in the stream. The function xxxEventSynchronize() is called in PetsLogGpuTimeEnd() to block CPU execution, but not continued GPU excution, until the timer event is recorded. 2333 2334 Level: intermediate 2335 2336 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2337 @*/ 2338 PetscErrorCode PetscLogGpuTimeBegin(void) 2339 { 2340 PetscFunctionBegin; 2341 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2342 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2343 PetscDeviceContext dctx; 2344 2345 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2346 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2347 } else { 2348 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2349 } 2350 PetscFunctionReturn(0); 2351 } 2352 2353 /*@C 2354 PetscLogGpuTimeEnd - Stop timer for device 2355 2356 Level: intermediate 2357 2358 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2359 @*/ 2360 PetscErrorCode PetscLogGpuTimeEnd(void) 2361 { 2362 PetscFunctionBegin; 2363 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2364 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2365 PetscDeviceContext dctx; 2366 PetscLogDouble elapsed; 2367 2368 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2369 PetscCall(PetscDeviceContextEndTimer_Internal(dctx,&elapsed)); 2370 petsc_gtime += (elapsed/1000.0); 2371 } else { 2372 PetscCall(PetscTimeAdd(&petsc_gtime)); 2373 } 2374 PetscFunctionReturn(0); 2375 } 2376 #endif /* end of PETSC_HAVE_DEVICE */ 2377 2378 #else /* end of -DPETSC_USE_LOG section */ 2379 2380 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2381 { 2382 PetscFunctionBegin; 2383 PetscFunctionReturn(0); 2384 } 2385 2386 #endif /* PETSC_USE_LOG*/ 2387 2388 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2389 PetscClassId PETSC_OBJECT_CLASSID = 0; 2390 2391 /*@C 2392 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2393 2394 Not Collective 2395 2396 Input Parameter: 2397 . name - The class name 2398 2399 Output Parameter: 2400 . oclass - The class id or classid 2401 2402 Level: developer 2403 2404 @*/ 2405 PetscErrorCode PetscClassIdRegister(const char name[],PetscClassId *oclass) 2406 { 2407 #if defined(PETSC_USE_LOG) 2408 PetscStageLog stageLog; 2409 PetscInt stage; 2410 #endif 2411 2412 PetscFunctionBegin; 2413 *oclass = ++PETSC_LARGEST_CLASSID; 2414 #if defined(PETSC_USE_LOG) 2415 PetscCall(PetscLogGetStageLog(&stageLog)); 2416 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2417 for (stage = 0; stage < stageLog->numStages; stage++) { 2418 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2419 } 2420 #endif 2421 PetscFunctionReturn(0); 2422 } 2423 2424 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2425 #include <mpe.h> 2426 2427 PetscBool PetscBeganMPE = PETSC_FALSE; 2428 2429 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent,int,PetscObject,PetscObject,PetscObject,PetscObject); 2430 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent,int,PetscObject,PetscObject,PetscObject,PetscObject); 2431 2432 /*@C 2433 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files 2434 and slows the program down. 2435 2436 Collective over PETSC_COMM_WORLD 2437 2438 Options Database Keys: 2439 . -log_mpe - Prints extensive log information 2440 2441 Notes: 2442 A related routine is PetscLogDefaultBegin() (with the options key -log_view), which is 2443 intended for production runs since it logs only flop rates and object 2444 creation (and should not significantly slow the programs). 2445 2446 Level: advanced 2447 2448 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2449 `PetscLogEventDeactivate()` 2450 @*/ 2451 PetscErrorCode PetscLogMPEBegin(void) 2452 { 2453 PetscFunctionBegin; 2454 /* Do MPE initialization */ 2455 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2456 PetscCall(PetscInfo(0,"Initializing MPE.\n")); 2457 PetscCall(MPE_Init_log()); 2458 2459 PetscBeganMPE = PETSC_TRUE; 2460 } else { 2461 PetscCall(PetscInfo(0,"MPE already initialized. Not attempting to reinitialize.\n")); 2462 } 2463 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2464 PetscFunctionReturn(0); 2465 } 2466 2467 /*@C 2468 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2469 2470 Collective over PETSC_COMM_WORLD 2471 2472 Level: advanced 2473 2474 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2475 @*/ 2476 PetscErrorCode PetscLogMPEDump(const char sname[]) 2477 { 2478 char name[PETSC_MAX_PATH_LEN]; 2479 2480 PetscFunctionBegin; 2481 if (PetscBeganMPE) { 2482 PetscCall(PetscInfo(0,"Finalizing MPE.\n")); 2483 if (sname) { 2484 PetscCall(PetscStrcpy(name,sname)); 2485 } else { 2486 PetscCall(PetscGetProgramName(name,sizeof(name))); 2487 } 2488 PetscCall(MPE_Finish_log(name)); 2489 } else { 2490 PetscCall(PetscInfo(0,"Not finalizing MPE (not started by PETSc).\n")); 2491 } 2492 PetscFunctionReturn(0); 2493 } 2494 2495 #define PETSC_RGB_COLORS_MAX 39 2496 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = { 2497 "OliveDrab: ", 2498 "BlueViolet: ", 2499 "CadetBlue: ", 2500 "CornflowerBlue: ", 2501 "DarkGoldenrod: ", 2502 "DarkGreen: ", 2503 "DarkKhaki: ", 2504 "DarkOliveGreen: ", 2505 "DarkOrange: ", 2506 "DarkOrchid: ", 2507 "DarkSeaGreen: ", 2508 "DarkSlateGray: ", 2509 "DarkTurquoise: ", 2510 "DeepPink: ", 2511 "DarkKhaki: ", 2512 "DimGray: ", 2513 "DodgerBlue: ", 2514 "GreenYellow: ", 2515 "HotPink: ", 2516 "IndianRed: ", 2517 "LavenderBlush: ", 2518 "LawnGreen: ", 2519 "LemonChiffon: ", 2520 "LightCoral: ", 2521 "LightCyan: ", 2522 "LightPink: ", 2523 "LightSalmon: ", 2524 "LightSlateGray: ", 2525 "LightYellow: ", 2526 "LimeGreen: ", 2527 "MediumPurple: ", 2528 "MediumSeaGreen: ", 2529 "MediumSlateBlue:", 2530 "MidnightBlue: ", 2531 "MintCream: ", 2532 "MistyRose: ", 2533 "NavajoWhite: ", 2534 "NavyBlue: ", 2535 "OliveDrab: " 2536 }; 2537 2538 /*@C 2539 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with PetscLogEventRegister() 2540 2541 Not collective. Maybe it should be? 2542 2543 Output Parameter: 2544 . str - character string representing the color 2545 2546 Level: developer 2547 2548 .seealso: `PetscLogEventRegister` 2549 @*/ 2550 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) 2551 { 2552 static int idx = 0; 2553 2554 PetscFunctionBegin; 2555 *str = PetscLogMPERGBColors[idx]; 2556 idx = (idx + 1)% PETSC_RGB_COLORS_MAX; 2557 PetscFunctionReturn(0); 2558 } 2559 2560 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2561