1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 15 /* this is not consistently used and is difficult to keep correct if done manually, possibly it should be removed */ 16 PetscErrorCode PetscLogObjectParent(PetscObject p,PetscObject c) 17 { 18 if (!c || !p) return 0; 19 c->parent = p; 20 c->parentid = p->id; 21 return 0; 22 } 23 24 /*@C 25 PetscLogObjectMemory - Adds to an object a count of additional amount of memory that is used by the object. 26 27 Not collective. 28 29 Input Parameters: 30 + obj - the PETSc object 31 - mem - the amount of memory that is being added to the object 32 33 Level: developer 34 35 Developer Notes: 36 This is not used consistently. It is very difficult to manually track the memory usage per object so this should 37 likely be removed and replaced with an automated system. 38 39 .seealso: `PetscFinalize()`, `PetscInitializeFortran()`, `PetscGetArgs()`, `PetscInitializeNoArguments()` 40 41 @*/ 42 PetscErrorCode PetscLogObjectMemory(PetscObject p,PetscLogDouble m) 43 { 44 if (!p) return 0; 45 p->mem += m; 46 return 0; 47 } 48 49 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 50 51 #if defined(PETSC_USE_LOG) 52 #include <petscmachineinfo.h> 53 #include <petscconfiginfo.h> 54 55 /* used in the MPI_XXX() count macros in petsclog.h */ 56 57 /* Action and object logging variables */ 58 Action *petsc_actions = NULL; 59 Object *petsc_objects = NULL; 60 PetscBool petsc_logActions = PETSC_FALSE; 61 PetscBool petsc_logObjects = PETSC_FALSE; 62 int petsc_numActions = 0, petsc_maxActions = 100; 63 int petsc_numObjects = 0, petsc_maxObjects = 100; 64 int petsc_numObjectsDestroyed = 0; 65 66 /* Global counters */ 67 PetscLogDouble petsc_BaseTime = 0.0; 68 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 69 PetscLogDouble petsc_tmp_flops = 0.0; /* The incremental number of flops */ 70 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 71 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 72 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 73 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 74 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 75 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 76 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 77 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 78 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 79 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 80 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 81 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 82 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 83 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 84 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 85 #if defined(PETSC_HAVE_DEVICE) 86 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 87 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 88 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 89 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 90 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 91 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 92 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 93 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 94 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 95 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 96 #endif 97 98 /* Logging functions */ 99 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 100 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 101 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 102 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 103 104 /* Tracing event logging variables */ 105 FILE *petsc_tracefile = NULL; 106 int petsc_tracelevel = 0; 107 const char *petsc_traceblanks = " "; 108 char petsc_tracespace[128] = " "; 109 PetscLogDouble petsc_tracetime = 0.0; 110 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 111 112 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) 113 { 114 int stage; 115 PetscBool opt; 116 117 PetscFunctionBegin; 118 if (PetscLogInitializeCalled) PetscFunctionReturn(0); 119 PetscLogInitializeCalled = PETSC_TRUE; 120 121 PetscCall(PetscOptionsHasName(NULL,NULL, "-log_exclude_actions", &opt)); 122 if (opt) petsc_logActions = PETSC_FALSE; 123 PetscCall(PetscOptionsHasName(NULL,NULL, "-log_exclude_objects", &opt)); 124 if (opt) petsc_logObjects = PETSC_FALSE; 125 if (petsc_logActions) { 126 PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 127 } 128 if (petsc_logObjects) { 129 PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 130 } 131 PetscLogPHC = PetscLogObjCreateDefault; 132 PetscLogPHD = PetscLogObjDestroyDefault; 133 /* Setup default logging structures */ 134 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 135 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 136 137 /* All processors sync here for more consistent logging */ 138 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 139 PetscTime(&petsc_BaseTime); 140 PetscCall(PetscLogStagePush(stage)); 141 PetscFunctionReturn(0); 142 } 143 144 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) 145 { 146 PetscStageLog stageLog; 147 148 PetscFunctionBegin; 149 PetscCall(PetscFree(petsc_actions)); 150 PetscCall(PetscFree(petsc_objects)); 151 PetscCall(PetscLogNestedEnd()); 152 PetscCall(PetscLogSet(NULL, NULL)); 153 154 /* Resetting phase */ 155 PetscCall(PetscLogGetStageLog(&stageLog)); 156 PetscCall(PetscStageLogDestroy(stageLog)); 157 158 petsc_TotalFlops = 0.0; 159 petsc_numActions = 0; 160 petsc_numObjects = 0; 161 petsc_numObjectsDestroyed = 0; 162 petsc_maxActions = 100; 163 petsc_maxObjects = 100; 164 petsc_actions = NULL; 165 petsc_objects = NULL; 166 petsc_logActions = PETSC_FALSE; 167 petsc_logObjects = PETSC_FALSE; 168 petsc_BaseTime = 0.0; 169 petsc_TotalFlops = 0.0; 170 petsc_tmp_flops = 0.0; 171 petsc_send_ct = 0.0; 172 petsc_recv_ct = 0.0; 173 petsc_send_len = 0.0; 174 petsc_recv_len = 0.0; 175 petsc_isend_ct = 0.0; 176 petsc_irecv_ct = 0.0; 177 petsc_isend_len = 0.0; 178 petsc_irecv_len = 0.0; 179 petsc_wait_ct = 0.0; 180 petsc_wait_any_ct = 0.0; 181 petsc_wait_all_ct = 0.0; 182 petsc_sum_of_waits_ct = 0.0; 183 petsc_allreduce_ct = 0.0; 184 petsc_gather_ct = 0.0; 185 petsc_scatter_ct = 0.0; 186 #if defined(PETSC_HAVE_DEVICE) 187 petsc_ctog_ct = 0.0; 188 petsc_gtoc_ct = 0.0; 189 petsc_ctog_sz = 0.0; 190 petsc_gtoc_sz = 0.0; 191 petsc_gflops = 0.0; 192 petsc_gtime = 0.0; 193 #endif 194 PETSC_LARGEST_EVENT = PETSC_EVENT; 195 PetscLogPHC = NULL; 196 PetscLogPHD = NULL; 197 petsc_tracefile = NULL; 198 petsc_tracelevel = 0; 199 petsc_traceblanks = " "; 200 petsc_tracespace[0] = ' '; petsc_tracespace[1] = 0; 201 petsc_tracetime = 0.0; 202 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 203 PETSC_OBJECT_CLASSID = 0; 204 petsc_stageLog = NULL; 205 PetscLogInitializeCalled = PETSC_FALSE; 206 PetscFunctionReturn(0); 207 } 208 209 /*@C 210 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 211 212 Not Collective 213 214 Input Parameters: 215 + b - The function called at beginning of event 216 - e - The function called at end of event 217 218 Level: developer 219 220 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()` 221 @*/ 222 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), 223 PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) 224 { 225 PetscFunctionBegin; 226 PetscLogPLB = b; 227 PetscLogPLE = e; 228 PetscFunctionReturn(0); 229 } 230 231 /*@C 232 PetscLogIsActive - Check if logging is currently in progress. 233 234 Not Collective 235 236 Output Parameter: 237 . isActive - PETSC_TRUE if logging is in progress, PETSC_FALSE otherwise 238 239 Level: beginner 240 241 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 242 @*/ 243 PetscErrorCode PetscLogIsActive(PetscBool *isActive) 244 { 245 PetscFunctionBegin; 246 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 247 PetscFunctionReturn(0); 248 } 249 250 /*@C 251 PetscLogDefaultBegin - Turns on logging of objects and events. This logs flop 252 rates and object creation and should not slow programs down too much. 253 This routine may be called more than once. 254 255 Logically Collective over PETSC_COMM_WORLD 256 257 Options Database Keys: 258 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 259 screen (for code configured with --with-log=1 (which is the default)) 260 261 Usage: 262 .vb 263 PetscInitialize(...); 264 PetscLogDefaultBegin(); 265 ... code ... 266 PetscLogView(viewer); or PetscLogDump(); 267 PetscFinalize(); 268 .ve 269 270 Notes: 271 PetscLogView(viewer) or PetscLogDump() actually cause the printing of 272 the logging information. 273 274 Level: advanced 275 276 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 277 @*/ 278 PetscErrorCode PetscLogDefaultBegin(void) 279 { 280 PetscFunctionBegin; 281 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 282 PetscFunctionReturn(0); 283 } 284 285 /*@C 286 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 287 all events. This creates large log files and slows the program down. 288 289 Logically Collective on PETSC_COMM_WORLD 290 291 Options Database Keys: 292 . -log_all - Prints extensive log information 293 294 Usage: 295 .vb 296 PetscInitialize(...); 297 PetscLogAllBegin(); 298 ... code ... 299 PetscLogDump(filename); 300 PetscFinalize(); 301 .ve 302 303 Notes: 304 A related routine is PetscLogDefaultBegin() (with the options key -log), which is 305 intended for production runs since it logs only flop rates and object 306 creation (and shouldn't significantly slow the programs). 307 308 Level: advanced 309 310 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 311 @*/ 312 PetscErrorCode PetscLogAllBegin(void) 313 { 314 PetscFunctionBegin; 315 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 316 PetscFunctionReturn(0); 317 } 318 319 /*@C 320 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 321 begins or ends, the event name is printed. 322 323 Logically Collective on PETSC_COMM_WORLD 324 325 Input Parameter: 326 . file - The file to print trace in (e.g. stdout) 327 328 Options Database Key: 329 . -log_trace [filename] - Activates PetscLogTraceBegin() 330 331 Notes: 332 PetscLogTraceBegin() prints the processor number, the execution time (sec), 333 then "Event begin:" or "Event end:" followed by the event name. 334 335 PetscLogTraceBegin() allows tracing of all PETSc calls, which is useful 336 to determine where a program is hanging without running in the 337 debugger. Can be used in conjunction with the -info option. 338 339 Level: intermediate 340 341 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 342 @*/ 343 PetscErrorCode PetscLogTraceBegin(FILE *file) 344 { 345 PetscFunctionBegin; 346 petsc_tracefile = file; 347 348 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 349 PetscFunctionReturn(0); 350 } 351 352 /*@ 353 PetscLogActions - Determines whether actions are logged for the graphical viewer. 354 355 Not Collective 356 357 Input Parameter: 358 . flag - PETSC_TRUE if actions are to be logged 359 360 Level: intermediate 361 362 Note: Logging of actions continues to consume more memory as the program 363 runs. Long running programs should consider turning this feature off. 364 365 Options Database Keys: 366 . -log_exclude_actions - Turns off actions logging 367 368 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 369 @*/ 370 PetscErrorCode PetscLogActions(PetscBool flag) 371 { 372 PetscFunctionBegin; 373 petsc_logActions = flag; 374 PetscFunctionReturn(0); 375 } 376 377 /*@ 378 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 379 380 Not Collective 381 382 Input Parameter: 383 . flag - PETSC_TRUE if objects are to be logged 384 385 Level: intermediate 386 387 Note: Logging of objects continues to consume more memory as the program 388 runs. Long running programs should consider turning this feature off. 389 390 Options Database Keys: 391 . -log_exclude_objects - Turns off objects logging 392 393 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 394 @*/ 395 PetscErrorCode PetscLogObjects(PetscBool flag) 396 { 397 PetscFunctionBegin; 398 petsc_logObjects = flag; 399 PetscFunctionReturn(0); 400 } 401 402 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 403 /*@C 404 PetscLogStageRegister - Attaches a character string name to a logging stage. 405 406 Not Collective 407 408 Input Parameter: 409 . sname - The name to associate with that stage 410 411 Output Parameter: 412 . stage - The stage number 413 414 Level: intermediate 415 416 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 417 @*/ 418 PetscErrorCode PetscLogStageRegister(const char sname[],PetscLogStage *stage) 419 { 420 PetscStageLog stageLog; 421 PetscLogEvent event; 422 423 PetscFunctionBegin; 424 PetscCall(PetscLogGetStageLog(&stageLog)); 425 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 426 /* Copy events already changed in the main stage, this sucks */ 427 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 428 for (event = 0; event < stageLog->eventLog->numEvents; event++) { 429 PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event],&stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 430 } 431 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 432 PetscFunctionReturn(0); 433 } 434 435 /*@C 436 PetscLogStagePush - This function pushes a stage on the stack. 437 438 Not Collective 439 440 Input Parameter: 441 . stage - The stage on which to log 442 443 Usage: 444 If the option -log_sumary is used to run the program containing the 445 following code, then 2 sets of summary data will be printed during 446 PetscFinalize(). 447 .vb 448 PetscInitialize(int *argc,char ***args,0,0); 449 [stage 0 of code] 450 PetscLogStagePush(1); 451 [stage 1 of code] 452 PetscLogStagePop(); 453 PetscBarrier(...); 454 [more stage 0 of code] 455 PetscFinalize(); 456 .ve 457 458 Notes: 459 Use PetscLogStageRegister() to register a stage. 460 461 Level: intermediate 462 463 .seealso: `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 464 @*/ 465 PetscErrorCode PetscLogStagePush(PetscLogStage stage) 466 { 467 PetscStageLog stageLog; 468 469 PetscFunctionBegin; 470 PetscCall(PetscLogGetStageLog(&stageLog)); 471 PetscCall(PetscStageLogPush(stageLog, stage)); 472 PetscFunctionReturn(0); 473 } 474 475 /*@C 476 PetscLogStagePop - This function pops a stage from the stack. 477 478 Not Collective 479 480 Usage: 481 If the option -log_sumary is used to run the program containing the 482 following code, then 2 sets of summary data will be printed during 483 PetscFinalize(). 484 .vb 485 PetscInitialize(int *argc,char ***args,0,0); 486 [stage 0 of code] 487 PetscLogStagePush(1); 488 [stage 1 of code] 489 PetscLogStagePop(); 490 PetscBarrier(...); 491 [more stage 0 of code] 492 PetscFinalize(); 493 .ve 494 495 Notes: 496 Use PetscLogStageRegister() to register a stage. 497 498 Level: intermediate 499 500 .seealso: `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 501 @*/ 502 PetscErrorCode PetscLogStagePop(void) 503 { 504 PetscStageLog stageLog; 505 506 PetscFunctionBegin; 507 PetscCall(PetscLogGetStageLog(&stageLog)); 508 PetscCall(PetscStageLogPop(stageLog)); 509 PetscFunctionReturn(0); 510 } 511 512 /*@ 513 PetscLogStageSetActive - Determines stage activity for PetscLogEventBegin() and PetscLogEventEnd(). 514 515 Not Collective 516 517 Input Parameters: 518 + stage - The stage 519 - isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE) 520 521 Level: intermediate 522 523 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 524 @*/ 525 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) 526 { 527 PetscStageLog stageLog; 528 529 PetscFunctionBegin; 530 PetscCall(PetscLogGetStageLog(&stageLog)); 531 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 532 PetscFunctionReturn(0); 533 } 534 535 /*@ 536 PetscLogStageGetActive - Returns stage activity for PetscLogEventBegin() and PetscLogEventEnd(). 537 538 Not Collective 539 540 Input Parameter: 541 . stage - The stage 542 543 Output Parameter: 544 . isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE) 545 546 Level: intermediate 547 548 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 549 @*/ 550 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) 551 { 552 PetscStageLog stageLog; 553 554 PetscFunctionBegin; 555 PetscCall(PetscLogGetStageLog(&stageLog)); 556 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 557 PetscFunctionReturn(0); 558 } 559 560 /*@ 561 PetscLogStageSetVisible - Determines stage visibility in PetscLogView() 562 563 Not Collective 564 565 Input Parameters: 566 + stage - The stage 567 - isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE) 568 569 Level: intermediate 570 571 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 572 @*/ 573 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) 574 { 575 PetscStageLog stageLog; 576 577 PetscFunctionBegin; 578 PetscCall(PetscLogGetStageLog(&stageLog)); 579 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 580 PetscFunctionReturn(0); 581 } 582 583 /*@ 584 PetscLogStageGetVisible - Returns stage visibility in PetscLogView() 585 586 Not Collective 587 588 Input Parameter: 589 . stage - The stage 590 591 Output Parameter: 592 . isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE) 593 594 Level: intermediate 595 596 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 597 @*/ 598 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) 599 { 600 PetscStageLog stageLog; 601 602 PetscFunctionBegin; 603 PetscCall(PetscLogGetStageLog(&stageLog)); 604 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 605 PetscFunctionReturn(0); 606 } 607 608 /*@C 609 PetscLogStageGetId - Returns the stage id when given the stage name. 610 611 Not Collective 612 613 Input Parameter: 614 . name - The stage name 615 616 Output Parameter: 617 . stage - The stage, , or -1 if no stage with that name exists 618 619 Level: intermediate 620 621 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 622 @*/ 623 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) 624 { 625 PetscStageLog stageLog; 626 627 PetscFunctionBegin; 628 PetscCall(PetscLogGetStageLog(&stageLog)); 629 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 630 PetscFunctionReturn(0); 631 } 632 633 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 634 /*@C 635 PetscLogEventRegister - Registers an event name for logging operations in an application code. 636 637 Not Collective 638 639 Input Parameters: 640 + name - The name associated with the event 641 - classid - The classid associated to the class for this event, obtain either with 642 PetscClassIdRegister() or use a predefined one such as KSP_CLASSID, SNES_CLASSID, the predefined ones 643 are only available in C code 644 645 Output Parameter: 646 . event - The event id for use with PetscLogEventBegin() and PetscLogEventEnd(). 647 648 Example of Usage: 649 .vb 650 PetscLogEvent USER_EVENT; 651 PetscClassId classid; 652 PetscLogDouble user_event_flops; 653 PetscClassIdRegister("class name",&classid); 654 PetscLogEventRegister("User event name",classid,&USER_EVENT); 655 PetscLogEventBegin(USER_EVENT,0,0,0,0); 656 [code segment to monitor] 657 PetscLogFlops(user_event_flops); 658 PetscLogEventEnd(USER_EVENT,0,0,0,0); 659 .ve 660 661 Notes: 662 PETSc automatically logs library events if the code has been 663 configured with --with-log (which is the default) and 664 -log_view or -log_all is specified. PetscLogEventRegister() is 665 intended for logging user events to supplement this PETSc 666 information. 667 668 PETSc can gather data for use with the utilities Jumpshot 669 (part of the MPICH distribution). If PETSc has been compiled 670 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 671 MPICH), the user can employ another command line option, -log_mpe, 672 to create a logfile, "mpe.log", which can be visualized 673 Jumpshot. 674 675 The classid is associated with each event so that classes of events 676 can be disabled simultaneously, such as all matrix events. The user 677 can either use an existing classid, such as MAT_CLASSID, or create 678 their own as shown in the example. 679 680 If an existing event with the same name exists, its event handle is 681 returned instead of creating a new event. 682 683 Level: intermediate 684 685 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 686 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 687 @*/ 688 PetscErrorCode PetscLogEventRegister(const char name[],PetscClassId classid,PetscLogEvent *event) 689 { 690 PetscStageLog stageLog; 691 int stage; 692 693 PetscFunctionBegin; 694 *event = PETSC_DECIDE; 695 PetscCall(PetscLogGetStageLog(&stageLog)); 696 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 697 if (*event > 0) PetscFunctionReturn(0); 698 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 699 for (stage = 0; stage < stageLog->numStages; stage++) { 700 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 701 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 702 } 703 PetscFunctionReturn(0); 704 } 705 706 /*@ 707 PetscLogEventSetCollective - Indicates that a particular event is collective. 708 709 Not Collective 710 711 Input Parameters: 712 + event - The event id 713 - collective - Bolean flag indicating whether a particular event is collective 714 715 Note: 716 New events returned from PetscLogEventRegister() are collective by default. 717 718 Level: developer 719 720 .seealso: `PetscLogEventRegister()` 721 @*/ 722 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event,PetscBool collective) 723 { 724 PetscStageLog stageLog; 725 PetscEventRegLog eventRegLog; 726 727 PetscFunctionBegin; 728 PetscCall(PetscLogGetStageLog(&stageLog)); 729 PetscCall(PetscStageLogGetEventRegLog(stageLog,&eventRegLog)); 730 PetscCheck(event >= 0 && event <= eventRegLog->numEvents,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid event id"); 731 eventRegLog->eventInfo[event].collective = collective; 732 PetscFunctionReturn(0); 733 } 734 735 /*@ 736 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 737 738 Not Collective 739 740 Input Parameter: 741 . classid - The object class, for example MAT_CLASSID, SNES_CLASSID, etc. 742 743 Level: developer 744 745 .seealso: `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 746 @*/ 747 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) 748 { 749 PetscStageLog stageLog; 750 int stage; 751 752 PetscFunctionBegin; 753 PetscCall(PetscLogGetStageLog(&stageLog)); 754 for (stage = 0; stage < stageLog->numStages; stage++) { 755 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 756 } 757 PetscFunctionReturn(0); 758 } 759 760 /*@ 761 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 762 763 Not Collective 764 765 Input Parameter: 766 . classid - The object class, for example MAT_CLASSID, SNES_CLASSID, etc. 767 768 Level: developer 769 770 .seealso: `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 771 @*/ 772 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) 773 { 774 PetscStageLog stageLog; 775 int stage; 776 777 PetscFunctionBegin; 778 PetscCall(PetscLogGetStageLog(&stageLog)); 779 for (stage = 0; stage < stageLog->numStages; stage++) { 780 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 781 } 782 PetscFunctionReturn(0); 783 } 784 785 /*@ 786 PetscLogEventActivate - Indicates that a particular event should be logged. 787 788 Not Collective 789 790 Input Parameter: 791 . event - The event id 792 793 Usage: 794 .vb 795 PetscLogEventDeactivate(VEC_SetValues); 796 [code where you do not want to log VecSetValues()] 797 PetscLogEventActivate(VEC_SetValues); 798 [code where you do want to log VecSetValues()] 799 .ve 800 801 Note: 802 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 803 or an event number obtained with PetscLogEventRegister(). 804 805 Level: advanced 806 807 .seealso: `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 808 @*/ 809 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) 810 { 811 PetscStageLog stageLog; 812 int stage; 813 814 PetscFunctionBegin; 815 PetscCall(PetscLogGetStageLog(&stageLog)); 816 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 817 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 818 PetscFunctionReturn(0); 819 } 820 821 /*@ 822 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 823 824 Not Collective 825 826 Input Parameter: 827 . event - The event id 828 829 Usage: 830 .vb 831 PetscLogEventDeactivate(VEC_SetValues); 832 [code where you do not want to log VecSetValues()] 833 PetscLogEventActivate(VEC_SetValues); 834 [code where you do want to log VecSetValues()] 835 .ve 836 837 Note: 838 The event may be either a pre-defined PETSc event (found in 839 include/petsclog.h) or an event number obtained with PetscLogEventRegister()). 840 841 Level: advanced 842 843 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 844 @*/ 845 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) 846 { 847 PetscStageLog stageLog; 848 int stage; 849 850 PetscFunctionBegin; 851 PetscCall(PetscLogGetStageLog(&stageLog)); 852 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 853 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 854 PetscFunctionReturn(0); 855 } 856 857 /*@ 858 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged. 859 860 Not Collective 861 862 Input Parameter: 863 . event - The event id 864 865 Usage: 866 .vb 867 PetscLogEventDeactivatePush(VEC_SetValues); 868 [code where you do not want to log VecSetValues()] 869 PetscLogEventDeactivatePop(VEC_SetValues); 870 [code where you do want to log VecSetValues()] 871 .ve 872 873 Note: 874 The event may be either a pre-defined PETSc event (found in 875 include/petsclog.h) or an event number obtained with PetscLogEventRegister()). 876 877 Level: advanced 878 879 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()` 880 @*/ 881 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) 882 { 883 PetscStageLog stageLog; 884 int stage; 885 886 PetscFunctionBegin; 887 PetscCall(PetscLogGetStageLog(&stageLog)); 888 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 889 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 890 PetscFunctionReturn(0); 891 } 892 893 /*@ 894 PetscLogEventDeactivatePop - Indicates that a particular event should be logged. 895 896 Not Collective 897 898 Input Parameter: 899 . event - The event id 900 901 Usage: 902 .vb 903 PetscLogEventDeactivatePush(VEC_SetValues); 904 [code where you do not want to log VecSetValues()] 905 PetscLogEventDeactivatePop(VEC_SetValues); 906 [code where you do want to log VecSetValues()] 907 .ve 908 909 Note: 910 The event may be either a pre-defined PETSc event (found in 911 include/petsclog.h) or an event number obtained with PetscLogEventRegister()). 912 913 Level: advanced 914 915 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 916 @*/ 917 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) 918 { 919 PetscStageLog stageLog; 920 int stage; 921 922 PetscFunctionBegin; 923 PetscCall(PetscLogGetStageLog(&stageLog)); 924 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 925 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 926 PetscFunctionReturn(0); 927 } 928 929 /*@ 930 PetscLogEventSetActiveAll - Sets the event activity in every stage. 931 932 Not Collective 933 934 Input Parameters: 935 + event - The event id 936 - isActive - The activity flag determining whether the event is logged 937 938 Level: advanced 939 940 .seealso: `PlogEventActivate()`, `PlogEventDeactivate()` 941 @*/ 942 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) 943 { 944 PetscStageLog stageLog; 945 int stage; 946 947 PetscFunctionBegin; 948 PetscCall(PetscLogGetStageLog(&stageLog)); 949 for (stage = 0; stage < stageLog->numStages; stage++) { 950 if (isActive) { 951 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 952 } else { 953 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 954 } 955 } 956 PetscFunctionReturn(0); 957 } 958 959 /*@ 960 PetscLogEventActivateClass - Activates event logging for a PETSc object class. 961 962 Not Collective 963 964 Input Parameter: 965 . classid - The event class, for example MAT_CLASSID, SNES_CLASSID, etc. 966 967 Level: developer 968 969 .seealso: `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 970 @*/ 971 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) 972 { 973 PetscStageLog stageLog; 974 int stage; 975 976 PetscFunctionBegin; 977 PetscCall(PetscLogGetStageLog(&stageLog)); 978 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 979 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 980 PetscFunctionReturn(0); 981 } 982 983 /*@ 984 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class. 985 986 Not Collective 987 988 Input Parameter: 989 . classid - The event class, for example MAT_CLASSID, SNES_CLASSID, etc. 990 991 Level: developer 992 993 .seealso: `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 994 @*/ 995 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) 996 { 997 PetscStageLog stageLog; 998 int stage; 999 1000 PetscFunctionBegin; 1001 PetscCall(PetscLogGetStageLog(&stageLog)); 1002 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1003 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1004 PetscFunctionReturn(0); 1005 } 1006 1007 /*MC 1008 PetscLogEventSync - Synchronizes the beginning of a user event. 1009 1010 Synopsis: 1011 #include <petsclog.h> 1012 PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm) 1013 1014 Collective 1015 1016 Input Parameters: 1017 + e - integer associated with the event obtained from PetscLogEventRegister() 1018 - comm - an MPI communicator 1019 1020 Usage: 1021 .vb 1022 PetscLogEvent USER_EVENT; 1023 PetscLogEventRegister("User event",0,&USER_EVENT); 1024 PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD); 1025 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1026 [code segment to monitor] 1027 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1028 .ve 1029 1030 Notes: 1031 This routine should be called only if there is not a 1032 PetscObject available to pass to PetscLogEventBegin(). 1033 1034 Level: developer 1035 1036 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1037 1038 M*/ 1039 1040 /*MC 1041 PetscLogEventBegin - Logs the beginning of a user event. 1042 1043 Synopsis: 1044 #include <petsclog.h> 1045 PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1046 1047 Not Collective 1048 1049 Input Parameters: 1050 + e - integer associated with the event obtained from PetscLogEventRegister() 1051 - o1,o2,o3,o4 - objects associated with the event, or 0 1052 1053 Fortran Synopsis: 1054 void PetscLogEventBegin(int e,PetscErrorCode ierr) 1055 1056 Usage: 1057 .vb 1058 PetscLogEvent USER_EVENT; 1059 PetscLogDouble user_event_flops; 1060 PetscLogEventRegister("User event",0,&USER_EVENT); 1061 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1062 [code segment to monitor] 1063 PetscLogFlops(user_event_flops); 1064 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1065 .ve 1066 1067 Notes: 1068 You need to register each integer event with the command 1069 PetscLogEventRegister(). 1070 1071 Developer Notes: 1072 PetscLogEventBegin() and PetscLogEventBegin() return error codes instead of explicitly handling the 1073 errors that occur in the macro directly because other packages that use this macros have used them in their 1074 own functions or methods that do not return error codes and it would be disruptive to change the current 1075 behavior. 1076 1077 Level: intermediate 1078 1079 .seealso: `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1080 1081 M*/ 1082 1083 /*MC 1084 PetscLogEventEnd - Log the end of a user event. 1085 1086 Synopsis: 1087 #include <petsclog.h> 1088 PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1089 1090 Not Collective 1091 1092 Input Parameters: 1093 + e - integer associated with the event obtained with PetscLogEventRegister() 1094 - o1,o2,o3,o4 - objects associated with the event, or 0 1095 1096 Fortran Synopsis: 1097 void PetscLogEventEnd(int e,PetscErrorCode ierr) 1098 1099 Usage: 1100 .vb 1101 PetscLogEvent USER_EVENT; 1102 PetscLogDouble user_event_flops; 1103 PetscLogEventRegister("User event",0,&USER_EVENT,); 1104 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1105 [code segment to monitor] 1106 PetscLogFlops(user_event_flops); 1107 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1108 .ve 1109 1110 Notes: 1111 You should also register each additional integer event with the command 1112 PetscLogEventRegister(). 1113 1114 Level: intermediate 1115 1116 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1117 1118 M*/ 1119 1120 /*@C 1121 PetscLogEventGetId - Returns the event id when given the event name. 1122 1123 Not Collective 1124 1125 Input Parameter: 1126 . name - The event name 1127 1128 Output Parameter: 1129 . event - The event, or -1 if no event with that name exists 1130 1131 Level: intermediate 1132 1133 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1134 @*/ 1135 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) 1136 { 1137 PetscStageLog stageLog; 1138 1139 PetscFunctionBegin; 1140 PetscCall(PetscLogGetStageLog(&stageLog)); 1141 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1146 /*@C 1147 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1148 be read by bin/petscview. This program no longer exists. 1149 1150 Collective on PETSC_COMM_WORLD 1151 1152 Input Parameter: 1153 . name - an optional file name 1154 1155 Usage: 1156 .vb 1157 PetscInitialize(...); 1158 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1159 ... code ... 1160 PetscLogDump(filename); 1161 PetscFinalize(); 1162 .ve 1163 1164 Notes: 1165 The default file name is 1166 $ Log.<rank> 1167 where <rank> is the processor number. If no name is specified, 1168 this file will be used. 1169 1170 Level: advanced 1171 1172 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1173 @*/ 1174 PetscErrorCode PetscLogDump(const char sname[]) 1175 { 1176 PetscStageLog stageLog; 1177 PetscEventPerfInfo *eventInfo; 1178 FILE *fd; 1179 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1180 PetscLogDouble flops, _TotalTime; 1181 PetscMPIInt rank; 1182 int action, object, curStage; 1183 PetscLogEvent event; 1184 1185 PetscFunctionBegin; 1186 /* Calculate the total elapsed time */ 1187 PetscTime(&_TotalTime); 1188 _TotalTime -= petsc_BaseTime; 1189 /* Open log file */ 1190 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1191 if (sname && sname[0]) sprintf(file, "%s.%d", sname, rank); 1192 else sprintf(file, "Log.%d", rank); 1193 PetscCall(PetscFixFilename(file, fname)); 1194 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1195 PetscCheck(!(rank == 0) || !(!fd),PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1196 /* Output totals */ 1197 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1198 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1199 /* Output actions */ 1200 if (petsc_logActions) { 1201 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1202 for (action = 0; action < petsc_numActions; action++) { 1203 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", 1204 petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1205 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1206 } 1207 } 1208 /* Output objects */ 1209 if (petsc_logObjects) { 1210 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1211 for (object = 0; object < petsc_numObjects; object++) { 1212 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int) petsc_objects[object].mem)); 1213 if (!petsc_objects[object].name[0]) { 1214 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd,"No Name\n")); 1215 } else { 1216 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1217 } 1218 if (petsc_objects[object].info[0] != 0) { 1219 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1220 } else { 1221 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1222 } 1223 } 1224 } 1225 /* Output events */ 1226 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1227 PetscCall(PetscLogGetStageLog(&stageLog)); 1228 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1229 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1230 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1231 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops/eventInfo[event].time; 1232 else flops = 0.0; 1233 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count,eventInfo[event].flops, eventInfo[event].time, flops)); 1234 } 1235 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1236 PetscFunctionReturn(0); 1237 } 1238 1239 /* 1240 PetscLogView_Detailed - Each process prints the times for its own events 1241 1242 */ 1243 PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) 1244 { 1245 PetscStageLog stageLog; 1246 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1247 PetscLogDouble locTotalTime, numRed, maxMem; 1248 int numStages,numEvents,stage,event; 1249 MPI_Comm comm = PetscObjectComm((PetscObject) viewer); 1250 PetscMPIInt rank,size; 1251 1252 PetscFunctionBegin; 1253 PetscCallMPI(MPI_Comm_size(comm, &size)); 1254 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1255 /* Must preserve reduction count before we go on */ 1256 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1257 /* Get the total elapsed time */ 1258 PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime; 1259 PetscCall(PetscViewerASCIIPrintf(viewer,"size = %d\n",size)); 1260 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalTimes = {}\n")); 1261 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalMessages = {}\n")); 1262 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalMessageLens = {}\n")); 1263 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalReductions = {}\n")); 1264 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalFlop = {}\n")); 1265 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalObjects = {}\n")); 1266 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalMemory = {}\n")); 1267 PetscCall(PetscLogGetStageLog(&stageLog)); 1268 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1269 PetscCall(PetscViewerASCIIPrintf(viewer,"Stages = {}\n")); 1270 for (stage=0; stage<numStages; stage++) { 1271 PetscCall(PetscViewerASCIIPrintf(viewer,"Stages[\"%s\"] = {}\n",stageLog->stageInfo[stage].name)); 1272 PetscCall(PetscViewerASCIIPrintf(viewer,"Stages[\"%s\"][\"summary\"] = {}\n",stageLog->stageInfo[stage].name)); 1273 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1274 for (event = 0; event < numEvents; event++) { 1275 PetscCall(PetscViewerASCIIPrintf(viewer,"Stages[\"%s\"][\"%s\"] = {}\n",stageLog->stageInfo[stage].name,stageLog->eventLog->eventInfo[event].name)); 1276 } 1277 } 1278 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1279 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1280 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalTimes[%d] = %g\n",rank,locTotalTime)); 1281 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalMessages[%d] = %g\n",rank,(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1282 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalMessageLens[%d] = %g\n",rank,(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1283 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalReductions[%d] = %g\n",rank,numRed)); 1284 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalFlop[%d] = %g\n",rank,petsc_TotalFlops)); 1285 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalObjects[%d] = %d\n",rank,petsc_numObjects)); 1286 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalMemory[%d] = %g\n",rank,maxMem)); 1287 PetscCall(PetscViewerFlush(viewer)); 1288 for (stage=0; stage<numStages; stage++) { 1289 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", 1291 stageLog->stageInfo[stage].name,rank, 1292 stageInfo->time,stageInfo->numMessages,stageInfo->messageLength,stageInfo->numReductions,stageInfo->flops)); 1293 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1294 for (event = 0; event < numEvents; event++) { 1295 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1296 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1297 stageLog->stageInfo[stage].name,stageLog->eventLog->eventInfo[event].name,rank, 1298 eventInfo->count,eventInfo->time,eventInfo->syncTime,eventInfo->numMessages,eventInfo->messageLength,eventInfo->numReductions,eventInfo->flops)); 1299 if (eventInfo->dof[0] >= 0.) { 1300 PetscInt d, e; 1301 1302 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1303 for (d = 0; d < 8; ++d) { 1304 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1305 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1306 } 1307 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1308 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1309 for (e = 0; e < 8; ++e) { 1310 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1311 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1312 } 1313 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1314 } 1315 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"}\n")); 1316 } 1317 } 1318 PetscCall(PetscViewerFlush(viewer)); 1319 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 /* 1324 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1325 */ 1326 PetscErrorCode PetscLogView_CSV(PetscViewer viewer) 1327 { 1328 PetscStageLog stageLog; 1329 PetscEventPerfInfo *eventInfo = NULL; 1330 PetscLogDouble locTotalTime, maxMem; 1331 int numStages,numEvents,stage,event; 1332 MPI_Comm comm = PetscObjectComm((PetscObject) viewer); 1333 PetscMPIInt rank,size; 1334 1335 PetscFunctionBegin; 1336 PetscCallMPI(MPI_Comm_size(comm, &size)); 1337 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1338 /* Must preserve reduction count before we go on */ 1339 /* Get the total elapsed time */ 1340 PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime; 1341 PetscCall(PetscLogGetStageLog(&stageLog)); 1342 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1343 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1344 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1345 PetscCall(PetscViewerASCIIPrintf(viewer,"Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1346 PetscCall(PetscViewerFlush(viewer)); 1347 for (stage=0; stage<numStages; stage++) { 1348 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1349 1350 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"%s,summary,%d,1,%g,%g,%g,%g,%g\n", 1351 stageLog->stageInfo[stage].name,rank,stageInfo->time,stageInfo->numMessages,stageInfo->messageLength,stageInfo->numReductions,stageInfo->flops)); 1352 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1353 for (event = 0; event < numEvents; event++) { 1354 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1355 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"%s,%s,%d,%d,%g,%g,%g,%g,%g",stageLog->stageInfo[stage].name, 1356 stageLog->eventLog->eventInfo[event].name,rank,eventInfo->count,eventInfo->time,eventInfo->numMessages, 1357 eventInfo->messageLength,eventInfo->numReductions,eventInfo->flops)); 1358 if (eventInfo->dof[0] >= 0.) { 1359 PetscInt d, e; 1360 1361 for (d = 0; d < 8; ++d) { 1362 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1363 } 1364 for (e = 0; e < 8; ++e) { 1365 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1366 } 1367 } 1368 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"\n")); 1369 } 1370 } 1371 PetscCall(PetscViewerFlush(viewer)); 1372 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm,FILE *fd) 1377 { 1378 PetscFunctionBegin; 1379 if (!PetscLogSyncOn) PetscFunctionReturn(0); 1380 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1381 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1382 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1383 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1384 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1385 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1386 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1387 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1388 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1389 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1390 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1391 PetscFunctionReturn(0); 1392 } 1393 1394 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm,FILE *fd) 1395 { 1396 PetscFunctionBegin; 1397 if (PetscDefined(USE_DEBUG)) { 1398 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1399 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1400 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1401 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1402 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1403 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1404 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1405 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1406 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1407 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1408 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1409 } 1410 PetscFunctionReturn(0); 1411 } 1412 1413 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm,FILE *fd) 1414 { 1415 #if defined(PETSC_HAVE_DEVICE) 1416 PetscMPIInt size; 1417 1418 PetscFunctionBegin; 1419 PetscCallMPI(MPI_Comm_size(comm, &size)); 1420 if (use_gpu_aware_mpi || size == 1) PetscFunctionReturn(0); 1421 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1422 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1423 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1424 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1425 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1426 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1427 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1428 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1429 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1430 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1431 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1432 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1433 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1434 PetscFunctionReturn(0); 1435 #else 1436 return 0; 1437 #endif 1438 } 1439 1440 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm,FILE *fd) 1441 { 1442 #if defined(PETSC_HAVE_DEVICE) 1443 1444 PetscFunctionBegin; 1445 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(0); 1446 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1447 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1448 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1449 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1450 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1451 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1452 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1453 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1454 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1455 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1456 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1457 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1458 PetscFunctionReturn(0); 1459 #else 1460 return 0; 1461 #endif 1462 } 1463 1464 PetscErrorCode PetscLogView_Default(PetscViewer viewer) 1465 { 1466 FILE *fd; 1467 PetscLogDouble zero = 0.0; 1468 PetscStageLog stageLog; 1469 PetscStageInfo *stageInfo = NULL; 1470 PetscEventPerfInfo *eventInfo = NULL; 1471 PetscClassPerfInfo *classInfo; 1472 char arch[128],hostname[128],username[128],pname[PETSC_MAX_PATH_LEN],date[128]; 1473 const char *name; 1474 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1475 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1476 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1477 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1478 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1479 PetscLogDouble min, max, tot, ratio, avg, x, y; 1480 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1481 #if defined(PETSC_HAVE_DEVICE) 1482 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1483 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1484 #endif 1485 PetscMPIInt minC, maxC; 1486 PetscMPIInt size, rank; 1487 PetscBool *localStageUsed, *stageUsed; 1488 PetscBool *localStageVisible, *stageVisible; 1489 int numStages, localNumEvents, numEvents; 1490 int stage, oclass; 1491 PetscLogEvent event; 1492 PetscErrorCode ierr = 0; 1493 char version[256]; 1494 MPI_Comm comm; 1495 #if defined(PETSC_HAVE_DEVICE) 1496 PetscLogEvent eventid; 1497 PetscInt64 nas = 0x7FF0000000000002; 1498 #endif 1499 1500 PetscFunctionBegin; 1501 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1502 PetscCall(PetscObjectGetComm((PetscObject)viewer,&comm)); 1503 PetscCall(PetscViewerASCIIGetPointer(viewer,&fd)); 1504 PetscCallMPI(MPI_Comm_size(comm, &size)); 1505 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1506 /* Get the total elapsed time */ 1507 PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime; 1508 1509 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1510 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1511 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1512 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1513 PetscCall(PetscLogViewWarnSync(comm,fd)); 1514 PetscCall(PetscLogViewWarnDebugging(comm,fd)); 1515 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm,fd)); 1516 PetscCall(PetscLogViewWarnGpuTime(comm,fd)); 1517 PetscCall(PetscGetArchType(arch,sizeof(arch))); 1518 PetscCall(PetscGetHostName(hostname,sizeof(hostname))); 1519 PetscCall(PetscGetUserName(username,sizeof(username))); 1520 PetscCall(PetscGetProgramName(pname,sizeof(pname))); 1521 PetscCall(PetscGetDate(date,sizeof(date))); 1522 PetscCall(PetscGetVersion(version,sizeof(version))); 1523 if (size == 1) { 1524 PetscCall(PetscFPrintf(comm,fd,"%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1525 } else { 1526 PetscCall(PetscFPrintf(comm,fd,"%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1527 } 1528 #if defined(PETSC_HAVE_OPENMP) 1529 PetscCall(PetscFPrintf(comm,fd,"Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1530 #endif 1531 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1532 1533 /* Must preserve reduction count before we go on */ 1534 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1535 1536 /* Calculate summary information */ 1537 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1538 /* Time */ 1539 PetscCallMPI(MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1540 PetscCallMPI(MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1541 PetscCallMPI(MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1542 avg = tot/((PetscLogDouble) size); 1543 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1544 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1545 TotalTime = tot; 1546 /* Objects */ 1547 avg = (PetscLogDouble) petsc_numObjects; 1548 PetscCallMPI(MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1549 PetscCallMPI(MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1550 PetscCallMPI(MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1551 avg = tot/((PetscLogDouble) size); 1552 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1553 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1554 /* Flops */ 1555 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1556 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1557 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1558 avg = tot/((PetscLogDouble) size); 1559 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1560 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1561 TotalFlops = tot; 1562 /* Flops/sec -- Must talk to Barry here */ 1563 if (locTotalTime != 0.0) flops = petsc_TotalFlops/locTotalTime; else flops = 0.0; 1564 PetscCallMPI(MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1565 PetscCallMPI(MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1566 PetscCallMPI(MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1567 avg = tot/((PetscLogDouble) size); 1568 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1569 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1570 /* Memory */ 1571 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1572 if (mem > 0.0) { 1573 PetscCallMPI(MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1574 PetscCallMPI(MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1575 PetscCallMPI(MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1576 avg = tot/((PetscLogDouble) size); 1577 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1578 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1579 } 1580 /* Messages */ 1581 mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1582 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1583 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1584 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1585 avg = tot/((PetscLogDouble) size); 1586 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1587 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1588 numMessages = tot; 1589 /* Message Lengths */ 1590 mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1591 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1592 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1593 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1594 if (numMessages != 0) avg = tot/numMessages; else avg = 0.0; 1595 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1596 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1597 messageLength = tot; 1598 /* Reductions */ 1599 PetscCallMPI(MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1600 PetscCallMPI(MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1601 PetscCallMPI(MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1602 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1603 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1604 numReductions = red; /* wrong because uses count from process zero */ 1605 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1606 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1607 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1608 1609 /* Get total number of stages -- 1610 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1611 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1612 This seems best accomplished by assoicating a communicator with each stage. 1613 */ 1614 PetscCall(PetscLogGetStageLog(&stageLog)); 1615 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1616 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1617 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1618 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1619 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1620 if (numStages > 0) { 1621 stageInfo = stageLog->stageInfo; 1622 for (stage = 0; stage < numStages; stage++) { 1623 if (stage < stageLog->numStages) { 1624 localStageUsed[stage] = stageInfo[stage].used; 1625 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1626 } else { 1627 localStageUsed[stage] = PETSC_FALSE; 1628 localStageVisible[stage] = PETSC_TRUE; 1629 } 1630 } 1631 PetscCallMPI(MPI_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1632 PetscCallMPI(MPI_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1633 for (stage = 0; stage < numStages; stage++) { 1634 if (stageUsed[stage]) { 1635 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1636 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1637 break; 1638 } 1639 } 1640 for (stage = 0; stage < numStages; stage++) { 1641 if (!stageUsed[stage]) continue; 1642 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1643 if (localStageUsed[stage]) { 1644 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1645 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1646 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1647 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1648 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1649 name = stageInfo[stage].name; 1650 } else { 1651 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1652 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1653 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1654 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1655 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1656 name = ""; 1657 } 1658 mess *= 0.5; messLen *= 0.5; red /= size; 1659 if (TotalTime != 0.0) fracTime = stageTime/TotalTime; else fracTime = 0.0; 1660 if (TotalFlops != 0.0) fracFlops = flops/TotalFlops; else fracFlops = 0.0; 1661 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1662 if (numMessages != 0.0) fracMessages = mess/numMessages; else fracMessages = 0.0; 1663 if (mess != 0.0) avgMessLen = messLen/mess; else avgMessLen = 0.0; 1664 if (messageLength != 0.0) fracLength = messLen/messageLength; else fracLength = 0.0; 1665 if (numReductions != 0.0) fracReductions = red/numReductions; else fracReductions = 0.0; 1666 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", 1667 stage, name, stageTime/size, 100.0*fracTime, flops, 100.0*fracFlops, 1668 mess, 100.0*fracMessages, avgMessLen, 100.0*fracLength, red, 100.0*fracReductions)); 1669 } 1670 } 1671 1672 PetscCall(PetscFPrintf(comm, fd,"\n------------------------------------------------------------------------------------------------------------------------\n")); 1673 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1674 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1675 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1676 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1677 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1678 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1679 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1680 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1681 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1682 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1683 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1684 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1685 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1686 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1687 if (PetscLogMemory) { 1688 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1689 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1690 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1691 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1692 } 1693 #if defined(PETSC_HAVE_DEVICE) 1694 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1695 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1696 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1697 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1698 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1699 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1700 #endif 1701 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1702 1703 PetscCall(PetscLogViewWarnDebugging(comm,fd)); 1704 1705 /* Report events */ 1706 PetscCall(PetscFPrintf(comm, fd,"Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1707 if (PetscLogMemory) { 1708 PetscCall(PetscFPrintf(comm, fd," Malloc EMalloc MMalloc RMI")); 1709 } 1710 #if defined(PETSC_HAVE_DEVICE) 1711 PetscCall(PetscFPrintf(comm, fd," GPU - CpuToGpu - - GpuToCpu - GPU")); 1712 #endif 1713 PetscCall(PetscFPrintf(comm, fd,"\n")); 1714 PetscCall(PetscFPrintf(comm, fd," Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1715 if (PetscLogMemory) { 1716 PetscCall(PetscFPrintf(comm, fd," Mbytes Mbytes Mbytes Mbytes")); 1717 } 1718 #if defined(PETSC_HAVE_DEVICE) 1719 PetscCall(PetscFPrintf(comm, fd," Mflop/s Count Size Count Size %%F")); 1720 #endif 1721 PetscCall(PetscFPrintf(comm, fd,"\n")); 1722 PetscCall(PetscFPrintf(comm, fd,"------------------------------------------------------------------------------------------------------------------------")); 1723 if (PetscLogMemory) { 1724 PetscCall(PetscFPrintf(comm, fd,"-----------------------------")); 1725 } 1726 #if defined(PETSC_HAVE_DEVICE) 1727 PetscCall(PetscFPrintf(comm, fd,"---------------------------------------")); 1728 #endif 1729 PetscCall(PetscFPrintf(comm, fd,"\n")); 1730 1731 #if defined(PETSC_HAVE_DEVICE) 1732 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1733 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1734 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1735 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1736 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1737 #endif 1738 1739 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1740 for (stage = 0; stage < numStages; stage++) { 1741 if (!stageVisible[stage]) continue; 1742 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1743 if (localStageUsed[stage]) { 1744 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1745 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1746 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1747 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1748 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1749 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1750 } else { 1751 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1752 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1753 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1754 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1755 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1756 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1757 } 1758 mess *= 0.5; messLen *= 0.5; red /= size; 1759 1760 /* Get total number of events in this stage -- 1761 Currently, a single processor can register more events than another, but events must all be registered in order, 1762 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1763 on the event ID. This seems best accomplished by associating a communicator with each stage. 1764 1765 Problem: If the event did not happen on proc 1, its name will not be available. 1766 Problem: Event visibility is not implemented 1767 */ 1768 if (localStageUsed[stage]) { 1769 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1770 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1771 } else localNumEvents = 0; 1772 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1773 for (event = 0; event < numEvents; event++) { 1774 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1775 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1776 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; else flopr = 0.0; 1777 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1778 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1779 PetscCallMPI(MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1780 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1781 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1782 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1783 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1784 PetscCallMPI(MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1785 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1786 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1787 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1788 if (PetscLogMemory) { 1789 PetscCallMPI(MPI_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1790 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1791 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncrease, &malmax,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1792 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1793 } 1794 #if defined(PETSC_HAVE_DEVICE) 1795 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1796 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1797 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1798 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1799 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuFlops, &gflops,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1800 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuTime, &gmaxt ,1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1801 #endif 1802 name = stageLog->eventLog->eventInfo[event].name; 1803 } else { 1804 flopr = 0.0; 1805 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1806 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1807 PetscCallMPI(MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1808 PetscCallMPI(MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1809 PetscCallMPI(MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1810 PetscCallMPI(MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1811 PetscCallMPI(MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1812 PetscCallMPI(MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1813 PetscCallMPI(MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1814 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1815 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1816 if (PetscLogMemory) { 1817 PetscCallMPI(MPI_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1818 PetscCallMPI(MPI_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1819 PetscCallMPI(MPI_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1820 PetscCallMPI(MPI_Allreduce(&zero, &emalmax,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1821 } 1822 #if defined(PETSC_HAVE_DEVICE) 1823 PetscCallMPI(MPI_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1824 PetscCallMPI(MPI_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1825 PetscCallMPI(MPI_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1826 PetscCallMPI(MPI_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1827 PetscCallMPI(MPI_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1828 PetscCallMPI(MPI_Allreduce(&zero, &gmaxt , 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1829 #endif 1830 name = ""; 1831 } 1832 if (mint < 0.0) { 1833 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n",mint,name)); 1834 mint = 0; 1835 } 1836 PetscCheck(minf >= 0.0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Minimum flop %g over all processors for %s is negative! Not possible!",minf,name); 1837 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1838 #if defined(PETSC_HAVE_DEVICE) 1839 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1840 memcpy(&gmaxt,&nas,sizeof(PetscLogDouble)); 1841 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 1842 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 1843 memcpy(&mint,&nas,sizeof(PetscLogDouble)); 1844 memcpy(&maxt,&nas,sizeof(PetscLogDouble)); 1845 } 1846 } 1847 #endif 1848 totm *= 0.5; totml *= 0.5; totr /= size; 1849 1850 if (maxC != 0) { 1851 if (minC != 0) ratC = ((PetscLogDouble)maxC)/minC;else ratC = 0.0; 1852 if (mint != 0.0) ratt = maxt/mint; else ratt = 0.0; 1853 if (minf != 0.0) ratf = maxf/minf; else ratf = 0.0; 1854 if (TotalTime != 0.0) fracTime = tott/TotalTime; else fracTime = 0.0; 1855 if (TotalFlops != 0.0) fracFlops = totf/TotalFlops; else fracFlops = 0.0; 1856 if (stageTime != 0.0) fracStageTime = tott/stageTime; else fracStageTime = 0.0; 1857 if (flops != 0.0) fracStageFlops = totf/flops; else fracStageFlops = 0.0; 1858 if (numMessages != 0.0) fracMess = totm/numMessages; else fracMess = 0.0; 1859 if (messageLength != 0.0) fracMessLen = totml/messageLength; else fracMessLen = 0.0; 1860 if (numReductions != 0.0) fracRed = totr/numReductions; else fracRed = 0.0; 1861 if (mess != 0.0) fracStageMess = totm/mess; else fracStageMess = 0.0; 1862 if (messLen != 0.0) fracStageMessLen = totml/messLen; else fracStageMessLen = 0.0; 1863 if (red != 0.0) fracStageRed = totr/red; else fracStageRed = 0.0; 1864 if (totm != 0.0) totml /= totm; else totml = 0.0; 1865 if (maxt != 0.0) flopr = totf/maxt; else flopr = 0.0; 1866 if (fracStageTime > 1.00) PetscCall(PetscFPrintf(comm, fd,"Warning -- total time of event greater than time of entire stage -- something is wrong with the timer\n")); 1867 PetscCall(PetscFPrintf(comm, fd, 1868 "%-16s %7d%4.1f %5.4e%4.1f %3.2e%4.1f %2.1e %2.1e %2.1e%3.0f%3.0f%3.0f%3.0f%3.0f %3.0f%3.0f%3.0f%3.0f%3.0f %5.0f", 1869 name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 1870 100.0*fracTime, 100.0*fracFlops, 100.0*fracMess, 100.0*fracMessLen, 100.0*fracRed, 1871 100.0*fracStageTime, 100.0*fracStageFlops, 100.0*fracStageMess, 100.0*fracStageMessLen, 100.0*fracStageRed, 1872 PetscAbs(flopr)/1.0e6)); 1873 if (PetscLogMemory) { 1874 PetscCall(PetscFPrintf(comm, fd," %5.0f %5.0f %5.0f %5.0f",mal/1.0e6,emalmax/1.0e6,malmax/1.0e6,mem/1.0e6)); 1875 } 1876 #if defined(PETSC_HAVE_DEVICE) 1877 if (totf != 0.0) fracgflops = gflops/totf; else fracgflops = 0.0; 1878 if (gmaxt != 0.0) gflopr = gflops/gmaxt; else gflopr = 0.0; 1879 PetscCall(PetscFPrintf(comm, fd," %5.0f %4.0f %3.2e %4.0f %3.2e% 3.0f",PetscAbs(gflopr)/1.0e6,cct/size,csz/(1.0e6*size),gct/size,gsz/(1.0e6*size),100.0*fracgflops)); 1880 #endif 1881 PetscCall(PetscFPrintf(comm, fd,"\n")); 1882 } 1883 } 1884 } 1885 1886 /* Memory usage and object creation */ 1887 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1888 if (PetscLogMemory) { 1889 PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1890 } 1891 #if defined(PETSC_HAVE_DEVICE) 1892 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1893 #endif 1894 PetscCall(PetscFPrintf(comm, fd, "\n")); 1895 PetscCall(PetscFPrintf(comm, fd, "\n")); 1896 1897 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1898 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1899 stats for stages local to processor sets. 1900 */ 1901 /* We should figure out the longest object name here (now 20 characters) */ 1902 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 1903 for (stage = 0; stage < numStages; stage++) { 1904 if (localStageUsed[stage]) { 1905 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 1906 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1907 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 1908 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 1909 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name,classInfo[oclass].creations, classInfo[oclass].destructions)); 1910 } 1911 } 1912 } else { 1913 if (!localStageVisible[stage]) continue; 1914 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1915 } 1916 } 1917 1918 PetscCall(PetscFree(localStageUsed)); 1919 PetscCall(PetscFree(stageUsed)); 1920 PetscCall(PetscFree(localStageVisible)); 1921 PetscCall(PetscFree(stageVisible)); 1922 1923 /* Information unrelated to this particular run */ 1924 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 1925 PetscTime(&y); 1926 PetscTime(&x); 1927 PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y); 1928 PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y); 1929 PetscCall(PetscFPrintf(comm,fd,"Average time to get PetscTime(): %g\n", (y-x)/10.0)); 1930 /* MPI information */ 1931 if (size > 1) { 1932 MPI_Status status; 1933 PetscMPIInt tag; 1934 MPI_Comm newcomm; 1935 1936 PetscCallMPI(MPI_Barrier(comm)); 1937 PetscTime(&x); 1938 PetscCallMPI(MPI_Barrier(comm)); 1939 PetscCallMPI(MPI_Barrier(comm)); 1940 PetscCallMPI(MPI_Barrier(comm)); 1941 PetscCallMPI(MPI_Barrier(comm)); 1942 PetscCallMPI(MPI_Barrier(comm)); 1943 PetscTime(&y); 1944 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y-x)/5.0)); 1945 PetscCall(PetscCommDuplicate(comm,&newcomm, &tag)); 1946 PetscCallMPI(MPI_Barrier(comm)); 1947 if (rank) { 1948 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank-1, tag, newcomm, &status)); 1949 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank+1)%size, tag, newcomm)); 1950 } else { 1951 PetscTime(&x); 1952 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 1953 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size-1, tag, newcomm, &status)); 1954 PetscTime(&y); 1955 PetscCall(PetscFPrintf(comm,fd,"Average time for zero size MPI_Send(): %g\n", (y-x)/size)); 1956 } 1957 PetscCall(PetscCommDestroy(&newcomm)); 1958 } 1959 PetscCall(PetscOptionsView(NULL,viewer)); 1960 1961 /* Machine and compile information */ 1962 #if defined(PETSC_USE_FORTRAN_KERNELS) 1963 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 1964 #else 1965 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 1966 #endif 1967 #if defined(PETSC_USE_64BIT_INDICES) 1968 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64 bit PetscInt\n")); 1969 #elif defined(PETSC_USE___FLOAT128) 1970 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32 bit PetscInt\n")); 1971 #endif 1972 #if defined(PETSC_USE_REAL_SINGLE) 1973 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 1974 #elif defined(PETSC_USE___FLOAT128) 1975 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 1976 #endif 1977 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1978 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 1979 #else 1980 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 1981 #endif 1982 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", 1983 (int) sizeof(short), (int) sizeof(int), (int) sizeof(long), (int) sizeof(void*),(int) sizeof(PetscScalar),(int) sizeof(PetscInt))); 1984 1985 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s",petscconfigureoptions)); 1986 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 1987 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 1988 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 1989 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 1990 1991 /* Cleanup */ 1992 PetscCall(PetscFPrintf(comm, fd, "\n")); 1993 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm,fd)); 1994 PetscCall(PetscLogViewWarnDebugging(comm,fd)); 1995 PetscCall(PetscFPTrapPop()); 1996 PetscFunctionReturn(0); 1997 } 1998 1999 /*@C 2000 PetscLogView - Prints a summary of the logging. 2001 2002 Collective over MPI_Comm 2003 2004 Input Parameter: 2005 . viewer - an ASCII viewer 2006 2007 Options Database Keys: 2008 + -log_view [:filename] - Prints summary of log information 2009 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2010 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2011 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2012 . -log_view_memory - Also display memory usage in each event 2013 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2014 . -log_all - Saves a file Log.rank for each MPI process with details of each step of the computation 2015 - -log_trace [filename] - Displays a trace of what each process is doing 2016 2017 Notes: 2018 It is possible to control the logging programatically but we recommend using the options database approach whenever possible 2019 By default the summary is printed to stdout. 2020 2021 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2022 2023 If PETSc is configured with --with-logging=0 then this functionality is not available 2024 2025 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2026 directory then open filename.xml with your browser. Specific notes for certain browsers 2027 $ Firefox and Internet explorer - simply open the file 2028 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2029 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2030 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2031 your browser. 2032 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2033 window and render the XML log file contents. 2034 2035 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2036 2037 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2038 or using speedscope (https://www.speedscope.app). 2039 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2040 2041 Level: beginner 2042 2043 .seealso: `PetscLogDefaultBegin()`, `PetscLogDump()` 2044 @*/ 2045 PetscErrorCode PetscLogView(PetscViewer viewer) 2046 { 2047 PetscBool isascii; 2048 PetscViewerFormat format; 2049 int stage, lastStage; 2050 PetscStageLog stageLog; 2051 2052 PetscFunctionBegin; 2053 PetscCheck(PetscLogPLB,PETSC_COMM_SELF,PETSC_ERR_SUP,"Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2054 /* Pop off any stages the user forgot to remove */ 2055 lastStage = 0; 2056 PetscCall(PetscLogGetStageLog(&stageLog)); 2057 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2058 while (stage >= 0) { 2059 lastStage = stage; 2060 PetscCall(PetscStageLogPop(stageLog)); 2061 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2062 } 2063 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii)); 2064 PetscCheck(isascii,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Currently can only view logging to ASCII"); 2065 PetscCall(PetscViewerGetFormat(viewer,&format)); 2066 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2067 PetscCall(PetscLogView_Default(viewer)); 2068 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2069 PetscCall(PetscLogView_Detailed(viewer)); 2070 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2071 PetscCall(PetscLogView_CSV(viewer)); 2072 } else if (format == PETSC_VIEWER_ASCII_XML) { 2073 PetscCall(PetscLogView_Nested(viewer)); 2074 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2075 PetscCall(PetscLogView_Flamegraph(viewer)); 2076 } 2077 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2078 PetscFunctionReturn(0); 2079 } 2080 2081 /*@C 2082 PetscLogViewFromOptions - Processes command line options to determine if/how a PetscLog is to be viewed. 2083 2084 Collective on PETSC_COMM_WORLD 2085 2086 Not normally called by user 2087 2088 Level: intermediate 2089 2090 @*/ 2091 PetscErrorCode PetscLogViewFromOptions(void) 2092 { 2093 PetscViewer viewer; 2094 PetscBool flg; 2095 PetscViewerFormat format; 2096 2097 PetscFunctionBegin; 2098 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD,NULL,NULL,"-log_view",&viewer,&format,&flg)); 2099 if (flg) { 2100 PetscCall(PetscViewerPushFormat(viewer,format)); 2101 PetscCall(PetscLogView(viewer)); 2102 PetscCall(PetscViewerPopFormat(viewer)); 2103 PetscCall(PetscViewerDestroy(&viewer)); 2104 } 2105 PetscFunctionReturn(0); 2106 } 2107 2108 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2109 /*@C 2110 PetscGetFlops - Returns the number of flops used on this processor 2111 since the program began. 2112 2113 Not Collective 2114 2115 Output Parameter: 2116 flops - number of floating point operations 2117 2118 Notes: 2119 A global counter logs all PETSc flop counts. The user can use 2120 PetscLogFlops() to increment this counter to include flops for the 2121 application code. 2122 2123 Level: intermediate 2124 2125 .seealso: `PetscTime()`, `PetscLogFlops()` 2126 @*/ 2127 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) 2128 { 2129 PetscFunctionBegin; 2130 *flops = petsc_TotalFlops; 2131 PetscFunctionReturn(0); 2132 } 2133 2134 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2135 { 2136 size_t fullLength; 2137 va_list Argp; 2138 2139 PetscFunctionBegin; 2140 if (!petsc_logObjects) PetscFunctionReturn(0); 2141 va_start(Argp, format); 2142 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64,format,&fullLength, Argp)); 2143 va_end(Argp); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 /*MC 2148 PetscLogFlops - Adds floating point operations to the global counter. 2149 2150 Synopsis: 2151 #include <petsclog.h> 2152 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2153 2154 Not Collective 2155 2156 Input Parameter: 2157 . f - flop counter 2158 2159 Usage: 2160 .vb 2161 PetscLogEvent USER_EVENT; 2162 PetscLogEventRegister("User event",0,&USER_EVENT); 2163 PetscLogEventBegin(USER_EVENT,0,0,0,0); 2164 [code segment to monitor] 2165 PetscLogFlops(user_flops) 2166 PetscLogEventEnd(USER_EVENT,0,0,0,0); 2167 .ve 2168 2169 Notes: 2170 A global counter logs all PETSc flop counts. The user can use 2171 PetscLogFlops() to increment this counter to include flops for the 2172 application code. 2173 2174 Level: intermediate 2175 2176 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2177 2178 M*/ 2179 2180 /*MC 2181 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) 2182 to get accurate timings 2183 2184 Synopsis: 2185 #include <petsclog.h> 2186 void PetscPreLoadBegin(PetscBool flag,char *name); 2187 2188 Not Collective 2189 2190 Input Parameters: 2191 + flag - PETSC_TRUE to run twice, PETSC_FALSE to run once, may be overridden 2192 with command line option -preload true or -preload false 2193 - name - name of first stage (lines of code timed separately with -log_view) to 2194 be preloaded 2195 2196 Usage: 2197 .vb 2198 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2199 lines of code 2200 PetscPreLoadStage("second stage"); 2201 lines of code 2202 PetscPreLoadEnd(); 2203 .ve 2204 2205 Notes: 2206 Only works in C/C++, not Fortran 2207 2208 Flags available within the macro. 2209 + PetscPreLoadingUsed - true if we are or have done preloading 2210 . PetscPreLoadingOn - true if it is CURRENTLY doing preload 2211 . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second 2212 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on 2213 The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin() 2214 and PetscPreLoadEnd() 2215 2216 Level: intermediate 2217 2218 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2219 2220 M*/ 2221 2222 /*MC 2223 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) 2224 to get accurate timings 2225 2226 Synopsis: 2227 #include <petsclog.h> 2228 void PetscPreLoadEnd(void); 2229 2230 Not Collective 2231 2232 Usage: 2233 .vb 2234 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2235 lines of code 2236 PetscPreLoadStage("second stage"); 2237 lines of code 2238 PetscPreLoadEnd(); 2239 .ve 2240 2241 Notes: 2242 only works in C/C++ not fortran 2243 2244 Level: intermediate 2245 2246 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2247 2248 M*/ 2249 2250 /*MC 2251 PetscPreLoadStage - Start a new segment of code to be timed separately. 2252 to get accurate timings 2253 2254 Synopsis: 2255 #include <petsclog.h> 2256 void PetscPreLoadStage(char *name); 2257 2258 Not Collective 2259 2260 Usage: 2261 .vb 2262 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2263 lines of code 2264 PetscPreLoadStage("second stage"); 2265 lines of code 2266 PetscPreLoadEnd(); 2267 .ve 2268 2269 Notes: 2270 only works in C/C++ not fortran 2271 2272 Level: intermediate 2273 2274 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2275 2276 M*/ 2277 2278 #if PetscDefined(HAVE_DEVICE) 2279 #include <petsc/private/deviceimpl.h> 2280 2281 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2282 2283 /* 2284 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2285 because it will result in timing results that cannot be interpreted. 2286 */ 2287 static PetscErrorCode PetscLogGpuTime_Off(void) 2288 { 2289 PetscLogGpuTimeFlag = PETSC_FALSE; 2290 return 0; 2291 } 2292 2293 /*@C 2294 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2295 2296 Options Database: 2297 . -log_view_gpu_time - provide the GPU times in the -log_view output 2298 2299 Notes: 2300 Because the logging of GPU time requires blocking the CPU execution for each kernel, turning on the timing of the 2301 GPU kernels can slow down the entire computation and should only be used when studying the performance 2302 of operations on GPU such as vector operations and matrix-vector operations. 2303 2304 This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off. 2305 2306 Level: advanced 2307 2308 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2309 @*/ 2310 PetscErrorCode PetscLogGpuTime(void) 2311 { 2312 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2313 PetscLogGpuTimeFlag = PETSC_TRUE; 2314 return 0; 2315 } 2316 2317 /*@C 2318 PetscLogGpuTimeBegin - Start timer for device 2319 2320 Notes: 2321 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times). 2322 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times). 2323 There is no need to call WaitForCUDA() or WaitForHIP() between PetscLogGpuTimeBegin and PetscLogGpuTimeEnd 2324 This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space. 2325 The regular logging captures the time for data transfers and any CPU activites during the event 2326 It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel. 2327 2328 Developer Notes: 2329 The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between PetscLogGpuTimeBegin() and PetsLogGpuTimeEnd(). 2330 PetscLogGpuTimeBegin() and PetsLogGpuTimeEnd() insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the event when it reaches that event in the stream. The function xxxEventSynchronize() is called in PetsLogGpuTimeEnd() to block CPU execution, but not continued GPU excution, until the timer event is recorded. 2331 2332 Level: intermediate 2333 2334 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2335 @*/ 2336 PetscErrorCode PetscLogGpuTimeBegin(void) 2337 { 2338 PetscFunctionBegin; 2339 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2340 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2341 PetscDeviceContext dctx; 2342 2343 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2344 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2345 } else { 2346 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 /*@C 2352 PetscLogGpuTimeEnd - Stop timer for device 2353 2354 Level: intermediate 2355 2356 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2357 @*/ 2358 PetscErrorCode PetscLogGpuTimeEnd(void) 2359 { 2360 PetscFunctionBegin; 2361 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2362 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2363 PetscDeviceContext dctx; 2364 PetscLogDouble elapsed; 2365 2366 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2367 PetscCall(PetscDeviceContextEndTimer_Internal(dctx,&elapsed)); 2368 petsc_gtime += (elapsed/1000.0); 2369 } else { 2370 PetscCall(PetscTimeAdd(&petsc_gtime)); 2371 } 2372 PetscFunctionReturn(0); 2373 } 2374 #endif /* end of PETSC_HAVE_DEVICE */ 2375 2376 #else /* end of -DPETSC_USE_LOG section */ 2377 2378 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2379 { 2380 PetscFunctionBegin; 2381 PetscFunctionReturn(0); 2382 } 2383 2384 #endif /* PETSC_USE_LOG*/ 2385 2386 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2387 PetscClassId PETSC_OBJECT_CLASSID = 0; 2388 2389 /*@C 2390 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2391 2392 Not Collective 2393 2394 Input Parameter: 2395 . name - The class name 2396 2397 Output Parameter: 2398 . oclass - The class id or classid 2399 2400 Level: developer 2401 2402 @*/ 2403 PetscErrorCode PetscClassIdRegister(const char name[],PetscClassId *oclass) 2404 { 2405 #if defined(PETSC_USE_LOG) 2406 PetscStageLog stageLog; 2407 PetscInt stage; 2408 #endif 2409 2410 PetscFunctionBegin; 2411 *oclass = ++PETSC_LARGEST_CLASSID; 2412 #if defined(PETSC_USE_LOG) 2413 PetscCall(PetscLogGetStageLog(&stageLog)); 2414 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2415 for (stage = 0; stage < stageLog->numStages; stage++) { 2416 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2417 } 2418 #endif 2419 PetscFunctionReturn(0); 2420 } 2421 2422 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2423 #include <mpe.h> 2424 2425 PetscBool PetscBeganMPE = PETSC_FALSE; 2426 2427 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent,int,PetscObject,PetscObject,PetscObject,PetscObject); 2428 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent,int,PetscObject,PetscObject,PetscObject,PetscObject); 2429 2430 /*@C 2431 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files 2432 and slows the program down. 2433 2434 Collective over PETSC_COMM_WORLD 2435 2436 Options Database Keys: 2437 . -log_mpe - Prints extensive log information 2438 2439 Notes: 2440 A related routine is PetscLogDefaultBegin() (with the options key -log_view), which is 2441 intended for production runs since it logs only flop rates and object 2442 creation (and should not significantly slow the programs). 2443 2444 Level: advanced 2445 2446 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2447 `PetscLogEventDeactivate()` 2448 @*/ 2449 PetscErrorCode PetscLogMPEBegin(void) 2450 { 2451 PetscFunctionBegin; 2452 /* Do MPE initialization */ 2453 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2454 PetscCall(PetscInfo(0,"Initializing MPE.\n")); 2455 PetscCall(MPE_Init_log()); 2456 2457 PetscBeganMPE = PETSC_TRUE; 2458 } else { 2459 PetscCall(PetscInfo(0,"MPE already initialized. Not attempting to reinitialize.\n")); 2460 } 2461 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2462 PetscFunctionReturn(0); 2463 } 2464 2465 /*@C 2466 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2467 2468 Collective over PETSC_COMM_WORLD 2469 2470 Level: advanced 2471 2472 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2473 @*/ 2474 PetscErrorCode PetscLogMPEDump(const char sname[]) 2475 { 2476 char name[PETSC_MAX_PATH_LEN]; 2477 2478 PetscFunctionBegin; 2479 if (PetscBeganMPE) { 2480 PetscCall(PetscInfo(0,"Finalizing MPE.\n")); 2481 if (sname) { 2482 PetscCall(PetscStrcpy(name,sname)); 2483 } else { 2484 PetscCall(PetscGetProgramName(name,sizeof(name))); 2485 } 2486 PetscCall(MPE_Finish_log(name)); 2487 } else { 2488 PetscCall(PetscInfo(0,"Not finalizing MPE (not started by PETSc).\n")); 2489 } 2490 PetscFunctionReturn(0); 2491 } 2492 2493 #define PETSC_RGB_COLORS_MAX 39 2494 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = { 2495 "OliveDrab: ", 2496 "BlueViolet: ", 2497 "CadetBlue: ", 2498 "CornflowerBlue: ", 2499 "DarkGoldenrod: ", 2500 "DarkGreen: ", 2501 "DarkKhaki: ", 2502 "DarkOliveGreen: ", 2503 "DarkOrange: ", 2504 "DarkOrchid: ", 2505 "DarkSeaGreen: ", 2506 "DarkSlateGray: ", 2507 "DarkTurquoise: ", 2508 "DeepPink: ", 2509 "DarkKhaki: ", 2510 "DimGray: ", 2511 "DodgerBlue: ", 2512 "GreenYellow: ", 2513 "HotPink: ", 2514 "IndianRed: ", 2515 "LavenderBlush: ", 2516 "LawnGreen: ", 2517 "LemonChiffon: ", 2518 "LightCoral: ", 2519 "LightCyan: ", 2520 "LightPink: ", 2521 "LightSalmon: ", 2522 "LightSlateGray: ", 2523 "LightYellow: ", 2524 "LimeGreen: ", 2525 "MediumPurple: ", 2526 "MediumSeaGreen: ", 2527 "MediumSlateBlue:", 2528 "MidnightBlue: ", 2529 "MintCream: ", 2530 "MistyRose: ", 2531 "NavajoWhite: ", 2532 "NavyBlue: ", 2533 "OliveDrab: " 2534 }; 2535 2536 /*@C 2537 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with PetscLogEventRegister() 2538 2539 Not collective. Maybe it should be? 2540 2541 Output Parameter: 2542 . str - character string representing the color 2543 2544 Level: developer 2545 2546 .seealso: `PetscLogEventRegister` 2547 @*/ 2548 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) 2549 { 2550 static int idx = 0; 2551 2552 PetscFunctionBegin; 2553 *str = PetscLogMPERGBColors[idx]; 2554 idx = (idx + 1)% PETSC_RGB_COLORS_MAX; 2555 PetscFunctionReturn(0); 2556 } 2557 2558 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2559