1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 15 /* this is not consistently used and is difficult to keep correct if done manually, possibly it should be removed */ 16 PetscErrorCode PetscLogObjectParent(PetscObject p,PetscObject c) 17 { 18 if (!c || !p) return 0; 19 c->parent = p; 20 c->parentid = p->id; 21 return 0; 22 } 23 24 /*@C 25 PetscLogObjectMemory - Adds to an object a count of additional amount of memory that is used by the object. 26 27 Not collective. 28 29 Input Parameters: 30 + obj - the PETSc object 31 - mem - the amount of memory that is being added to the object 32 33 Level: developer 34 35 Developer Notes: 36 This is not used consistently. It is very difficult to manually track the memory usage per object so this should 37 likely be removed and replaced with an automated system. 38 39 .seealso: `PetscFinalize()`, `PetscInitializeFortran()`, `PetscGetArgs()`, `PetscInitializeNoArguments()` 40 41 @*/ 42 PetscErrorCode PetscLogObjectMemory(PetscObject p,PetscLogDouble m) 43 { 44 if (!p) return 0; 45 p->mem += m; 46 return 0; 47 } 48 49 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 50 51 #if defined(PETSC_USE_LOG) 52 #include <petscmachineinfo.h> 53 #include <petscconfiginfo.h> 54 55 /* used in the MPI_XXX() count macros in petsclog.h */ 56 57 /* Action and object logging variables */ 58 Action *petsc_actions = NULL; 59 Object *petsc_objects = NULL; 60 PetscBool petsc_logActions = PETSC_FALSE; 61 PetscBool petsc_logObjects = PETSC_FALSE; 62 int petsc_numActions = 0, petsc_maxActions = 100; 63 int petsc_numObjects = 0, petsc_maxObjects = 100; 64 int petsc_numObjectsDestroyed = 0; 65 66 /* Global counters */ 67 PetscLogDouble petsc_BaseTime = 0.0; 68 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 69 PetscLogDouble petsc_tmp_flops = 0.0; /* The incremental number of flops */ 70 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 71 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 72 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 73 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 74 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 75 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 76 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 77 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 78 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 79 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 80 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 81 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 82 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 83 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 84 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 85 #if defined(PETSC_HAVE_DEVICE) 86 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 87 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 88 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 89 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 90 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 91 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 92 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 93 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 94 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 95 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 96 #endif 97 98 /* Logging functions */ 99 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 100 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 101 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 102 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 103 104 /* Tracing event logging variables */ 105 FILE *petsc_tracefile = NULL; 106 int petsc_tracelevel = 0; 107 const char *petsc_traceblanks = " "; 108 char petsc_tracespace[128] = " "; 109 PetscLogDouble petsc_tracetime = 0.0; 110 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 111 112 static PetscIntStack current_log_event_stack = NULL; 113 114 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) 115 { 116 int stage; 117 PetscBool opt; 118 119 PetscFunctionBegin; 120 if (PetscLogInitializeCalled) PetscFunctionReturn(0); 121 PetscLogInitializeCalled = PETSC_TRUE; 122 123 PetscCall(PetscIntStackCreate(¤t_log_event_stack)); 124 PetscCall(PetscOptionsHasName(NULL,NULL, "-log_exclude_actions", &opt)); 125 if (opt) petsc_logActions = PETSC_FALSE; 126 PetscCall(PetscOptionsHasName(NULL,NULL, "-log_exclude_objects", &opt)); 127 if (opt) petsc_logObjects = PETSC_FALSE; 128 if (petsc_logActions) { 129 PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 130 } 131 if (petsc_logObjects) { 132 PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 133 } 134 PetscLogPHC = PetscLogObjCreateDefault; 135 PetscLogPHD = PetscLogObjDestroyDefault; 136 /* Setup default logging structures */ 137 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 138 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 139 140 /* All processors sync here for more consistent logging */ 141 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 142 PetscTime(&petsc_BaseTime); 143 PetscCall(PetscLogStagePush(stage)); 144 PetscFunctionReturn(0); 145 } 146 147 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) 148 { 149 PetscStageLog stageLog; 150 151 PetscFunctionBegin; 152 PetscCall(PetscFree(petsc_actions)); 153 PetscCall(PetscFree(petsc_objects)); 154 PetscCall(PetscLogNestedEnd()); 155 PetscCall(PetscLogSet(NULL, NULL)); 156 157 /* Resetting phase */ 158 PetscCall(PetscLogGetStageLog(&stageLog)); 159 PetscCall(PetscStageLogDestroy(stageLog)); 160 PetscCall(PetscIntStackDestroy(current_log_event_stack)); 161 current_log_event_stack = NULL; 162 163 petsc_TotalFlops = 0.0; 164 petsc_numActions = 0; 165 petsc_numObjects = 0; 166 petsc_numObjectsDestroyed = 0; 167 petsc_maxActions = 100; 168 petsc_maxObjects = 100; 169 petsc_actions = NULL; 170 petsc_objects = NULL; 171 petsc_logActions = PETSC_FALSE; 172 petsc_logObjects = PETSC_FALSE; 173 petsc_BaseTime = 0.0; 174 petsc_TotalFlops = 0.0; 175 petsc_tmp_flops = 0.0; 176 petsc_send_ct = 0.0; 177 petsc_recv_ct = 0.0; 178 petsc_send_len = 0.0; 179 petsc_recv_len = 0.0; 180 petsc_isend_ct = 0.0; 181 petsc_irecv_ct = 0.0; 182 petsc_isend_len = 0.0; 183 petsc_irecv_len = 0.0; 184 petsc_wait_ct = 0.0; 185 petsc_wait_any_ct = 0.0; 186 petsc_wait_all_ct = 0.0; 187 petsc_sum_of_waits_ct = 0.0; 188 petsc_allreduce_ct = 0.0; 189 petsc_gather_ct = 0.0; 190 petsc_scatter_ct = 0.0; 191 #if defined(PETSC_HAVE_DEVICE) 192 petsc_ctog_ct = 0.0; 193 petsc_gtoc_ct = 0.0; 194 petsc_ctog_sz = 0.0; 195 petsc_gtoc_sz = 0.0; 196 petsc_gflops = 0.0; 197 petsc_gtime = 0.0; 198 #endif 199 PETSC_LARGEST_EVENT = PETSC_EVENT; 200 PetscLogPHC = NULL; 201 PetscLogPHD = NULL; 202 petsc_tracefile = NULL; 203 petsc_tracelevel = 0; 204 petsc_traceblanks = " "; 205 petsc_tracespace[0] = ' '; petsc_tracespace[1] = 0; 206 petsc_tracetime = 0.0; 207 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 208 PETSC_OBJECT_CLASSID = 0; 209 petsc_stageLog = NULL; 210 PetscLogInitializeCalled = PETSC_FALSE; 211 PetscFunctionReturn(0); 212 } 213 214 /*@C 215 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 216 217 Not Collective 218 219 Input Parameters: 220 + b - The function called at beginning of event 221 - e - The function called at end of event 222 223 Level: developer 224 225 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()` 226 @*/ 227 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), 228 PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) 229 { 230 PetscFunctionBegin; 231 PetscLogPLB = b; 232 PetscLogPLE = e; 233 PetscFunctionReturn(0); 234 } 235 236 /*@C 237 PetscLogIsActive - Check if logging is currently in progress. 238 239 Not Collective 240 241 Output Parameter: 242 . isActive - PETSC_TRUE if logging is in progress, PETSC_FALSE otherwise 243 244 Level: beginner 245 246 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 247 @*/ 248 PetscErrorCode PetscLogIsActive(PetscBool *isActive) 249 { 250 PetscFunctionBegin; 251 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 252 PetscFunctionReturn(0); 253 } 254 255 /*@C 256 PetscLogDefaultBegin - Turns on logging of objects and events. This logs flop 257 rates and object creation and should not slow programs down too much. 258 This routine may be called more than once. 259 260 Logically Collective over PETSC_COMM_WORLD 261 262 Options Database Keys: 263 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 264 screen (for code configured with --with-log=1 (which is the default)) 265 266 Usage: 267 .vb 268 PetscInitialize(...); 269 PetscLogDefaultBegin(); 270 ... code ... 271 PetscLogView(viewer); or PetscLogDump(); 272 PetscFinalize(); 273 .ve 274 275 Notes: 276 PetscLogView(viewer) or PetscLogDump() actually cause the printing of 277 the logging information. 278 279 Level: advanced 280 281 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 282 @*/ 283 PetscErrorCode PetscLogDefaultBegin(void) 284 { 285 PetscFunctionBegin; 286 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 287 PetscFunctionReturn(0); 288 } 289 290 /*@C 291 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 292 all events. This creates large log files and slows the program down. 293 294 Logically Collective on PETSC_COMM_WORLD 295 296 Options Database Keys: 297 . -log_all - Prints extensive log information 298 299 Usage: 300 .vb 301 PetscInitialize(...); 302 PetscLogAllBegin(); 303 ... code ... 304 PetscLogDump(filename); 305 PetscFinalize(); 306 .ve 307 308 Notes: 309 A related routine is PetscLogDefaultBegin() (with the options key -log), which is 310 intended for production runs since it logs only flop rates and object 311 creation (and shouldn't significantly slow the programs). 312 313 Level: advanced 314 315 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 316 @*/ 317 PetscErrorCode PetscLogAllBegin(void) 318 { 319 PetscFunctionBegin; 320 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 321 PetscFunctionReturn(0); 322 } 323 324 /*@C 325 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 326 begins or ends, the event name is printed. 327 328 Logically Collective on PETSC_COMM_WORLD 329 330 Input Parameter: 331 . file - The file to print trace in (e.g. stdout) 332 333 Options Database Key: 334 . -log_trace [filename] - Activates PetscLogTraceBegin() 335 336 Notes: 337 PetscLogTraceBegin() prints the processor number, the execution time (sec), 338 then "Event begin:" or "Event end:" followed by the event name. 339 340 PetscLogTraceBegin() allows tracing of all PETSc calls, which is useful 341 to determine where a program is hanging without running in the 342 debugger. Can be used in conjunction with the -info option. 343 344 Level: intermediate 345 346 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 347 @*/ 348 PetscErrorCode PetscLogTraceBegin(FILE *file) 349 { 350 PetscFunctionBegin; 351 petsc_tracefile = file; 352 353 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 354 PetscFunctionReturn(0); 355 } 356 357 /*@ 358 PetscLogActions - Determines whether actions are logged for the graphical viewer. 359 360 Not Collective 361 362 Input Parameter: 363 . flag - PETSC_TRUE if actions are to be logged 364 365 Level: intermediate 366 367 Note: Logging of actions continues to consume more memory as the program 368 runs. Long running programs should consider turning this feature off. 369 370 Options Database Keys: 371 . -log_exclude_actions - Turns off actions logging 372 373 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 374 @*/ 375 PetscErrorCode PetscLogActions(PetscBool flag) 376 { 377 PetscFunctionBegin; 378 petsc_logActions = flag; 379 PetscFunctionReturn(0); 380 } 381 382 /*@ 383 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 384 385 Not Collective 386 387 Input Parameter: 388 . flag - PETSC_TRUE if objects are to be logged 389 390 Level: intermediate 391 392 Note: Logging of objects continues to consume more memory as the program 393 runs. Long running programs should consider turning this feature off. 394 395 Options Database Keys: 396 . -log_exclude_objects - Turns off objects logging 397 398 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 399 @*/ 400 PetscErrorCode PetscLogObjects(PetscBool flag) 401 { 402 PetscFunctionBegin; 403 petsc_logObjects = flag; 404 PetscFunctionReturn(0); 405 } 406 407 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 408 /*@C 409 PetscLogStageRegister - Attaches a character string name to a logging stage. 410 411 Not Collective 412 413 Input Parameter: 414 . sname - The name to associate with that stage 415 416 Output Parameter: 417 . stage - The stage number 418 419 Level: intermediate 420 421 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()` 422 @*/ 423 PetscErrorCode PetscLogStageRegister(const char sname[],PetscLogStage *stage) 424 { 425 PetscStageLog stageLog; 426 PetscLogEvent event; 427 428 PetscFunctionBegin; 429 PetscCall(PetscLogGetStageLog(&stageLog)); 430 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 431 /* Copy events already changed in the main stage, this sucks */ 432 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 433 for (event = 0; event < stageLog->eventLog->numEvents; event++) { 434 PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event],&stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 435 } 436 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 437 PetscFunctionReturn(0); 438 } 439 440 /*@C 441 PetscLogStagePush - This function pushes a stage on the stack. 442 443 Not Collective 444 445 Input Parameter: 446 . stage - The stage on which to log 447 448 Usage: 449 If the option -log_sumary is used to run the program containing the 450 following code, then 2 sets of summary data will be printed during 451 PetscFinalize(). 452 .vb 453 PetscInitialize(int *argc,char ***args,0,0); 454 [stage 0 of code] 455 PetscLogStagePush(1); 456 [stage 1 of code] 457 PetscLogStagePop(); 458 PetscBarrier(...); 459 [more stage 0 of code] 460 PetscFinalize(); 461 .ve 462 463 Notes: 464 Use PetscLogStageRegister() to register a stage. 465 466 Level: intermediate 467 468 .seealso: `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 469 @*/ 470 PetscErrorCode PetscLogStagePush(PetscLogStage stage) 471 { 472 PetscStageLog stageLog; 473 474 PetscFunctionBegin; 475 PetscCall(PetscLogGetStageLog(&stageLog)); 476 PetscCall(PetscStageLogPush(stageLog, stage)); 477 PetscFunctionReturn(0); 478 } 479 480 /*@C 481 PetscLogStagePop - This function pops a stage from the stack. 482 483 Not Collective 484 485 Usage: 486 If the option -log_sumary is used to run the program containing the 487 following code, then 2 sets of summary data will be printed during 488 PetscFinalize(). 489 .vb 490 PetscInitialize(int *argc,char ***args,0,0); 491 [stage 0 of code] 492 PetscLogStagePush(1); 493 [stage 1 of code] 494 PetscLogStagePop(); 495 PetscBarrier(...); 496 [more stage 0 of code] 497 PetscFinalize(); 498 .ve 499 500 Notes: 501 Use PetscLogStageRegister() to register a stage. 502 503 Level: intermediate 504 505 .seealso: `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 506 @*/ 507 PetscErrorCode PetscLogStagePop(void) 508 { 509 PetscStageLog stageLog; 510 511 PetscFunctionBegin; 512 PetscCall(PetscLogGetStageLog(&stageLog)); 513 PetscCall(PetscStageLogPop(stageLog)); 514 PetscFunctionReturn(0); 515 } 516 517 /*@ 518 PetscLogStageSetActive - Determines stage activity for PetscLogEventBegin() and PetscLogEventEnd(). 519 520 Not Collective 521 522 Input Parameters: 523 + stage - The stage 524 - isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE) 525 526 Level: intermediate 527 528 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 529 @*/ 530 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) 531 { 532 PetscStageLog stageLog; 533 534 PetscFunctionBegin; 535 PetscCall(PetscLogGetStageLog(&stageLog)); 536 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 537 PetscFunctionReturn(0); 538 } 539 540 /*@ 541 PetscLogStageGetActive - Returns stage activity for PetscLogEventBegin() and PetscLogEventEnd(). 542 543 Not Collective 544 545 Input Parameter: 546 . stage - The stage 547 548 Output Parameter: 549 . isActive - The activity flag, PETSC_TRUE for logging, else PETSC_FALSE (defaults to PETSC_TRUE) 550 551 Level: intermediate 552 553 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 554 @*/ 555 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) 556 { 557 PetscStageLog stageLog; 558 559 PetscFunctionBegin; 560 PetscCall(PetscLogGetStageLog(&stageLog)); 561 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 562 PetscFunctionReturn(0); 563 } 564 565 /*@ 566 PetscLogStageSetVisible - Determines stage visibility in PetscLogView() 567 568 Not Collective 569 570 Input Parameters: 571 + stage - The stage 572 - isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE) 573 574 Level: intermediate 575 576 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 577 @*/ 578 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) 579 { 580 PetscStageLog stageLog; 581 582 PetscFunctionBegin; 583 PetscCall(PetscLogGetStageLog(&stageLog)); 584 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 585 PetscFunctionReturn(0); 586 } 587 588 /*@ 589 PetscLogStageGetVisible - Returns stage visibility in PetscLogView() 590 591 Not Collective 592 593 Input Parameter: 594 . stage - The stage 595 596 Output Parameter: 597 . isVisible - The visibility flag, PETSC_TRUE to print, else PETSC_FALSE (defaults to PETSC_TRUE) 598 599 Level: intermediate 600 601 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 602 @*/ 603 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) 604 { 605 PetscStageLog stageLog; 606 607 PetscFunctionBegin; 608 PetscCall(PetscLogGetStageLog(&stageLog)); 609 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 610 PetscFunctionReturn(0); 611 } 612 613 /*@C 614 PetscLogStageGetId - Returns the stage id when given the stage name. 615 616 Not Collective 617 618 Input Parameter: 619 . name - The stage name 620 621 Output Parameter: 622 . stage - The stage, , or -1 if no stage with that name exists 623 624 Level: intermediate 625 626 .seealso: `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 627 @*/ 628 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) 629 { 630 PetscStageLog stageLog; 631 632 PetscFunctionBegin; 633 PetscCall(PetscLogGetStageLog(&stageLog)); 634 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 635 PetscFunctionReturn(0); 636 } 637 638 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 639 640 /*@C 641 PetscLogEventRegister - Registers an event name for logging operations in an application code. 642 643 Not Collective 644 645 Input Parameters: 646 + name - The name associated with the event 647 - classid - The classid associated to the class for this event, obtain either with 648 PetscClassIdRegister() or use a predefined one such as KSP_CLASSID, SNES_CLASSID, the predefined ones 649 are only available in C code 650 651 Output Parameter: 652 . event - The event id for use with PetscLogEventBegin() and PetscLogEventEnd(). 653 654 Example of Usage: 655 .vb 656 PetscLogEvent USER_EVENT; 657 PetscClassId classid; 658 PetscLogDouble user_event_flops; 659 PetscClassIdRegister("class name",&classid); 660 PetscLogEventRegister("User event name",classid,&USER_EVENT); 661 PetscLogEventBegin(USER_EVENT,0,0,0,0); 662 [code segment to monitor] 663 PetscLogFlops(user_event_flops); 664 PetscLogEventEnd(USER_EVENT,0,0,0,0); 665 .ve 666 667 Notes: 668 PETSc automatically logs library events if the code has been 669 configured with --with-log (which is the default) and 670 -log_view or -log_all is specified. PetscLogEventRegister() is 671 intended for logging user events to supplement this PETSc 672 information. 673 674 PETSc can gather data for use with the utilities Jumpshot 675 (part of the MPICH distribution). If PETSc has been compiled 676 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 677 MPICH), the user can employ another command line option, -log_mpe, 678 to create a logfile, "mpe.log", which can be visualized 679 Jumpshot. 680 681 The classid is associated with each event so that classes of events 682 can be disabled simultaneously, such as all matrix events. The user 683 can either use an existing classid, such as MAT_CLASSID, or create 684 their own as shown in the example. 685 686 If an existing event with the same name exists, its event handle is 687 returned instead of creating a new event. 688 689 Level: intermediate 690 691 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 692 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 693 @*/ 694 PetscErrorCode PetscLogEventRegister(const char name[],PetscClassId classid,PetscLogEvent *event) 695 { 696 PetscStageLog stageLog; 697 int stage; 698 699 PetscFunctionBegin; 700 *event = PETSC_DECIDE; 701 PetscCall(PetscLogGetStageLog(&stageLog)); 702 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 703 if (*event > 0) PetscFunctionReturn(0); 704 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 705 for (stage = 0; stage < stageLog->numStages; stage++) { 706 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 707 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 708 } 709 PetscFunctionReturn(0); 710 } 711 712 /*@ 713 PetscLogEventSetCollective - Indicates that a particular event is collective. 714 715 Not Collective 716 717 Input Parameters: 718 + event - The event id 719 - collective - Bolean flag indicating whether a particular event is collective 720 721 Note: 722 New events returned from PetscLogEventRegister() are collective by default. 723 724 Level: developer 725 726 .seealso: `PetscLogEventRegister()` 727 @*/ 728 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event,PetscBool collective) 729 { 730 PetscStageLog stageLog; 731 PetscEventRegLog eventRegLog; 732 733 PetscFunctionBegin; 734 PetscCall(PetscLogGetStageLog(&stageLog)); 735 PetscCall(PetscStageLogGetEventRegLog(stageLog,&eventRegLog)); 736 PetscCheck(event >= 0 && event <= eventRegLog->numEvents,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid event id"); 737 eventRegLog->eventInfo[event].collective = collective; 738 PetscFunctionReturn(0); 739 } 740 741 /*@ 742 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 743 744 Not Collective 745 746 Input Parameter: 747 . classid - The object class, for example MAT_CLASSID, SNES_CLASSID, etc. 748 749 Level: developer 750 751 .seealso: `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 752 @*/ 753 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) 754 { 755 PetscStageLog stageLog; 756 int stage; 757 758 PetscFunctionBegin; 759 PetscCall(PetscLogGetStageLog(&stageLog)); 760 for (stage = 0; stage < stageLog->numStages; stage++) { 761 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 762 } 763 PetscFunctionReturn(0); 764 } 765 766 /*@ 767 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 768 769 Not Collective 770 771 Input Parameter: 772 . classid - The object class, for example MAT_CLASSID, SNES_CLASSID, etc. 773 774 Level: developer 775 776 .seealso: `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 777 @*/ 778 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) 779 { 780 PetscStageLog stageLog; 781 int stage; 782 783 PetscFunctionBegin; 784 PetscCall(PetscLogGetStageLog(&stageLog)); 785 for (stage = 0; stage < stageLog->numStages; stage++) { 786 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 787 } 788 PetscFunctionReturn(0); 789 } 790 791 /*@ 792 PetscLogEventActivate - Indicates that a particular event should be logged. 793 794 Not Collective 795 796 Input Parameter: 797 . event - The event id 798 799 Usage: 800 .vb 801 PetscLogEventDeactivate(VEC_SetValues); 802 [code where you do not want to log VecSetValues()] 803 PetscLogEventActivate(VEC_SetValues); 804 [code where you do want to log VecSetValues()] 805 .ve 806 807 Note: 808 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 809 or an event number obtained with PetscLogEventRegister(). 810 811 Level: advanced 812 813 .seealso: `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 814 @*/ 815 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) 816 { 817 PetscStageLog stageLog; 818 int stage; 819 820 PetscFunctionBegin; 821 PetscCall(PetscLogGetStageLog(&stageLog)); 822 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 823 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 824 PetscFunctionReturn(0); 825 } 826 827 /*@ 828 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 829 830 Not Collective 831 832 Input Parameter: 833 . event - The event id 834 835 Usage: 836 .vb 837 PetscLogEventDeactivate(VEC_SetValues); 838 [code where you do not want to log VecSetValues()] 839 PetscLogEventActivate(VEC_SetValues); 840 [code where you do want to log VecSetValues()] 841 .ve 842 843 Note: 844 The event may be either a pre-defined PETSc event (found in 845 include/petsclog.h) or an event number obtained with PetscLogEventRegister()). 846 847 Level: advanced 848 849 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 850 @*/ 851 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) 852 { 853 PetscStageLog stageLog; 854 int stage; 855 856 PetscFunctionBegin; 857 PetscCall(PetscLogGetStageLog(&stageLog)); 858 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 859 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 860 PetscFunctionReturn(0); 861 } 862 863 /*@ 864 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged. 865 866 Not Collective 867 868 Input Parameter: 869 . event - The event id 870 871 Usage: 872 .vb 873 PetscLogEventDeactivatePush(VEC_SetValues); 874 [code where you do not want to log VecSetValues()] 875 PetscLogEventDeactivatePop(VEC_SetValues); 876 [code where you do want to log VecSetValues()] 877 .ve 878 879 Note: 880 The event may be either a pre-defined PETSc event (found in 881 include/petsclog.h) or an event number obtained with PetscLogEventRegister()). 882 883 Level: advanced 884 885 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()` 886 @*/ 887 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) 888 { 889 PetscStageLog stageLog; 890 int stage; 891 892 PetscFunctionBegin; 893 PetscCall(PetscLogGetStageLog(&stageLog)); 894 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 895 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 896 PetscFunctionReturn(0); 897 } 898 899 /*@ 900 PetscLogEventDeactivatePop - Indicates that a particular event should be logged. 901 902 Not Collective 903 904 Input Parameter: 905 . event - The event id 906 907 Usage: 908 .vb 909 PetscLogEventDeactivatePush(VEC_SetValues); 910 [code where you do not want to log VecSetValues()] 911 PetscLogEventDeactivatePop(VEC_SetValues); 912 [code where you do want to log VecSetValues()] 913 .ve 914 915 Note: 916 The event may be either a pre-defined PETSc event (found in 917 include/petsclog.h) or an event number obtained with PetscLogEventRegister()). 918 919 Level: advanced 920 921 .seealso: `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 922 @*/ 923 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) 924 { 925 PetscStageLog stageLog; 926 int stage; 927 928 PetscFunctionBegin; 929 PetscCall(PetscLogGetStageLog(&stageLog)); 930 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 931 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 932 PetscFunctionReturn(0); 933 } 934 935 /*@ 936 PetscLogEventSetActiveAll - Sets the event activity in every stage. 937 938 Not Collective 939 940 Input Parameters: 941 + event - The event id 942 - isActive - The activity flag determining whether the event is logged 943 944 Level: advanced 945 946 .seealso: `PlogEventActivate()`, `PlogEventDeactivate()` 947 @*/ 948 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) 949 { 950 PetscStageLog stageLog; 951 int stage; 952 953 PetscFunctionBegin; 954 PetscCall(PetscLogGetStageLog(&stageLog)); 955 for (stage = 0; stage < stageLog->numStages; stage++) { 956 if (isActive) { 957 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 958 } else { 959 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 960 } 961 } 962 PetscFunctionReturn(0); 963 } 964 965 /*@ 966 PetscLogEventActivateClass - Activates event logging for a PETSc object class. 967 968 Not Collective 969 970 Input Parameter: 971 . classid - The event class, for example MAT_CLASSID, SNES_CLASSID, etc. 972 973 Level: developer 974 975 .seealso: `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 976 @*/ 977 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) 978 { 979 PetscStageLog stageLog; 980 int stage; 981 982 PetscFunctionBegin; 983 PetscCall(PetscLogGetStageLog(&stageLog)); 984 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 985 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 986 PetscFunctionReturn(0); 987 } 988 989 /*@ 990 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class. 991 992 Not Collective 993 994 Input Parameter: 995 . classid - The event class, for example MAT_CLASSID, SNES_CLASSID, etc. 996 997 Level: developer 998 999 .seealso: `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 1000 @*/ 1001 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) 1002 { 1003 PetscStageLog stageLog; 1004 int stage; 1005 1006 PetscFunctionBegin; 1007 PetscCall(PetscLogGetStageLog(&stageLog)); 1008 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1009 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 /*MC 1014 PetscLogEventSync - Synchronizes the beginning of a user event. 1015 1016 Synopsis: 1017 #include <petsclog.h> 1018 PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm) 1019 1020 Collective 1021 1022 Input Parameters: 1023 + e - integer associated with the event obtained from PetscLogEventRegister() 1024 - comm - an MPI communicator 1025 1026 Usage: 1027 .vb 1028 PetscLogEvent USER_EVENT; 1029 PetscLogEventRegister("User event",0,&USER_EVENT); 1030 PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD); 1031 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1032 [code segment to monitor] 1033 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1034 .ve 1035 1036 Notes: 1037 This routine should be called only if there is not a 1038 PetscObject available to pass to PetscLogEventBegin(). 1039 1040 Level: developer 1041 1042 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1043 1044 M*/ 1045 1046 /*MC 1047 PetscLogEventBegin - Logs the beginning of a user event. 1048 1049 Synopsis: 1050 #include <petsclog.h> 1051 PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1052 1053 Not Collective 1054 1055 Input Parameters: 1056 + e - integer associated with the event obtained from PetscLogEventRegister() 1057 - o1,o2,o3,o4 - objects associated with the event, or 0 1058 1059 Fortran Synopsis: 1060 void PetscLogEventBegin(int e,PetscErrorCode ierr) 1061 1062 Usage: 1063 .vb 1064 PetscLogEvent USER_EVENT; 1065 PetscLogDouble user_event_flops; 1066 PetscLogEventRegister("User event",0,&USER_EVENT); 1067 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1068 [code segment to monitor] 1069 PetscLogFlops(user_event_flops); 1070 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1071 .ve 1072 1073 Notes: 1074 You need to register each integer event with the command 1075 PetscLogEventRegister(). 1076 1077 Developer Notes: 1078 PetscLogEventBegin() and PetscLogEventBegin() return error codes instead of explicitly handling the 1079 errors that occur in the macro directly because other packages that use this macros have used them in their 1080 own functions or methods that do not return error codes and it would be disruptive to change the current 1081 behavior. 1082 1083 Level: intermediate 1084 1085 .seealso: `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1086 1087 M*/ 1088 1089 /*MC 1090 PetscLogEventEnd - Log the end of a user event. 1091 1092 Synopsis: 1093 #include <petsclog.h> 1094 PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1095 1096 Not Collective 1097 1098 Input Parameters: 1099 + e - integer associated with the event obtained with PetscLogEventRegister() 1100 - o1,o2,o3,o4 - objects associated with the event, or 0 1101 1102 Fortran Synopsis: 1103 void PetscLogEventEnd(int e,PetscErrorCode ierr) 1104 1105 Usage: 1106 .vb 1107 PetscLogEvent USER_EVENT; 1108 PetscLogDouble user_event_flops; 1109 PetscLogEventRegister("User event",0,&USER_EVENT,); 1110 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1111 [code segment to monitor] 1112 PetscLogFlops(user_event_flops); 1113 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1114 .ve 1115 1116 Notes: 1117 You should also register each additional integer event with the command 1118 PetscLogEventRegister(). 1119 1120 Level: intermediate 1121 1122 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1123 1124 M*/ 1125 1126 /*@C 1127 PetscLogEventGetId - Returns the event id when given the event name. 1128 1129 Not Collective 1130 1131 Input Parameter: 1132 . name - The event name 1133 1134 Output Parameter: 1135 . event - The event, or -1 if no event with that name exists 1136 1137 Level: intermediate 1138 1139 .seealso: `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1140 @*/ 1141 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) 1142 { 1143 PetscStageLog stageLog; 1144 1145 PetscFunctionBegin; 1146 PetscCall(PetscLogGetStageLog(&stageLog)); 1147 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1148 PetscFunctionReturn(0); 1149 } 1150 1151 PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event) 1152 { 1153 PetscFunctionBegin; 1154 PetscCall(PetscIntStackPush(current_log_event_stack,event)); 1155 PetscFunctionReturn(0); 1156 } 1157 1158 PetscErrorCode PetscLogPopCurrentEvent_Internal(void) 1159 { 1160 PetscFunctionBegin; 1161 PetscCall(PetscIntStackPop(current_log_event_stack,NULL)); 1162 PetscFunctionReturn(0); 1163 } 1164 1165 PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event) 1166 { 1167 PetscBool empty; 1168 1169 PetscFunctionBegin; 1170 PetscValidIntPointer(event,1); 1171 *event = PETSC_DECIDE; 1172 PetscCall(PetscIntStackEmpty(current_log_event_stack,&empty)); 1173 if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack,event)); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event) 1178 { 1179 PetscFunctionBegin; 1180 if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event,NULL,NULL,NULL,NULL)); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event) 1185 { 1186 PetscStageLog stageLog; 1187 PetscEventPerfLog eventLog; 1188 int stage; 1189 1190 PetscFunctionBegin; 1191 if (event == PETSC_DECIDE) PetscFunctionReturn(0); 1192 PetscCall(PetscLogEventBegin(event,NULL,NULL,NULL,NULL)); 1193 PetscCall(PetscLogGetStageLog(&stageLog)); 1194 PetscCall(PetscStageLogGetCurrent(stageLog,&stage)); 1195 PetscCall(PetscStageLogGetEventPerfLog(stageLog,stage,&eventLog)); 1196 eventLog->eventInfo[event].count--; 1197 PetscFunctionReturn(0); 1198 } 1199 1200 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1201 /*@C 1202 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1203 be read by bin/petscview. This program no longer exists. 1204 1205 Collective on PETSC_COMM_WORLD 1206 1207 Input Parameter: 1208 . name - an optional file name 1209 1210 Usage: 1211 .vb 1212 PetscInitialize(...); 1213 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1214 ... code ... 1215 PetscLogDump(filename); 1216 PetscFinalize(); 1217 .ve 1218 1219 Notes: 1220 The default file name is 1221 $ Log.<rank> 1222 where <rank> is the processor number. If no name is specified, 1223 this file will be used. 1224 1225 Level: advanced 1226 1227 .seealso: `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1228 @*/ 1229 PetscErrorCode PetscLogDump(const char sname[]) 1230 { 1231 PetscStageLog stageLog; 1232 PetscEventPerfInfo *eventInfo; 1233 FILE *fd; 1234 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1235 PetscLogDouble flops, _TotalTime; 1236 PetscMPIInt rank; 1237 int action, object, curStage; 1238 PetscLogEvent event; 1239 1240 PetscFunctionBegin; 1241 /* Calculate the total elapsed time */ 1242 PetscTime(&_TotalTime); 1243 _TotalTime -= petsc_BaseTime; 1244 /* Open log file */ 1245 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1246 if (sname && sname[0]) sprintf(file, "%s.%d", sname, rank); 1247 else sprintf(file, "Log.%d", rank); 1248 PetscCall(PetscFixFilename(file, fname)); 1249 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1250 PetscCheck(!(rank == 0) || !(!fd),PETSC_COMM_SELF,PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1251 /* Output totals */ 1252 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1253 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1254 /* Output actions */ 1255 if (petsc_logActions) { 1256 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1257 for (action = 0; action < petsc_numActions; action++) { 1258 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", 1259 petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1260 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1261 } 1262 } 1263 /* Output objects */ 1264 if (petsc_logObjects) { 1265 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1266 for (object = 0; object < petsc_numObjects; object++) { 1267 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int) petsc_objects[object].mem)); 1268 if (!petsc_objects[object].name[0]) { 1269 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd,"No Name\n")); 1270 } else { 1271 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1272 } 1273 if (petsc_objects[object].info[0] != 0) { 1274 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1275 } else { 1276 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1277 } 1278 } 1279 } 1280 /* Output events */ 1281 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1282 PetscCall(PetscLogGetStageLog(&stageLog)); 1283 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1284 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1285 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1286 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops/eventInfo[event].time; 1287 else flops = 0.0; 1288 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count,eventInfo[event].flops, eventInfo[event].time, flops)); 1289 } 1290 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1291 PetscFunctionReturn(0); 1292 } 1293 1294 /* 1295 PetscLogView_Detailed - Each process prints the times for its own events 1296 1297 */ 1298 PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) 1299 { 1300 PetscStageLog stageLog; 1301 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1302 PetscLogDouble locTotalTime, numRed, maxMem; 1303 int numStages,numEvents,stage,event; 1304 MPI_Comm comm = PetscObjectComm((PetscObject) viewer); 1305 PetscMPIInt rank,size; 1306 1307 PetscFunctionBegin; 1308 PetscCallMPI(MPI_Comm_size(comm, &size)); 1309 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1310 /* Must preserve reduction count before we go on */ 1311 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1312 /* Get the total elapsed time */ 1313 PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime; 1314 PetscCall(PetscViewerASCIIPrintf(viewer,"size = %d\n",size)); 1315 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalTimes = {}\n")); 1316 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalMessages = {}\n")); 1317 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalMessageLens = {}\n")); 1318 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalReductions = {}\n")); 1319 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalFlop = {}\n")); 1320 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalObjects = {}\n")); 1321 PetscCall(PetscViewerASCIIPrintf(viewer,"LocalMemory = {}\n")); 1322 PetscCall(PetscLogGetStageLog(&stageLog)); 1323 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1324 PetscCall(PetscViewerASCIIPrintf(viewer,"Stages = {}\n")); 1325 for (stage=0; stage<numStages; stage++) { 1326 PetscCall(PetscViewerASCIIPrintf(viewer,"Stages[\"%s\"] = {}\n",stageLog->stageInfo[stage].name)); 1327 PetscCall(PetscViewerASCIIPrintf(viewer,"Stages[\"%s\"][\"summary\"] = {}\n",stageLog->stageInfo[stage].name)); 1328 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1329 for (event = 0; event < numEvents; event++) { 1330 PetscCall(PetscViewerASCIIPrintf(viewer,"Stages[\"%s\"][\"%s\"] = {}\n",stageLog->stageInfo[stage].name,stageLog->eventLog->eventInfo[event].name)); 1331 } 1332 } 1333 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1334 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1335 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalTimes[%d] = %g\n",rank,locTotalTime)); 1336 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalMessages[%d] = %g\n",rank,(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1337 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalMessageLens[%d] = %g\n",rank,(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1338 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalReductions[%d] = %g\n",rank,numRed)); 1339 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalFlop[%d] = %g\n",rank,petsc_TotalFlops)); 1340 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalObjects[%d] = %d\n",rank,petsc_numObjects)); 1341 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"LocalMemory[%d] = %g\n",rank,maxMem)); 1342 PetscCall(PetscViewerFlush(viewer)); 1343 for (stage=0; stage<numStages; stage++) { 1344 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1345 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", 1346 stageLog->stageInfo[stage].name,rank, 1347 stageInfo->time,stageInfo->numMessages,stageInfo->messageLength,stageInfo->numReductions,stageInfo->flops)); 1348 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1349 for (event = 0; event < numEvents; event++) { 1350 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1351 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1352 stageLog->stageInfo[stage].name,stageLog->eventLog->eventInfo[event].name,rank, 1353 eventInfo->count,eventInfo->time,eventInfo->syncTime,eventInfo->numMessages,eventInfo->messageLength,eventInfo->numReductions,eventInfo->flops)); 1354 if (eventInfo->dof[0] >= 0.) { 1355 PetscInt d, e; 1356 1357 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1358 for (d = 0; d < 8; ++d) { 1359 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1360 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1361 } 1362 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1363 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1364 for (e = 0; e < 8; ++e) { 1365 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1366 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1367 } 1368 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1369 } 1370 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"}\n")); 1371 } 1372 } 1373 PetscCall(PetscViewerFlush(viewer)); 1374 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1375 PetscFunctionReturn(0); 1376 } 1377 1378 /* 1379 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1380 */ 1381 PetscErrorCode PetscLogView_CSV(PetscViewer viewer) 1382 { 1383 PetscStageLog stageLog; 1384 PetscEventPerfInfo *eventInfo = NULL; 1385 PetscLogDouble locTotalTime, maxMem; 1386 int numStages,numEvents,stage,event; 1387 MPI_Comm comm = PetscObjectComm((PetscObject) viewer); 1388 PetscMPIInt rank,size; 1389 1390 PetscFunctionBegin; 1391 PetscCallMPI(MPI_Comm_size(comm, &size)); 1392 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1393 /* Must preserve reduction count before we go on */ 1394 /* Get the total elapsed time */ 1395 PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime; 1396 PetscCall(PetscLogGetStageLog(&stageLog)); 1397 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1398 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1399 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1400 PetscCall(PetscViewerASCIIPrintf(viewer,"Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1401 PetscCall(PetscViewerFlush(viewer)); 1402 for (stage=0; stage<numStages; stage++) { 1403 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1404 1405 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"%s,summary,%d,1,%g,%g,%g,%g,%g\n", 1406 stageLog->stageInfo[stage].name,rank,stageInfo->time,stageInfo->numMessages,stageInfo->messageLength,stageInfo->numReductions,stageInfo->flops)); 1407 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1408 for (event = 0; event < numEvents; event++) { 1409 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1410 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"%s,%s,%d,%d,%g,%g,%g,%g,%g",stageLog->stageInfo[stage].name, 1411 stageLog->eventLog->eventInfo[event].name,rank,eventInfo->count,eventInfo->time,eventInfo->numMessages, 1412 eventInfo->messageLength,eventInfo->numReductions,eventInfo->flops)); 1413 if (eventInfo->dof[0] >= 0.) { 1414 PetscInt d, e; 1415 1416 for (d = 0; d < 8; ++d) { 1417 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1418 } 1419 for (e = 0; e < 8; ++e) { 1420 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1421 } 1422 } 1423 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"\n")); 1424 } 1425 } 1426 PetscCall(PetscViewerFlush(viewer)); 1427 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1428 PetscFunctionReturn(0); 1429 } 1430 1431 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm,FILE *fd) 1432 { 1433 PetscFunctionBegin; 1434 if (!PetscLogSyncOn) PetscFunctionReturn(0); 1435 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1436 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1437 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1438 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1439 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1440 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1441 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1442 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1443 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1444 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1445 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1446 PetscFunctionReturn(0); 1447 } 1448 1449 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm,FILE *fd) 1450 { 1451 PetscFunctionBegin; 1452 if (PetscDefined(USE_DEBUG)) { 1453 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1454 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1455 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1456 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1457 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1458 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1459 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1460 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1461 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1462 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1463 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm,FILE *fd) 1469 { 1470 #if defined(PETSC_HAVE_DEVICE) 1471 PetscMPIInt size; 1472 1473 PetscFunctionBegin; 1474 PetscCallMPI(MPI_Comm_size(comm, &size)); 1475 if (use_gpu_aware_mpi || size == 1) PetscFunctionReturn(0); 1476 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1477 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1478 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1479 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1480 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1481 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1482 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1483 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1484 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1485 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1486 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1487 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1488 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1489 PetscFunctionReturn(0); 1490 #else 1491 return 0; 1492 #endif 1493 } 1494 1495 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm,FILE *fd) 1496 { 1497 #if defined(PETSC_HAVE_DEVICE) 1498 1499 PetscFunctionBegin; 1500 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(0); 1501 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1502 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1503 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1504 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1505 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1506 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1507 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1508 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1509 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1510 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1511 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1512 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1513 PetscFunctionReturn(0); 1514 #else 1515 return 0; 1516 #endif 1517 } 1518 1519 PetscErrorCode PetscLogView_Default(PetscViewer viewer) 1520 { 1521 FILE *fd; 1522 PetscLogDouble zero = 0.0; 1523 PetscStageLog stageLog; 1524 PetscStageInfo *stageInfo = NULL; 1525 PetscEventPerfInfo *eventInfo = NULL; 1526 PetscClassPerfInfo *classInfo; 1527 char arch[128],hostname[128],username[128],pname[PETSC_MAX_PATH_LEN],date[128]; 1528 const char *name; 1529 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1530 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1531 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1532 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1533 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1534 PetscLogDouble min, max, tot, ratio, avg, x, y; 1535 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1536 #if defined(PETSC_HAVE_DEVICE) 1537 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1538 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1539 #endif 1540 PetscMPIInt minC, maxC; 1541 PetscMPIInt size, rank; 1542 PetscBool *localStageUsed, *stageUsed; 1543 PetscBool *localStageVisible, *stageVisible; 1544 int numStages, localNumEvents, numEvents; 1545 int stage, oclass; 1546 PetscLogEvent event; 1547 PetscErrorCode ierr = 0; 1548 char version[256]; 1549 MPI_Comm comm; 1550 #if defined(PETSC_HAVE_DEVICE) 1551 PetscLogEvent eventid; 1552 PetscInt64 nas = 0x7FF0000000000002; 1553 #endif 1554 1555 PetscFunctionBegin; 1556 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1557 PetscCall(PetscObjectGetComm((PetscObject)viewer,&comm)); 1558 PetscCall(PetscViewerASCIIGetPointer(viewer,&fd)); 1559 PetscCallMPI(MPI_Comm_size(comm, &size)); 1560 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1561 /* Get the total elapsed time */ 1562 PetscTime(&locTotalTime); locTotalTime -= petsc_BaseTime; 1563 1564 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1565 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1566 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1567 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1568 PetscCall(PetscLogViewWarnSync(comm,fd)); 1569 PetscCall(PetscLogViewWarnDebugging(comm,fd)); 1570 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm,fd)); 1571 PetscCall(PetscLogViewWarnGpuTime(comm,fd)); 1572 PetscCall(PetscGetArchType(arch,sizeof(arch))); 1573 PetscCall(PetscGetHostName(hostname,sizeof(hostname))); 1574 PetscCall(PetscGetUserName(username,sizeof(username))); 1575 PetscCall(PetscGetProgramName(pname,sizeof(pname))); 1576 PetscCall(PetscGetDate(date,sizeof(date))); 1577 PetscCall(PetscGetVersion(version,sizeof(version))); 1578 if (size == 1) { 1579 PetscCall(PetscFPrintf(comm,fd,"%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1580 } else { 1581 PetscCall(PetscFPrintf(comm,fd,"%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1582 } 1583 #if defined(PETSC_HAVE_OPENMP) 1584 PetscCall(PetscFPrintf(comm,fd,"Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1585 #endif 1586 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1587 1588 /* Must preserve reduction count before we go on */ 1589 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1590 1591 /* Calculate summary information */ 1592 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1593 /* Time */ 1594 PetscCallMPI(MPI_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1595 PetscCallMPI(MPI_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1596 PetscCallMPI(MPI_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1597 avg = tot/((PetscLogDouble) size); 1598 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1599 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1600 TotalTime = tot; 1601 /* Objects */ 1602 avg = (PetscLogDouble) petsc_numObjects; 1603 PetscCallMPI(MPI_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1604 PetscCallMPI(MPI_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1605 PetscCallMPI(MPI_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1606 avg = tot/((PetscLogDouble) size); 1607 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1608 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1609 /* Flops */ 1610 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1611 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1612 PetscCallMPI(MPI_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1613 avg = tot/((PetscLogDouble) size); 1614 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1615 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1616 TotalFlops = tot; 1617 /* Flops/sec -- Must talk to Barry here */ 1618 if (locTotalTime != 0.0) flops = petsc_TotalFlops/locTotalTime; else flops = 0.0; 1619 PetscCallMPI(MPI_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1620 PetscCallMPI(MPI_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1621 PetscCallMPI(MPI_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1622 avg = tot/((PetscLogDouble) size); 1623 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1624 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1625 /* Memory */ 1626 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1627 if (mem > 0.0) { 1628 PetscCallMPI(MPI_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1629 PetscCallMPI(MPI_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1630 PetscCallMPI(MPI_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1631 avg = tot/((PetscLogDouble) size); 1632 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1633 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1634 } 1635 /* Messages */ 1636 mess = 0.5*(petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1637 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1638 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1639 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1640 avg = tot/((PetscLogDouble) size); 1641 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1642 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1643 numMessages = tot; 1644 /* Message Lengths */ 1645 mess = 0.5*(petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1646 PetscCallMPI(MPI_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1647 PetscCallMPI(MPI_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1648 PetscCallMPI(MPI_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1649 if (numMessages != 0) avg = tot/numMessages; else avg = 0.0; 1650 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1651 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1652 messageLength = tot; 1653 /* Reductions */ 1654 PetscCallMPI(MPI_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1655 PetscCallMPI(MPI_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1656 PetscCallMPI(MPI_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1657 if (min != 0.0) ratio = max/min; else ratio = 0.0; 1658 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1659 numReductions = red; /* wrong because uses count from process zero */ 1660 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1661 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1662 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1663 1664 /* Get total number of stages -- 1665 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1666 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1667 This seems best accomplished by assoicating a communicator with each stage. 1668 */ 1669 PetscCall(PetscLogGetStageLog(&stageLog)); 1670 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1671 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1672 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1673 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1674 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1675 if (numStages > 0) { 1676 stageInfo = stageLog->stageInfo; 1677 for (stage = 0; stage < numStages; stage++) { 1678 if (stage < stageLog->numStages) { 1679 localStageUsed[stage] = stageInfo[stage].used; 1680 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1681 } else { 1682 localStageUsed[stage] = PETSC_FALSE; 1683 localStageVisible[stage] = PETSC_TRUE; 1684 } 1685 } 1686 PetscCallMPI(MPI_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1687 PetscCallMPI(MPI_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1688 for (stage = 0; stage < numStages; stage++) { 1689 if (stageUsed[stage]) { 1690 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1691 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1692 break; 1693 } 1694 } 1695 for (stage = 0; stage < numStages; stage++) { 1696 if (!stageUsed[stage]) continue; 1697 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1698 if (localStageUsed[stage]) { 1699 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1700 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1701 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1702 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1703 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1704 name = stageInfo[stage].name; 1705 } else { 1706 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1707 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1708 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1709 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1710 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1711 name = ""; 1712 } 1713 mess *= 0.5; messLen *= 0.5; red /= size; 1714 if (TotalTime != 0.0) fracTime = stageTime/TotalTime; else fracTime = 0.0; 1715 if (TotalFlops != 0.0) fracFlops = flops/TotalFlops; else fracFlops = 0.0; 1716 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1717 if (numMessages != 0.0) fracMessages = mess/numMessages; else fracMessages = 0.0; 1718 if (mess != 0.0) avgMessLen = messLen/mess; else avgMessLen = 0.0; 1719 if (messageLength != 0.0) fracLength = messLen/messageLength; else fracLength = 0.0; 1720 if (numReductions != 0.0) fracReductions = red/numReductions; else fracReductions = 0.0; 1721 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", 1722 stage, name, stageTime/size, 100.0*fracTime, flops, 100.0*fracFlops, 1723 mess, 100.0*fracMessages, avgMessLen, 100.0*fracLength, red, 100.0*fracReductions)); 1724 } 1725 } 1726 1727 PetscCall(PetscFPrintf(comm, fd,"\n------------------------------------------------------------------------------------------------------------------------\n")); 1728 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1729 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1730 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1731 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1732 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1733 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1734 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1735 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1736 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1737 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1738 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1739 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1740 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1741 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1742 if (PetscLogMemory) { 1743 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1744 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1745 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1746 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1747 } 1748 #if defined(PETSC_HAVE_DEVICE) 1749 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1750 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1751 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1752 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1753 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1754 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1755 #endif 1756 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1757 1758 PetscCall(PetscLogViewWarnDebugging(comm,fd)); 1759 1760 /* Report events */ 1761 PetscCall(PetscFPrintf(comm, fd,"Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1762 if (PetscLogMemory) { 1763 PetscCall(PetscFPrintf(comm, fd," Malloc EMalloc MMalloc RMI")); 1764 } 1765 #if defined(PETSC_HAVE_DEVICE) 1766 PetscCall(PetscFPrintf(comm, fd," GPU - CpuToGpu - - GpuToCpu - GPU")); 1767 #endif 1768 PetscCall(PetscFPrintf(comm, fd,"\n")); 1769 PetscCall(PetscFPrintf(comm, fd," Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1770 if (PetscLogMemory) { 1771 PetscCall(PetscFPrintf(comm, fd," Mbytes Mbytes Mbytes Mbytes")); 1772 } 1773 #if defined(PETSC_HAVE_DEVICE) 1774 PetscCall(PetscFPrintf(comm, fd," Mflop/s Count Size Count Size %%F")); 1775 #endif 1776 PetscCall(PetscFPrintf(comm, fd,"\n")); 1777 PetscCall(PetscFPrintf(comm, fd,"------------------------------------------------------------------------------------------------------------------------")); 1778 if (PetscLogMemory) { 1779 PetscCall(PetscFPrintf(comm, fd,"-----------------------------")); 1780 } 1781 #if defined(PETSC_HAVE_DEVICE) 1782 PetscCall(PetscFPrintf(comm, fd,"---------------------------------------")); 1783 #endif 1784 PetscCall(PetscFPrintf(comm, fd,"\n")); 1785 1786 #if defined(PETSC_HAVE_DEVICE) 1787 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1788 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1789 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1790 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1791 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1792 #endif 1793 1794 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1795 for (stage = 0; stage < numStages; stage++) { 1796 if (!stageVisible[stage]) continue; 1797 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1798 if (localStageUsed[stage]) { 1799 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1800 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1801 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1802 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1803 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1804 PetscCallMPI(MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1805 } else { 1806 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1807 PetscCallMPI(MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1808 PetscCallMPI(MPI_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1809 PetscCallMPI(MPI_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1810 PetscCallMPI(MPI_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1811 PetscCallMPI(MPI_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1812 } 1813 mess *= 0.5; messLen *= 0.5; red /= size; 1814 1815 /* Get total number of events in this stage -- 1816 Currently, a single processor can register more events than another, but events must all be registered in order, 1817 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1818 on the event ID. This seems best accomplished by associating a communicator with each stage. 1819 1820 Problem: If the event did not happen on proc 1, its name will not be available. 1821 Problem: Event visibility is not implemented 1822 */ 1823 if (localStageUsed[stage]) { 1824 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1825 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1826 } else localNumEvents = 0; 1827 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1828 for (event = 0; event < numEvents; event++) { 1829 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1830 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1831 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; else flopr = 0.0; 1832 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1833 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1834 PetscCallMPI(MPI_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1835 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1836 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1837 PetscCallMPI(MPI_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1838 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1839 PetscCallMPI(MPI_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1840 PetscCallMPI(MPI_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1841 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1842 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1843 if (PetscLogMemory) { 1844 PetscCallMPI(MPI_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1845 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1846 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncrease, &malmax,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1847 PetscCallMPI(MPI_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1848 } 1849 #if defined(PETSC_HAVE_DEVICE) 1850 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1851 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1852 PetscCallMPI(MPI_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1853 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1854 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuFlops, &gflops,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1855 PetscCallMPI(MPI_Allreduce(&eventInfo[event].GpuTime, &gmaxt ,1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1856 #endif 1857 name = stageLog->eventLog->eventInfo[event].name; 1858 } else { 1859 flopr = 0.0; 1860 PetscCallMPI(MPI_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1861 PetscCallMPI(MPI_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1862 PetscCallMPI(MPI_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1863 PetscCallMPI(MPI_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1864 PetscCallMPI(MPI_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1865 PetscCallMPI(MPI_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1866 PetscCallMPI(MPI_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1867 PetscCallMPI(MPI_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1868 PetscCallMPI(MPI_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1869 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1870 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1871 if (PetscLogMemory) { 1872 PetscCallMPI(MPI_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1873 PetscCallMPI(MPI_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1874 PetscCallMPI(MPI_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1875 PetscCallMPI(MPI_Allreduce(&zero, &emalmax,1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1876 } 1877 #if defined(PETSC_HAVE_DEVICE) 1878 PetscCallMPI(MPI_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1879 PetscCallMPI(MPI_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1880 PetscCallMPI(MPI_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1881 PetscCallMPI(MPI_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1882 PetscCallMPI(MPI_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1883 PetscCallMPI(MPI_Allreduce(&zero, &gmaxt , 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1884 #endif 1885 name = ""; 1886 } 1887 if (mint < 0.0) { 1888 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n",mint,name)); 1889 mint = 0; 1890 } 1891 PetscCheck(minf >= 0.0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Minimum flop %g over all processors for %s is negative! Not possible!",minf,name); 1892 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1893 #if defined(PETSC_HAVE_DEVICE) 1894 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1895 memcpy(&gmaxt,&nas,sizeof(PetscLogDouble)); 1896 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 1897 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 1898 memcpy(&mint,&nas,sizeof(PetscLogDouble)); 1899 memcpy(&maxt,&nas,sizeof(PetscLogDouble)); 1900 } 1901 } 1902 #endif 1903 totm *= 0.5; totml *= 0.5; totr /= size; 1904 1905 if (maxC != 0) { 1906 if (minC != 0) ratC = ((PetscLogDouble)maxC)/minC;else ratC = 0.0; 1907 if (mint != 0.0) ratt = maxt/mint; else ratt = 0.0; 1908 if (minf != 0.0) ratf = maxf/minf; else ratf = 0.0; 1909 if (TotalTime != 0.0) fracTime = tott/TotalTime; else fracTime = 0.0; 1910 if (TotalFlops != 0.0) fracFlops = totf/TotalFlops; else fracFlops = 0.0; 1911 if (stageTime != 0.0) fracStageTime = tott/stageTime; else fracStageTime = 0.0; 1912 if (flops != 0.0) fracStageFlops = totf/flops; else fracStageFlops = 0.0; 1913 if (numMessages != 0.0) fracMess = totm/numMessages; else fracMess = 0.0; 1914 if (messageLength != 0.0) fracMessLen = totml/messageLength; else fracMessLen = 0.0; 1915 if (numReductions != 0.0) fracRed = totr/numReductions; else fracRed = 0.0; 1916 if (mess != 0.0) fracStageMess = totm/mess; else fracStageMess = 0.0; 1917 if (messLen != 0.0) fracStageMessLen = totml/messLen; else fracStageMessLen = 0.0; 1918 if (red != 0.0) fracStageRed = totr/red; else fracStageRed = 0.0; 1919 if (totm != 0.0) totml /= totm; else totml = 0.0; 1920 if (maxt != 0.0) flopr = totf/maxt; else flopr = 0.0; 1921 if (fracStageTime > 1.00) PetscCall(PetscFPrintf(comm, fd,"Warning -- total time of event greater than time of entire stage -- something is wrong with the timer\n")); 1922 PetscCall(PetscFPrintf(comm, fd, 1923 "%-16s %7d%4.1f %5.4e%4.1f %3.2e%4.1f %2.1e %2.1e %2.1e%3.0f%3.0f%3.0f%3.0f%3.0f %3.0f%3.0f%3.0f%3.0f%3.0f %5.0f", 1924 name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 1925 100.0*fracTime, 100.0*fracFlops, 100.0*fracMess, 100.0*fracMessLen, 100.0*fracRed, 1926 100.0*fracStageTime, 100.0*fracStageFlops, 100.0*fracStageMess, 100.0*fracStageMessLen, 100.0*fracStageRed, 1927 PetscAbs(flopr)/1.0e6)); 1928 if (PetscLogMemory) { 1929 PetscCall(PetscFPrintf(comm, fd," %5.0f %5.0f %5.0f %5.0f",mal/1.0e6,emalmax/1.0e6,malmax/1.0e6,mem/1.0e6)); 1930 } 1931 #if defined(PETSC_HAVE_DEVICE) 1932 if (totf != 0.0) fracgflops = gflops/totf; else fracgflops = 0.0; 1933 if (gmaxt != 0.0) gflopr = gflops/gmaxt; else gflopr = 0.0; 1934 PetscCall(PetscFPrintf(comm, fd," %5.0f %4.0f %3.2e %4.0f %3.2e% 3.0f",PetscAbs(gflopr)/1.0e6,cct/size,csz/(1.0e6*size),gct/size,gsz/(1.0e6*size),100.0*fracgflops)); 1935 #endif 1936 PetscCall(PetscFPrintf(comm, fd,"\n")); 1937 } 1938 } 1939 } 1940 1941 /* Memory usage and object creation */ 1942 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1943 if (PetscLogMemory) { 1944 PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1945 } 1946 #if defined(PETSC_HAVE_DEVICE) 1947 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1948 #endif 1949 PetscCall(PetscFPrintf(comm, fd, "\n")); 1950 PetscCall(PetscFPrintf(comm, fd, "\n")); 1951 1952 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1953 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1954 stats for stages local to processor sets. 1955 */ 1956 /* We should figure out the longest object name here (now 20 characters) */ 1957 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 1958 for (stage = 0; stage < numStages; stage++) { 1959 if (localStageUsed[stage]) { 1960 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 1961 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1962 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 1963 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 1964 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name,classInfo[oclass].creations, classInfo[oclass].destructions)); 1965 } 1966 } 1967 } else { 1968 if (!localStageVisible[stage]) continue; 1969 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1970 } 1971 } 1972 1973 PetscCall(PetscFree(localStageUsed)); 1974 PetscCall(PetscFree(stageUsed)); 1975 PetscCall(PetscFree(localStageVisible)); 1976 PetscCall(PetscFree(stageVisible)); 1977 1978 /* Information unrelated to this particular run */ 1979 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 1980 PetscTime(&y); 1981 PetscTime(&x); 1982 PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y); 1983 PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y); PetscTime(&y); 1984 PetscCall(PetscFPrintf(comm,fd,"Average time to get PetscTime(): %g\n", (y-x)/10.0)); 1985 /* MPI information */ 1986 if (size > 1) { 1987 MPI_Status status; 1988 PetscMPIInt tag; 1989 MPI_Comm newcomm; 1990 1991 PetscCallMPI(MPI_Barrier(comm)); 1992 PetscTime(&x); 1993 PetscCallMPI(MPI_Barrier(comm)); 1994 PetscCallMPI(MPI_Barrier(comm)); 1995 PetscCallMPI(MPI_Barrier(comm)); 1996 PetscCallMPI(MPI_Barrier(comm)); 1997 PetscCallMPI(MPI_Barrier(comm)); 1998 PetscTime(&y); 1999 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y-x)/5.0)); 2000 PetscCall(PetscCommDuplicate(comm,&newcomm, &tag)); 2001 PetscCallMPI(MPI_Barrier(comm)); 2002 if (rank) { 2003 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank-1, tag, newcomm, &status)); 2004 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank+1)%size, tag, newcomm)); 2005 } else { 2006 PetscTime(&x); 2007 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 2008 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size-1, tag, newcomm, &status)); 2009 PetscTime(&y); 2010 PetscCall(PetscFPrintf(comm,fd,"Average time for zero size MPI_Send(): %g\n", (y-x)/size)); 2011 } 2012 PetscCall(PetscCommDestroy(&newcomm)); 2013 } 2014 PetscCall(PetscOptionsView(NULL,viewer)); 2015 2016 /* Machine and compile information */ 2017 #if defined(PETSC_USE_FORTRAN_KERNELS) 2018 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 2019 #else 2020 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 2021 #endif 2022 #if defined(PETSC_USE_64BIT_INDICES) 2023 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64 bit PetscInt\n")); 2024 #elif defined(PETSC_USE___FLOAT128) 2025 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32 bit PetscInt\n")); 2026 #endif 2027 #if defined(PETSC_USE_REAL_SINGLE) 2028 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 2029 #elif defined(PETSC_USE___FLOAT128) 2030 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 2031 #endif 2032 #if defined(PETSC_USE_REAL_MAT_SINGLE) 2033 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 2034 #else 2035 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 2036 #endif 2037 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", 2038 (int) sizeof(short), (int) sizeof(int), (int) sizeof(long), (int) sizeof(void*),(int) sizeof(PetscScalar),(int) sizeof(PetscInt))); 2039 2040 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s",petscconfigureoptions)); 2041 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 2042 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 2043 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 2044 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 2045 2046 /* Cleanup */ 2047 PetscCall(PetscFPrintf(comm, fd, "\n")); 2048 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm,fd)); 2049 PetscCall(PetscLogViewWarnDebugging(comm,fd)); 2050 PetscCall(PetscFPTrapPop()); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 /*@C 2055 PetscLogView - Prints a summary of the logging. 2056 2057 Collective over MPI_Comm 2058 2059 Input Parameter: 2060 . viewer - an ASCII viewer 2061 2062 Options Database Keys: 2063 + -log_view [:filename] - Prints summary of log information 2064 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2065 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2066 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2067 . -log_view_memory - Also display memory usage in each event 2068 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2069 . -log_all - Saves a file Log.rank for each MPI process with details of each step of the computation 2070 - -log_trace [filename] - Displays a trace of what each process is doing 2071 2072 Notes: 2073 It is possible to control the logging programatically but we recommend using the options database approach whenever possible 2074 By default the summary is printed to stdout. 2075 2076 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2077 2078 If PETSc is configured with --with-logging=0 then this functionality is not available 2079 2080 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2081 directory then open filename.xml with your browser. Specific notes for certain browsers 2082 $ Firefox and Internet explorer - simply open the file 2083 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2084 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2085 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2086 your browser. 2087 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2088 window and render the XML log file contents. 2089 2090 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2091 2092 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2093 or using speedscope (https://www.speedscope.app). 2094 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2095 2096 Level: beginner 2097 2098 .seealso: `PetscLogDefaultBegin()`, `PetscLogDump()` 2099 @*/ 2100 PetscErrorCode PetscLogView(PetscViewer viewer) 2101 { 2102 PetscBool isascii; 2103 PetscViewerFormat format; 2104 int stage, lastStage; 2105 PetscStageLog stageLog; 2106 2107 PetscFunctionBegin; 2108 PetscCheck(PetscLogPLB,PETSC_COMM_SELF,PETSC_ERR_SUP,"Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2109 /* Pop off any stages the user forgot to remove */ 2110 lastStage = 0; 2111 PetscCall(PetscLogGetStageLog(&stageLog)); 2112 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2113 while (stage >= 0) { 2114 lastStage = stage; 2115 PetscCall(PetscStageLogPop(stageLog)); 2116 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2117 } 2118 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii)); 2119 PetscCheck(isascii,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Currently can only view logging to ASCII"); 2120 PetscCall(PetscViewerGetFormat(viewer,&format)); 2121 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2122 PetscCall(PetscLogView_Default(viewer)); 2123 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2124 PetscCall(PetscLogView_Detailed(viewer)); 2125 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2126 PetscCall(PetscLogView_CSV(viewer)); 2127 } else if (format == PETSC_VIEWER_ASCII_XML) { 2128 PetscCall(PetscLogView_Nested(viewer)); 2129 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2130 PetscCall(PetscLogView_Flamegraph(viewer)); 2131 } 2132 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2133 PetscFunctionReturn(0); 2134 } 2135 2136 /*@C 2137 PetscLogViewFromOptions - Processes command line options to determine if/how a PetscLog is to be viewed. 2138 2139 Collective on PETSC_COMM_WORLD 2140 2141 Not normally called by user 2142 2143 Level: intermediate 2144 2145 @*/ 2146 PetscErrorCode PetscLogViewFromOptions(void) 2147 { 2148 PetscViewer viewer; 2149 PetscBool flg; 2150 PetscViewerFormat format; 2151 2152 PetscFunctionBegin; 2153 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD,NULL,NULL,"-log_view",&viewer,&format,&flg)); 2154 if (flg) { 2155 PetscCall(PetscViewerPushFormat(viewer,format)); 2156 PetscCall(PetscLogView(viewer)); 2157 PetscCall(PetscViewerPopFormat(viewer)); 2158 PetscCall(PetscViewerDestroy(&viewer)); 2159 } 2160 PetscFunctionReturn(0); 2161 } 2162 2163 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2164 /*@C 2165 PetscGetFlops - Returns the number of flops used on this processor 2166 since the program began. 2167 2168 Not Collective 2169 2170 Output Parameter: 2171 flops - number of floating point operations 2172 2173 Notes: 2174 A global counter logs all PETSc flop counts. The user can use 2175 PetscLogFlops() to increment this counter to include flops for the 2176 application code. 2177 2178 Level: intermediate 2179 2180 .seealso: `PetscTime()`, `PetscLogFlops()` 2181 @*/ 2182 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) 2183 { 2184 PetscFunctionBegin; 2185 *flops = petsc_TotalFlops; 2186 PetscFunctionReturn(0); 2187 } 2188 2189 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2190 { 2191 size_t fullLength; 2192 va_list Argp; 2193 2194 PetscFunctionBegin; 2195 if (!petsc_logObjects) PetscFunctionReturn(0); 2196 va_start(Argp, format); 2197 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64,format,&fullLength, Argp)); 2198 va_end(Argp); 2199 PetscFunctionReturn(0); 2200 } 2201 2202 /*MC 2203 PetscLogFlops - Adds floating point operations to the global counter. 2204 2205 Synopsis: 2206 #include <petsclog.h> 2207 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2208 2209 Not Collective 2210 2211 Input Parameter: 2212 . f - flop counter 2213 2214 Usage: 2215 .vb 2216 PetscLogEvent USER_EVENT; 2217 PetscLogEventRegister("User event",0,&USER_EVENT); 2218 PetscLogEventBegin(USER_EVENT,0,0,0,0); 2219 [code segment to monitor] 2220 PetscLogFlops(user_flops) 2221 PetscLogEventEnd(USER_EVENT,0,0,0,0); 2222 .ve 2223 2224 Notes: 2225 A global counter logs all PETSc flop counts. The user can use 2226 PetscLogFlops() to increment this counter to include flops for the 2227 application code. 2228 2229 Level: intermediate 2230 2231 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2232 2233 M*/ 2234 2235 /*MC 2236 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) 2237 to get accurate timings 2238 2239 Synopsis: 2240 #include <petsclog.h> 2241 void PetscPreLoadBegin(PetscBool flag,char *name); 2242 2243 Not Collective 2244 2245 Input Parameters: 2246 + flag - PETSC_TRUE to run twice, PETSC_FALSE to run once, may be overridden 2247 with command line option -preload true or -preload false 2248 - name - name of first stage (lines of code timed separately with -log_view) to 2249 be preloaded 2250 2251 Usage: 2252 .vb 2253 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2254 lines of code 2255 PetscPreLoadStage("second stage"); 2256 lines of code 2257 PetscPreLoadEnd(); 2258 .ve 2259 2260 Notes: 2261 Only works in C/C++, not Fortran 2262 2263 Flags available within the macro. 2264 + PetscPreLoadingUsed - true if we are or have done preloading 2265 . PetscPreLoadingOn - true if it is CURRENTLY doing preload 2266 . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second 2267 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on 2268 The first two variables are available throughout the program, the second two only between the PetscPreLoadBegin() 2269 and PetscPreLoadEnd() 2270 2271 Level: intermediate 2272 2273 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2274 2275 M*/ 2276 2277 /*MC 2278 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) 2279 to get accurate timings 2280 2281 Synopsis: 2282 #include <petsclog.h> 2283 void PetscPreLoadEnd(void); 2284 2285 Not Collective 2286 2287 Usage: 2288 .vb 2289 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2290 lines of code 2291 PetscPreLoadStage("second stage"); 2292 lines of code 2293 PetscPreLoadEnd(); 2294 .ve 2295 2296 Notes: 2297 only works in C/C++ not fortran 2298 2299 Level: intermediate 2300 2301 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2302 2303 M*/ 2304 2305 /*MC 2306 PetscPreLoadStage - Start a new segment of code to be timed separately. 2307 to get accurate timings 2308 2309 Synopsis: 2310 #include <petsclog.h> 2311 void PetscPreLoadStage(char *name); 2312 2313 Not Collective 2314 2315 Usage: 2316 .vb 2317 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2318 lines of code 2319 PetscPreLoadStage("second stage"); 2320 lines of code 2321 PetscPreLoadEnd(); 2322 .ve 2323 2324 Notes: 2325 only works in C/C++ not fortran 2326 2327 Level: intermediate 2328 2329 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2330 2331 M*/ 2332 2333 #if PetscDefined(HAVE_DEVICE) 2334 #include <petsc/private/deviceimpl.h> 2335 2336 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2337 2338 /* 2339 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2340 because it will result in timing results that cannot be interpreted. 2341 */ 2342 static PetscErrorCode PetscLogGpuTime_Off(void) 2343 { 2344 PetscLogGpuTimeFlag = PETSC_FALSE; 2345 return 0; 2346 } 2347 2348 /*@C 2349 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2350 2351 Options Database: 2352 . -log_view_gpu_time - provide the GPU times in the -log_view output 2353 2354 Notes: 2355 Because the logging of GPU time requires blocking the CPU execution for each kernel, turning on the timing of the 2356 GPU kernels can slow down the entire computation and should only be used when studying the performance 2357 of operations on GPU such as vector operations and matrix-vector operations. 2358 2359 This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off. 2360 2361 Level: advanced 2362 2363 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2364 @*/ 2365 PetscErrorCode PetscLogGpuTime(void) 2366 { 2367 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2368 PetscLogGpuTimeFlag = PETSC_TRUE; 2369 return 0; 2370 } 2371 2372 /*@C 2373 PetscLogGpuTimeBegin - Start timer for device 2374 2375 Notes: 2376 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times). 2377 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times). 2378 There is no need to call WaitForCUDA() or WaitForHIP() between PetscLogGpuTimeBegin and PetscLogGpuTimeEnd 2379 This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space. 2380 The regular logging captures the time for data transfers and any CPU activites during the event 2381 It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel. 2382 2383 Developer Notes: 2384 The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between PetscLogGpuTimeBegin() and PetsLogGpuTimeEnd(). 2385 PetscLogGpuTimeBegin() and PetsLogGpuTimeEnd() insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the event when it reaches that event in the stream. The function xxxEventSynchronize() is called in PetsLogGpuTimeEnd() to block CPU execution, but not continued GPU excution, until the timer event is recorded. 2386 2387 Level: intermediate 2388 2389 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2390 @*/ 2391 PetscErrorCode PetscLogGpuTimeBegin(void) 2392 { 2393 PetscFunctionBegin; 2394 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2395 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2396 PetscDeviceContext dctx; 2397 2398 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2399 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2400 } else { 2401 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2402 } 2403 PetscFunctionReturn(0); 2404 } 2405 2406 /*@C 2407 PetscLogGpuTimeEnd - Stop timer for device 2408 2409 Level: intermediate 2410 2411 .seealso: `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2412 @*/ 2413 PetscErrorCode PetscLogGpuTimeEnd(void) 2414 { 2415 PetscFunctionBegin; 2416 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(0); 2417 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2418 PetscDeviceContext dctx; 2419 PetscLogDouble elapsed; 2420 2421 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2422 PetscCall(PetscDeviceContextEndTimer_Internal(dctx,&elapsed)); 2423 petsc_gtime += (elapsed/1000.0); 2424 } else { 2425 PetscCall(PetscTimeAdd(&petsc_gtime)); 2426 } 2427 PetscFunctionReturn(0); 2428 } 2429 #endif /* end of PETSC_HAVE_DEVICE */ 2430 2431 #else /* end of -DPETSC_USE_LOG section */ 2432 2433 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2434 { 2435 PetscFunctionBegin; 2436 PetscFunctionReturn(0); 2437 } 2438 2439 #endif /* PETSC_USE_LOG*/ 2440 2441 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2442 PetscClassId PETSC_OBJECT_CLASSID = 0; 2443 2444 /*@C 2445 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2446 2447 Not Collective 2448 2449 Input Parameter: 2450 . name - The class name 2451 2452 Output Parameter: 2453 . oclass - The class id or classid 2454 2455 Level: developer 2456 2457 @*/ 2458 PetscErrorCode PetscClassIdRegister(const char name[],PetscClassId *oclass) 2459 { 2460 #if defined(PETSC_USE_LOG) 2461 PetscStageLog stageLog; 2462 PetscInt stage; 2463 #endif 2464 2465 PetscFunctionBegin; 2466 *oclass = ++PETSC_LARGEST_CLASSID; 2467 #if defined(PETSC_USE_LOG) 2468 PetscCall(PetscLogGetStageLog(&stageLog)); 2469 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2470 for (stage = 0; stage < stageLog->numStages; stage++) { 2471 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2472 } 2473 #endif 2474 PetscFunctionReturn(0); 2475 } 2476 2477 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2478 #include <mpe.h> 2479 2480 PetscBool PetscBeganMPE = PETSC_FALSE; 2481 2482 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent,int,PetscObject,PetscObject,PetscObject,PetscObject); 2483 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent,int,PetscObject,PetscObject,PetscObject,PetscObject); 2484 2485 /*@C 2486 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files 2487 and slows the program down. 2488 2489 Collective over PETSC_COMM_WORLD 2490 2491 Options Database Keys: 2492 . -log_mpe - Prints extensive log information 2493 2494 Notes: 2495 A related routine is PetscLogDefaultBegin() (with the options key -log_view), which is 2496 intended for production runs since it logs only flop rates and object 2497 creation (and should not significantly slow the programs). 2498 2499 Level: advanced 2500 2501 .seealso: `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2502 `PetscLogEventDeactivate()` 2503 @*/ 2504 PetscErrorCode PetscLogMPEBegin(void) 2505 { 2506 PetscFunctionBegin; 2507 /* Do MPE initialization */ 2508 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2509 PetscCall(PetscInfo(0,"Initializing MPE.\n")); 2510 PetscCall(MPE_Init_log()); 2511 2512 PetscBeganMPE = PETSC_TRUE; 2513 } else { 2514 PetscCall(PetscInfo(0,"MPE already initialized. Not attempting to reinitialize.\n")); 2515 } 2516 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2517 PetscFunctionReturn(0); 2518 } 2519 2520 /*@C 2521 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2522 2523 Collective over PETSC_COMM_WORLD 2524 2525 Level: advanced 2526 2527 .seealso: `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2528 @*/ 2529 PetscErrorCode PetscLogMPEDump(const char sname[]) 2530 { 2531 char name[PETSC_MAX_PATH_LEN]; 2532 2533 PetscFunctionBegin; 2534 if (PetscBeganMPE) { 2535 PetscCall(PetscInfo(0,"Finalizing MPE.\n")); 2536 if (sname) { 2537 PetscCall(PetscStrcpy(name,sname)); 2538 } else { 2539 PetscCall(PetscGetProgramName(name,sizeof(name))); 2540 } 2541 PetscCall(MPE_Finish_log(name)); 2542 } else { 2543 PetscCall(PetscInfo(0,"Not finalizing MPE (not started by PETSc).\n")); 2544 } 2545 PetscFunctionReturn(0); 2546 } 2547 2548 #define PETSC_RGB_COLORS_MAX 39 2549 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = { 2550 "OliveDrab: ", 2551 "BlueViolet: ", 2552 "CadetBlue: ", 2553 "CornflowerBlue: ", 2554 "DarkGoldenrod: ", 2555 "DarkGreen: ", 2556 "DarkKhaki: ", 2557 "DarkOliveGreen: ", 2558 "DarkOrange: ", 2559 "DarkOrchid: ", 2560 "DarkSeaGreen: ", 2561 "DarkSlateGray: ", 2562 "DarkTurquoise: ", 2563 "DeepPink: ", 2564 "DarkKhaki: ", 2565 "DimGray: ", 2566 "DodgerBlue: ", 2567 "GreenYellow: ", 2568 "HotPink: ", 2569 "IndianRed: ", 2570 "LavenderBlush: ", 2571 "LawnGreen: ", 2572 "LemonChiffon: ", 2573 "LightCoral: ", 2574 "LightCyan: ", 2575 "LightPink: ", 2576 "LightSalmon: ", 2577 "LightSlateGray: ", 2578 "LightYellow: ", 2579 "LimeGreen: ", 2580 "MediumPurple: ", 2581 "MediumSeaGreen: ", 2582 "MediumSlateBlue:", 2583 "MidnightBlue: ", 2584 "MintCream: ", 2585 "MistyRose: ", 2586 "NavajoWhite: ", 2587 "NavyBlue: ", 2588 "OliveDrab: " 2589 }; 2590 2591 /*@C 2592 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with PetscLogEventRegister() 2593 2594 Not collective. Maybe it should be? 2595 2596 Output Parameter: 2597 . str - character string representing the color 2598 2599 Level: developer 2600 2601 .seealso: `PetscLogEventRegister` 2602 @*/ 2603 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) 2604 { 2605 static int idx = 0; 2606 2607 PetscFunctionBegin; 2608 *str = PetscLogMPERGBColors[idx]; 2609 idx = (idx + 1)% PETSC_RGB_COLORS_MAX; 2610 PetscFunctionReturn(0); 2611 } 2612 2613 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2614