1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 #include <petscdevice.h> 15 #include <petsc/private/deviceimpl.h> 16 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 17 #include <../src/sys/perfstubs/timer.h> 18 #endif 19 20 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 21 22 #if defined(PETSC_USE_LOG) 23 #include <petscmachineinfo.h> 24 #include <petscconfiginfo.h> 25 26 #if defined(PETSC_HAVE_THREADSAFETY) 27 28 PetscInt petsc_log_gid = -1; /* Global threadId counter */ 29 PETSC_TLS PetscInt petsc_log_tid = -1; /* Local threadId */ 30 31 /* shared variables */ 32 PetscSpinlock PetscLogSpinLock; 33 PetscHMapEvent eventInfoMap_th = NULL; 34 35 #endif 36 37 /* used in the MPI_XXX() count macros in petsclog.h */ 38 39 /* Action and object logging variables */ 40 Action *petsc_actions = NULL; 41 Object *petsc_objects = NULL; 42 PetscBool petsc_logActions = PETSC_FALSE; 43 PetscBool petsc_logObjects = PETSC_FALSE; 44 int petsc_numActions = 0, petsc_maxActions = 100; 45 int petsc_numObjects = 0, petsc_maxObjects = 100; 46 int petsc_numObjectsDestroyed = 0; 47 48 /* Global counters */ 49 PetscLogDouble petsc_BaseTime = 0.0; 50 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 51 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 52 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 53 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 54 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 55 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 56 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 57 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 58 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 59 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 60 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 61 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 62 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 63 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 64 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 65 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 66 67 /* Thread Local storage */ 68 PETSC_TLS PetscLogDouble petsc_TotalFlops_th = 0.0; 69 PETSC_TLS PetscLogDouble petsc_send_ct_th = 0.0; 70 PETSC_TLS PetscLogDouble petsc_recv_ct_th = 0.0; 71 PETSC_TLS PetscLogDouble petsc_send_len_th = 0.0; 72 PETSC_TLS PetscLogDouble petsc_recv_len_th = 0.0; 73 PETSC_TLS PetscLogDouble petsc_isend_ct_th = 0.0; 74 PETSC_TLS PetscLogDouble petsc_irecv_ct_th = 0.0; 75 PETSC_TLS PetscLogDouble petsc_isend_len_th = 0.0; 76 PETSC_TLS PetscLogDouble petsc_irecv_len_th = 0.0; 77 PETSC_TLS PetscLogDouble petsc_wait_ct_th = 0.0; 78 PETSC_TLS PetscLogDouble petsc_wait_any_ct_th = 0.0; 79 PETSC_TLS PetscLogDouble petsc_wait_all_ct_th = 0.0; 80 PETSC_TLS PetscLogDouble petsc_sum_of_waits_ct_th = 0.0; 81 PETSC_TLS PetscLogDouble petsc_allreduce_ct_th = 0.0; 82 PETSC_TLS PetscLogDouble petsc_gather_ct_th = 0.0; 83 PETSC_TLS PetscLogDouble petsc_scatter_ct_th = 0.0; 84 85 #if defined(PETSC_HAVE_DEVICE) 86 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 87 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 88 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 89 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 90 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 91 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 92 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 93 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 94 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 95 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 96 97 PETSC_TLS PetscLogDouble petsc_ctog_ct_th = 0.0; 98 PETSC_TLS PetscLogDouble petsc_gtoc_ct_th = 0.0; 99 PETSC_TLS PetscLogDouble petsc_ctog_sz_th = 0.0; 100 PETSC_TLS PetscLogDouble petsc_gtoc_sz_th = 0.0; 101 PETSC_TLS PetscLogDouble petsc_ctog_ct_scalar_th = 0.0; 102 PETSC_TLS PetscLogDouble petsc_gtoc_ct_scalar_th = 0.0; 103 PETSC_TLS PetscLogDouble petsc_ctog_sz_scalar_th = 0.0; 104 PETSC_TLS PetscLogDouble petsc_gtoc_sz_scalar_th = 0.0; 105 PETSC_TLS PetscLogDouble petsc_gflops_th = 0.0; 106 PETSC_TLS PetscLogDouble petsc_gtime_th = 0.0; 107 #endif 108 109 #if defined(PETSC_HAVE_THREADSAFETY) 110 PetscErrorCode PetscAddLogDouble(PetscLogDouble *tot, PetscLogDouble *tot_th, PetscLogDouble tmp) 111 { 112 *tot_th += tmp; 113 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 114 *tot += tmp; 115 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 116 return PETSC_SUCCESS; 117 } 118 119 PetscErrorCode PetscAddLogDoubleCnt(PetscLogDouble *cnt, PetscLogDouble *tot, PetscLogDouble *cnt_th, PetscLogDouble *tot_th, PetscLogDouble tmp) 120 { 121 *cnt_th = *cnt_th + 1; 122 *tot_th += tmp; 123 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 124 *tot += (PetscLogDouble)(tmp); 125 *cnt += *cnt + 1; 126 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 127 return PETSC_SUCCESS; 128 } 129 130 PetscInt PetscLogGetTid(void) 131 { 132 if (petsc_log_tid < 0) { 133 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 134 petsc_log_tid = ++petsc_log_gid; 135 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 136 } 137 return petsc_log_tid; 138 } 139 140 #endif 141 142 /* Logging functions */ 143 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 144 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 145 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 146 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 147 148 /* Tracing event logging variables */ 149 FILE *petsc_tracefile = NULL; 150 int petsc_tracelevel = 0; 151 const char *petsc_traceblanks = " "; 152 char petsc_tracespace[128] = " "; 153 PetscLogDouble petsc_tracetime = 0.0; 154 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 155 156 static PetscIntStack current_log_event_stack = NULL; 157 158 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) 159 { 160 int stage; 161 PetscBool opt; 162 163 PetscFunctionBegin; 164 if (PetscLogInitializeCalled) PetscFunctionReturn(PETSC_SUCCESS); 165 PetscLogInitializeCalled = PETSC_TRUE; 166 167 PetscCall(PetscIntStackCreate(¤t_log_event_stack)); 168 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt)); 169 if (opt) petsc_logActions = PETSC_FALSE; 170 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt)); 171 if (opt) petsc_logObjects = PETSC_FALSE; 172 if (petsc_logActions) PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 173 if (petsc_logObjects) PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 174 PetscLogPHC = PetscLogObjCreateDefault; 175 PetscLogPHD = PetscLogObjDestroyDefault; 176 /* Setup default logging structures */ 177 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 178 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 179 180 PetscCall(PetscSpinlockCreate(&PetscLogSpinLock)); 181 #if defined(PETSC_HAVE_THREADSAFETY) 182 petsc_log_tid = 0; 183 petsc_log_gid = 0; 184 PetscCall(PetscHMapEventCreate(&eventInfoMap_th)); 185 #endif 186 187 /* All processors sync here for more consistent logging */ 188 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 189 PetscCall(PetscTime(&petsc_BaseTime)); 190 PetscCall(PetscLogStagePush(stage)); 191 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 192 PetscStackCallExternalVoid("ps_initialize_", ps_initialize_()); 193 #endif 194 PetscFunctionReturn(PETSC_SUCCESS); 195 } 196 197 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) 198 { 199 PetscStageLog stageLog; 200 201 PetscFunctionBegin; 202 #if defined(PETSC_HAVE_THREADSAFETY) 203 if (eventInfoMap_th) { 204 PetscEventPerfInfo **array; 205 PetscInt n, off = 0; 206 207 PetscCall(PetscHMapEventGetSize(eventInfoMap_th, &n)); 208 PetscCall(PetscMalloc1(n, &array)); 209 PetscCall(PetscHMapEventGetVals(eventInfoMap_th, &off, array)); 210 for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i])); 211 PetscCall(PetscFree(array)); 212 PetscCall(PetscHMapEventDestroy(&eventInfoMap_th)); 213 } 214 #endif 215 PetscCall(PetscFree(petsc_actions)); 216 PetscCall(PetscFree(petsc_objects)); 217 PetscCall(PetscLogNestedEnd()); 218 PetscCall(PetscLogSet(NULL, NULL)); 219 220 /* Resetting phase */ 221 PetscCall(PetscLogGetStageLog(&stageLog)); 222 PetscCall(PetscStageLogDestroy(stageLog)); 223 PetscCall(PetscIntStackDestroy(current_log_event_stack)); 224 current_log_event_stack = NULL; 225 226 petsc_TotalFlops = 0.0; 227 petsc_numActions = 0; 228 petsc_numObjects = 0; 229 petsc_numObjectsDestroyed = 0; 230 petsc_maxActions = 100; 231 petsc_maxObjects = 100; 232 petsc_actions = NULL; 233 petsc_objects = NULL; 234 petsc_logActions = PETSC_FALSE; 235 petsc_logObjects = PETSC_FALSE; 236 petsc_BaseTime = 0.0; 237 petsc_TotalFlops = 0.0; 238 petsc_send_ct = 0.0; 239 petsc_recv_ct = 0.0; 240 petsc_send_len = 0.0; 241 petsc_recv_len = 0.0; 242 petsc_isend_ct = 0.0; 243 petsc_irecv_ct = 0.0; 244 petsc_isend_len = 0.0; 245 petsc_irecv_len = 0.0; 246 petsc_wait_ct = 0.0; 247 petsc_wait_any_ct = 0.0; 248 petsc_wait_all_ct = 0.0; 249 petsc_sum_of_waits_ct = 0.0; 250 petsc_allreduce_ct = 0.0; 251 petsc_gather_ct = 0.0; 252 petsc_scatter_ct = 0.0; 253 petsc_TotalFlops_th = 0.0; 254 petsc_send_ct_th = 0.0; 255 petsc_recv_ct_th = 0.0; 256 petsc_send_len_th = 0.0; 257 petsc_recv_len_th = 0.0; 258 petsc_isend_ct_th = 0.0; 259 petsc_irecv_ct_th = 0.0; 260 petsc_isend_len_th = 0.0; 261 petsc_irecv_len_th = 0.0; 262 petsc_wait_ct_th = 0.0; 263 petsc_wait_any_ct_th = 0.0; 264 petsc_wait_all_ct_th = 0.0; 265 petsc_sum_of_waits_ct_th = 0.0; 266 petsc_allreduce_ct_th = 0.0; 267 petsc_gather_ct_th = 0.0; 268 petsc_scatter_ct_th = 0.0; 269 270 #if defined(PETSC_HAVE_DEVICE) 271 petsc_ctog_ct = 0.0; 272 petsc_gtoc_ct = 0.0; 273 petsc_ctog_sz = 0.0; 274 petsc_gtoc_sz = 0.0; 275 petsc_gflops = 0.0; 276 petsc_gtime = 0.0; 277 petsc_ctog_ct_th = 0.0; 278 petsc_gtoc_ct_th = 0.0; 279 petsc_ctog_sz_th = 0.0; 280 petsc_gtoc_sz_th = 0.0; 281 petsc_gflops_th = 0.0; 282 petsc_gtime_th = 0.0; 283 #endif 284 285 PETSC_LARGEST_EVENT = PETSC_EVENT; 286 PetscLogPHC = NULL; 287 PetscLogPHD = NULL; 288 petsc_tracefile = NULL; 289 petsc_tracelevel = 0; 290 petsc_traceblanks = " "; 291 petsc_tracespace[0] = ' '; 292 petsc_tracespace[1] = 0; 293 petsc_tracetime = 0.0; 294 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 295 PETSC_OBJECT_CLASSID = 0; 296 petsc_stageLog = NULL; 297 PetscLogInitializeCalled = PETSC_FALSE; 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 /*@C 302 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 303 304 Not Collective 305 306 Input Parameters: 307 + b - The function called at beginning of event 308 - e - The function called at end of event 309 310 Level: developer 311 312 Developer Notes: 313 The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. 314 315 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()` 316 @*/ 317 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) 318 { 319 PetscFunctionBegin; 320 PetscLogPLB = b; 321 PetscLogPLE = e; 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 /*@C 326 PetscLogIsActive - Check if logging is currently in progress. 327 328 Not Collective 329 330 Output Parameter: 331 . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise 332 333 Level: beginner 334 335 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 336 @*/ 337 PetscErrorCode PetscLogIsActive(PetscBool *isActive) 338 { 339 PetscFunctionBegin; 340 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 341 PetscFunctionReturn(PETSC_SUCCESS); 342 } 343 344 /*@C 345 PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop 346 rates and object creation and should not slow programs down too much. 347 This routine may be called more than once. 348 349 Logically Collective over `PETSC_COMM_WORLD` 350 351 Options Database Key: 352 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 353 screen (for code configured with --with-log=1 (which is the default)) 354 355 Example Usage: 356 .vb 357 PetscInitialize(...); 358 PetscLogDefaultBegin(); 359 ... code ... 360 PetscLogView(viewer); or PetscLogDump(); 361 PetscFinalize(); 362 .ve 363 364 Level: advanced 365 366 Note: 367 `PetscLogView()` or `PetscLogDump()` actually cause the printing of 368 the logging information. 369 370 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 371 @*/ 372 PetscErrorCode PetscLogDefaultBegin(void) 373 { 374 PetscFunctionBegin; 375 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 376 PetscFunctionReturn(PETSC_SUCCESS); 377 } 378 379 /*@C 380 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 381 all events. This creates large log files and slows the program down. 382 383 Logically Collective on `PETSC_COMM_WORLD` 384 385 Options Database Key: 386 . -log_all - Prints extensive log information 387 388 Example Usage: 389 .vb 390 PetscInitialize(...); 391 PetscLogAllBegin(); 392 ... code ... 393 PetscLogDump(filename); 394 PetscFinalize(); 395 .ve 396 397 Level: advanced 398 399 Note: 400 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 401 intended for production runs since it logs only flop rates and object 402 creation (and shouldn't significantly slow the programs). 403 404 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 405 @*/ 406 PetscErrorCode PetscLogAllBegin(void) 407 { 408 PetscFunctionBegin; 409 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 410 PetscFunctionReturn(PETSC_SUCCESS); 411 } 412 413 /*@C 414 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 415 begins or ends, the event name is printed. 416 417 Logically Collective on `PETSC_COMM_WORLD` 418 419 Input Parameter: 420 . file - The file to print trace in (e.g. stdout) 421 422 Options Database Key: 423 . -log_trace [filename] - Activates `PetscLogTraceBegin()` 424 425 Level: intermediate 426 427 Notes: 428 `PetscLogTraceBegin()` prints the processor number, the execution time (sec), 429 then "Event begin:" or "Event end:" followed by the event name. 430 431 `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful 432 to determine where a program is hanging without running in the 433 debugger. Can be used in conjunction with the -info option. 434 435 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 436 @*/ 437 PetscErrorCode PetscLogTraceBegin(FILE *file) 438 { 439 PetscFunctionBegin; 440 petsc_tracefile = file; 441 442 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 443 PetscFunctionReturn(PETSC_SUCCESS); 444 } 445 446 /*@ 447 PetscLogActions - Determines whether actions are logged for the graphical viewer. 448 449 Not Collective 450 451 Input Parameter: 452 . flag - `PETSC_TRUE` if actions are to be logged 453 454 Options Database Key: 455 . -log_exclude_actions - Turns off actions logging 456 457 Level: intermediate 458 459 Note: 460 Logging of actions continues to consume more memory as the program 461 runs. Long running programs should consider turning this feature off. 462 463 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 464 @*/ 465 PetscErrorCode PetscLogActions(PetscBool flag) 466 { 467 PetscFunctionBegin; 468 petsc_logActions = flag; 469 PetscFunctionReturn(PETSC_SUCCESS); 470 } 471 472 /*@ 473 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 474 475 Not Collective 476 477 Input Parameter: 478 . flag - `PETSC_TRUE` if objects are to be logged 479 480 Options Database Key: 481 . -log_exclude_objects - Turns off objects logging 482 483 Level: intermediate 484 485 Note: 486 Logging of objects continues to consume more memory as the program 487 runs. Long running programs should consider turning this feature off. 488 489 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 490 @*/ 491 PetscErrorCode PetscLogObjects(PetscBool flag) 492 { 493 PetscFunctionBegin; 494 petsc_logObjects = flag; 495 PetscFunctionReturn(PETSC_SUCCESS); 496 } 497 498 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 499 /*@C 500 PetscLogStageRegister - Attaches a character string name to a logging stage. 501 502 Not Collective 503 504 Input Parameter: 505 . sname - The name to associate with that stage 506 507 Output Parameter: 508 . stage - The stage number 509 510 Level: intermediate 511 512 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 513 @*/ 514 PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage) 515 { 516 PetscStageLog stageLog; 517 PetscLogEvent event; 518 519 PetscFunctionBegin; 520 PetscCall(PetscLogGetStageLog(&stageLog)); 521 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 522 /* Copy events already changed in the main stage, this sucks */ 523 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 524 for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 525 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 526 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 527 if (perfstubs_initialized == PERFSTUBS_SUCCESS) PetscStackCallExternalVoid("ps_timer_create_", stageLog->stageInfo[*stage].timer = ps_timer_create_(sname)); 528 #endif 529 PetscFunctionReturn(PETSC_SUCCESS); 530 } 531 532 /*@C 533 PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage 534 535 Not Collective 536 537 Input Parameter: 538 . stage - The stage on which to log 539 540 Example Usage: 541 If the option -log_view is used to run the program containing the 542 following code, then 2 sets of summary data will be printed during 543 PetscFinalize(). 544 .vb 545 PetscInitialize(int *argc,char ***args,0,0); 546 [stage 0 of code] 547 PetscLogStagePush(1); 548 [stage 1 of code] 549 PetscLogStagePop(); 550 PetscBarrier(...); 551 [more stage 0 of code] 552 PetscFinalize(); 553 .ve 554 555 Level: intermediate 556 557 Note: 558 Use `PetscLogStageRegister()` to register a stage. 559 560 .seealso: [](ch_profiling), `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 561 @*/ 562 PetscErrorCode PetscLogStagePush(PetscLogStage stage) 563 { 564 PetscStageLog stageLog; 565 566 PetscFunctionBegin; 567 PetscCall(PetscLogGetStageLog(&stageLog)); 568 PetscCall(PetscStageLogPush(stageLog, stage)); 569 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 570 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stage].timer != NULL) PetscStackCallExternalVoid("ps_timer_start_", ps_timer_start_(stageLog->stageInfo[stage].timer)); 571 #endif 572 PetscFunctionReturn(PETSC_SUCCESS); 573 } 574 575 /*@C 576 PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()` 577 578 Not Collective 579 580 Example Usage: 581 If the option -log_view is used to run the program containing the 582 following code, then 2 sets of summary data will be printed during 583 PetscFinalize(). 584 .vb 585 PetscInitialize(int *argc,char ***args,0,0); 586 [stage 0 of code] 587 PetscLogStagePush(1); 588 [stage 1 of code] 589 PetscLogStagePop(); 590 PetscBarrier(...); 591 [more stage 0 of code] 592 PetscFinalize(); 593 .ve 594 595 Level: intermediate 596 597 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 598 @*/ 599 PetscErrorCode PetscLogStagePop(void) 600 { 601 PetscStageLog stageLog; 602 603 PetscFunctionBegin; 604 PetscCall(PetscLogGetStageLog(&stageLog)); 605 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 606 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stageLog->curStage].timer != NULL) PetscStackCallExternalVoid("ps_timer_stop_", ps_timer_stop_(stageLog->stageInfo[stageLog->curStage].timer)); 607 #endif 608 PetscCall(PetscStageLogPop(stageLog)); 609 PetscFunctionReturn(PETSC_SUCCESS); 610 } 611 612 /*@ 613 PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 614 615 Not Collective 616 617 Input Parameters: 618 + stage - The stage 619 - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 620 621 Level: intermediate 622 623 Note: 624 If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist 625 626 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 627 @*/ 628 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) 629 { 630 PetscStageLog stageLog; 631 632 PetscFunctionBegin; 633 PetscCall(PetscLogGetStageLog(&stageLog)); 634 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 635 PetscFunctionReturn(PETSC_SUCCESS); 636 } 637 638 /*@ 639 PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 640 641 Not Collective 642 643 Input Parameter: 644 . stage - The stage 645 646 Output Parameter: 647 . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 648 649 Level: intermediate 650 651 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 652 @*/ 653 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) 654 { 655 PetscStageLog stageLog; 656 657 PetscFunctionBegin; 658 PetscCall(PetscLogGetStageLog(&stageLog)); 659 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 660 PetscFunctionReturn(PETSC_SUCCESS); 661 } 662 663 /*@ 664 PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()` 665 666 Not Collective 667 668 Input Parameters: 669 + stage - The stage 670 - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 671 672 Level: intermediate 673 674 Developer Notes: 675 What does visible mean, needs to be documented. 676 677 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 678 @*/ 679 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) 680 { 681 PetscStageLog stageLog; 682 683 PetscFunctionBegin; 684 PetscCall(PetscLogGetStageLog(&stageLog)); 685 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 686 PetscFunctionReturn(PETSC_SUCCESS); 687 } 688 689 /*@ 690 PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()` 691 692 Not Collective 693 694 Input Parameter: 695 . stage - The stage 696 697 Output Parameter: 698 . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 699 700 Level: intermediate 701 702 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 703 @*/ 704 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) 705 { 706 PetscStageLog stageLog; 707 708 PetscFunctionBegin; 709 PetscCall(PetscLogGetStageLog(&stageLog)); 710 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 711 PetscFunctionReturn(PETSC_SUCCESS); 712 } 713 714 /*@C 715 PetscLogStageGetId - Returns the stage id when given the stage name. 716 717 Not Collective 718 719 Input Parameter: 720 . name - The stage name 721 722 Output Parameter: 723 . stage - The stage, , or -1 if no stage with that name exists 724 725 Level: intermediate 726 727 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 728 @*/ 729 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) 730 { 731 PetscStageLog stageLog; 732 733 PetscFunctionBegin; 734 PetscCall(PetscLogGetStageLog(&stageLog)); 735 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 736 PetscFunctionReturn(PETSC_SUCCESS); 737 } 738 739 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 740 741 /*@C 742 PetscLogEventRegister - Registers an event name for logging operations 743 744 Not Collective 745 746 Input Parameters: 747 + name - The name associated with the event 748 - classid - The classid associated to the class for this event, obtain either with 749 `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones 750 are only available in C code 751 752 Output Parameter: 753 . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`. 754 755 Example Usage: 756 .vb 757 PetscLogEvent USER_EVENT; 758 PetscClassId classid; 759 PetscLogDouble user_event_flops; 760 PetscClassIdRegister("class name",&classid); 761 PetscLogEventRegister("User event name",classid,&USER_EVENT); 762 PetscLogEventBegin(USER_EVENT,0,0,0,0); 763 [code segment to monitor] 764 PetscLogFlops(user_event_flops); 765 PetscLogEventEnd(USER_EVENT,0,0,0,0); 766 .ve 767 768 Level: intermediate 769 770 Notes: 771 PETSc automatically logs library events if the code has been 772 configured with --with-log (which is the default) and 773 -log_view or -log_all is specified. `PetscLogEventRegister()` is 774 intended for logging user events to supplement this PETSc 775 information. 776 777 PETSc can gather data for use with the utilities Jumpshot 778 (part of the MPICH distribution). If PETSc has been compiled 779 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 780 MPICH), the user can employ another command line option, -log_mpe, 781 to create a logfile, "mpe.log", which can be visualized 782 Jumpshot. 783 784 The classid is associated with each event so that classes of events 785 can be disabled simultaneously, such as all matrix events. The user 786 can either use an existing classid, such as `MAT_CLASSID`, or create 787 their own as shown in the example. 788 789 If an existing event with the same name exists, its event handle is 790 returned instead of creating a new event. 791 792 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 793 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 794 @*/ 795 PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event) 796 { 797 PetscStageLog stageLog; 798 int stage; 799 800 PetscFunctionBegin; 801 *event = PETSC_DECIDE; 802 PetscCall(PetscLogGetStageLog(&stageLog)); 803 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 804 if (*event > 0) PetscFunctionReturn(PETSC_SUCCESS); 805 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 806 for (stage = 0; stage < stageLog->numStages; stage++) { 807 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 808 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 809 } 810 PetscFunctionReturn(PETSC_SUCCESS); 811 } 812 813 /*@ 814 PetscLogEventSetCollective - Indicates that a particular event is collective. 815 816 Not Collective 817 818 Input Parameters: 819 + event - The event id 820 - collective - Boolean flag indicating whether a particular event is collective 821 822 Level: developer 823 824 Notes: 825 New events returned from `PetscLogEventRegister()` are collective by default. 826 827 Collective events are handled specially if the -log_sync is used. In that case the logging saves information about 828 two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication 829 to be performed. This option is useful to debug imbalance within the computations or communications 830 831 .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()` 832 @*/ 833 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective) 834 { 835 PetscStageLog stageLog; 836 PetscEventRegLog eventRegLog; 837 838 PetscFunctionBegin; 839 PetscCall(PetscLogGetStageLog(&stageLog)); 840 PetscCall(PetscStageLogGetEventRegLog(stageLog, &eventRegLog)); 841 PetscCheck(event >= 0 && event <= eventRegLog->numEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid event id"); 842 eventRegLog->eventInfo[event].collective = collective; 843 PetscFunctionReturn(PETSC_SUCCESS); 844 } 845 846 /*@ 847 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 848 849 Not Collective 850 851 Input Parameter: 852 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 853 854 Level: developer 855 856 .seealso: [](ch_profiling), `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 857 @*/ 858 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) 859 { 860 PetscStageLog stageLog; 861 int stage; 862 863 PetscFunctionBegin; 864 PetscCall(PetscLogGetStageLog(&stageLog)); 865 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 866 PetscFunctionReturn(PETSC_SUCCESS); 867 } 868 869 /*@ 870 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 871 872 Not Collective 873 874 Input Parameter: 875 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 876 877 Level: developer 878 879 Note: 880 If a class is excluded then events associated with that class are not logged. 881 882 .seealso: [](ch_profiling), `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 883 @*/ 884 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) 885 { 886 PetscStageLog stageLog; 887 int stage; 888 889 PetscFunctionBegin; 890 PetscCall(PetscLogGetStageLog(&stageLog)); 891 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 892 PetscFunctionReturn(PETSC_SUCCESS); 893 } 894 895 /*@ 896 PetscLogEventActivate - Indicates that a particular event should be logged. 897 898 Not Collective 899 900 Input Parameter: 901 . event - The event id 902 903 Example Usage: 904 .vb 905 PetscLogEventDeactivate(VEC_SetValues); 906 [code where you do not want to log VecSetValues()] 907 PetscLogEventActivate(VEC_SetValues); 908 [code where you do want to log VecSetValues()] 909 .ve 910 911 Level: advanced 912 913 Note: 914 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 915 or an event number obtained with `PetscLogEventRegister()`. 916 917 .seealso: [](ch_profiling), `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 918 @*/ 919 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) 920 { 921 PetscStageLog stageLog; 922 int stage; 923 924 PetscFunctionBegin; 925 PetscCall(PetscLogGetStageLog(&stageLog)); 926 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 927 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 928 PetscFunctionReturn(PETSC_SUCCESS); 929 } 930 931 /*@ 932 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 933 934 Not Collective 935 936 Input Parameter: 937 . event - The event id 938 939 Example Usage: 940 .vb 941 PetscLogEventDeactivate(VEC_SetValues); 942 [code where you do not want to log VecSetValues()] 943 PetscLogEventActivate(VEC_SetValues); 944 [code where you do want to log VecSetValues()] 945 .ve 946 947 Level: advanced 948 949 Note: 950 The event may be either a pre-defined PETSc event (found in 951 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 952 953 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 954 @*/ 955 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) 956 { 957 PetscStageLog stageLog; 958 int stage; 959 960 PetscFunctionBegin; 961 PetscCall(PetscLogGetStageLog(&stageLog)); 962 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 963 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 964 PetscFunctionReturn(PETSC_SUCCESS); 965 } 966 967 /*@ 968 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called 969 970 Not Collective 971 972 Input Parameter: 973 . event - The event id 974 975 Example Usage: 976 .vb 977 PetscLogEventDeactivatePush(VEC_SetValues); 978 [code where you do not want to log VecSetValues()] 979 PetscLogEventDeactivatePop(VEC_SetValues); 980 [code where you do want to log VecSetValues()] 981 .ve 982 983 Level: advanced 984 985 Note: 986 The event may be either a pre-defined PETSc event (found in 987 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 988 989 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()` 990 @*/ 991 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) 992 { 993 PetscStageLog stageLog; 994 int stage; 995 996 PetscFunctionBegin; 997 PetscCall(PetscLogGetStageLog(&stageLog)); 998 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 999 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 1000 PetscFunctionReturn(PETSC_SUCCESS); 1001 } 1002 1003 /*@ 1004 PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()` 1005 1006 Not Collective 1007 1008 Input Parameter: 1009 . event - The event id 1010 1011 Example Usage: 1012 .vb 1013 PetscLogEventDeactivatePush(VEC_SetValues); 1014 [code where you do not want to log VecSetValues()] 1015 PetscLogEventDeactivatePop(VEC_SetValues); 1016 [code where you do want to log VecSetValues()] 1017 .ve 1018 1019 Level: advanced 1020 1021 Note: 1022 The event may be either a pre-defined PETSc event (found in 1023 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 1024 1025 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 1026 @*/ 1027 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) 1028 { 1029 PetscStageLog stageLog; 1030 int stage; 1031 1032 PetscFunctionBegin; 1033 PetscCall(PetscLogGetStageLog(&stageLog)); 1034 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1035 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 1036 PetscFunctionReturn(PETSC_SUCCESS); 1037 } 1038 1039 /*@ 1040 PetscLogEventSetActiveAll - Turns on logging of all events 1041 1042 Not Collective 1043 1044 Input Parameters: 1045 + event - The event id 1046 - isActive - The activity flag determining whether the event is logged 1047 1048 Level: advanced 1049 1050 .seealso: [](ch_profiling), `PlogEventActivate()`, `PlogEventDeactivate()` 1051 @*/ 1052 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) 1053 { 1054 PetscStageLog stageLog; 1055 int stage; 1056 1057 PetscFunctionBegin; 1058 PetscCall(PetscLogGetStageLog(&stageLog)); 1059 for (stage = 0; stage < stageLog->numStages; stage++) { 1060 if (isActive) { 1061 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 1062 } else { 1063 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 1064 } 1065 } 1066 PetscFunctionReturn(PETSC_SUCCESS); 1067 } 1068 1069 /*@ 1070 PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage 1071 1072 Not Collective 1073 1074 Input Parameter: 1075 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 1076 1077 Level: developer 1078 1079 .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 1080 @*/ 1081 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) 1082 { 1083 PetscStageLog stageLog; 1084 int stage; 1085 1086 PetscFunctionBegin; 1087 PetscCall(PetscLogGetStageLog(&stageLog)); 1088 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1089 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1090 PetscFunctionReturn(PETSC_SUCCESS); 1091 } 1092 1093 /*@ 1094 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage 1095 1096 Not Collective 1097 1098 Input Parameter: 1099 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 1100 1101 Level: developer 1102 1103 .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 1104 @*/ 1105 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) 1106 { 1107 PetscStageLog stageLog; 1108 int stage; 1109 1110 PetscFunctionBegin; 1111 PetscCall(PetscLogGetStageLog(&stageLog)); 1112 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1113 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1114 PetscFunctionReturn(PETSC_SUCCESS); 1115 } 1116 1117 /*MC 1118 PetscLogEventSync - Synchronizes the beginning of a user event. 1119 1120 Synopsis: 1121 #include <petsclog.h> 1122 PetscErrorCode PetscLogEventSync(int e, MPI_Comm comm) 1123 1124 Collective 1125 1126 Input Parameters: 1127 + e - integer associated with the event obtained from PetscLogEventRegister() 1128 - comm - an MPI communicator 1129 1130 Example Usage: 1131 .vb 1132 PetscLogEvent USER_EVENT; 1133 1134 PetscLogEventRegister("User event", 0, &USER_EVENT); 1135 PetscLogEventSync(USER_EVENT, PETSC_COMM_WORLD); 1136 PetscLogEventBegin(USER_EVENT, 0, 0, 0, 0); 1137 [code segment to monitor] 1138 PetscLogEventEnd(USER_EVENT, 0, 0, 0 , 0); 1139 .ve 1140 1141 Level: developer 1142 1143 Note: 1144 This routine should be called only if there is not a `PetscObject` available to pass to 1145 `PetscLogEventBegin()`. 1146 1147 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1148 M*/ 1149 1150 /*MC 1151 PetscLogEventBegin - Logs the beginning of a user event. 1152 1153 Synopsis: 1154 #include <petsclog.h> 1155 PetscErrorCode PetscLogEventBegin(int e, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 1156 1157 Not Collective 1158 1159 Input Parameters: 1160 + e - integer associated with the event obtained from PetscLogEventRegister() 1161 - o1,o2,o3,o4 - objects associated with the event, or 0 1162 1163 Fortran Synopsis: 1164 void PetscLogEventBegin(int e, PetscErrorCode ierr) 1165 1166 Example Usage: 1167 .vb 1168 PetscLogEvent USER_EVENT; 1169 1170 PetscLogDouble user_event_flops; 1171 PetscLogEventRegister("User event",0, &USER_EVENT); 1172 PetscLogEventBegin(USER_EVENT, 0, 0, 0, 0); 1173 [code segment to monitor] 1174 PetscLogFlops(user_event_flops); 1175 PetscLogEventEnd(USER_EVENT, 0, 0, 0, 0); 1176 .ve 1177 1178 Level: intermediate 1179 1180 Developer Note: 1181 `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly 1182 handling the errors that occur in the macro directly because other packages that use this 1183 macros have used them in their own functions or methods that do not return error codes and it 1184 would be disruptive to change the current behavior. 1185 1186 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1187 M*/ 1188 1189 /*MC 1190 PetscLogEventEnd - Log the end of a user event. 1191 1192 Synopsis: 1193 #include <petsclog.h> 1194 PetscErrorCode PetscLogEventEnd(int e, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 1195 1196 Not Collective 1197 1198 Input Parameters: 1199 + e - integer associated with the event obtained with PetscLogEventRegister() 1200 - o1,o2,o3,o4 - objects associated with the event, or 0 1201 1202 Fortran Synopsis: 1203 void PetscLogEventEnd(int e, PetscErrorCode ierr) 1204 1205 Example Usage: 1206 .vb 1207 PetscLogEvent USER_EVENT; 1208 1209 PetscLogDouble user_event_flops; 1210 PetscLogEventRegister("User event", 0, &USER_EVENT); 1211 PetscLogEventBegin(USER_EVENT, 0, 0, 0, 0); 1212 [code segment to monitor] 1213 PetscLogFlops(user_event_flops); 1214 PetscLogEventEnd(USER_EVENT, 0, 0, 0, 0); 1215 .ve 1216 1217 Level: intermediate 1218 1219 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1220 M*/ 1221 1222 /*@C 1223 PetscLogEventGetId - Returns the event id when given the event name. 1224 1225 Not Collective 1226 1227 Input Parameter: 1228 . name - The event name 1229 1230 Output Parameter: 1231 . event - The event, or -1 if no event with that name exists 1232 1233 Level: intermediate 1234 1235 .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1236 @*/ 1237 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) 1238 { 1239 PetscStageLog stageLog; 1240 1241 PetscFunctionBegin; 1242 PetscCall(PetscLogGetStageLog(&stageLog)); 1243 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1244 PetscFunctionReturn(PETSC_SUCCESS); 1245 } 1246 1247 PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event) 1248 { 1249 PetscFunctionBegin; 1250 if (!PetscDefined(HAVE_THREADSAFETY)) PetscCall(PetscIntStackPush(current_log_event_stack, event)); 1251 PetscFunctionReturn(PETSC_SUCCESS); 1252 } 1253 1254 PetscErrorCode PetscLogPopCurrentEvent_Internal(void) 1255 { 1256 PetscFunctionBegin; 1257 if (!PetscDefined(HAVE_THREADSAFETY)) PetscCall(PetscIntStackPop(current_log_event_stack, NULL)); 1258 PetscFunctionReturn(PETSC_SUCCESS); 1259 } 1260 1261 PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event) 1262 { 1263 PetscBool empty; 1264 1265 PetscFunctionBegin; 1266 PetscAssertPointer(event, 1); 1267 *event = PETSC_DECIDE; 1268 PetscCall(PetscIntStackEmpty(current_log_event_stack, &empty)); 1269 if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack, event)); 1270 PetscFunctionReturn(PETSC_SUCCESS); 1271 } 1272 1273 PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event) 1274 { 1275 PetscFunctionBegin; 1276 if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event, NULL, NULL, NULL, NULL)); 1277 PetscFunctionReturn(PETSC_SUCCESS); 1278 } 1279 1280 PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event) 1281 { 1282 PetscStageLog stageLog; 1283 PetscEventPerfLog eventLog; 1284 int stage; 1285 1286 PetscFunctionBegin; 1287 if (event == PETSC_DECIDE) PetscFunctionReturn(PETSC_SUCCESS); 1288 PetscCall(PetscLogEventBegin(event, NULL, NULL, NULL, NULL)); 1289 PetscCall(PetscLogGetStageLog(&stageLog)); 1290 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1291 PetscCall(PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog)); 1292 eventLog->eventInfo[event].count--; 1293 PetscFunctionReturn(PETSC_SUCCESS); 1294 } 1295 1296 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1297 /*@C 1298 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1299 be read by bin/petscview. This program no longer exists. 1300 1301 Collective on `PETSC_COMM_WORLD` 1302 1303 Input Parameter: 1304 . sname - an optional file name 1305 1306 Example Usage: 1307 .vb 1308 PetscInitialize(...); 1309 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1310 ... code ... 1311 PetscLogDump(filename); 1312 PetscFinalize(); 1313 .ve 1314 1315 Level: advanced 1316 1317 Note: 1318 The default file name is Log.<rank> where <rank> is the MPI process rank. If no name is specified, 1319 this file will be used. 1320 1321 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1322 @*/ 1323 PetscErrorCode PetscLogDump(const char sname[]) 1324 { 1325 PetscStageLog stageLog; 1326 PetscEventPerfInfo *eventInfo; 1327 FILE *fd; 1328 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1329 PetscLogDouble flops, _TotalTime; 1330 PetscMPIInt rank; 1331 int action, object, curStage; 1332 PetscLogEvent event; 1333 1334 PetscFunctionBegin; 1335 /* Calculate the total elapsed time */ 1336 PetscCall(PetscTime(&_TotalTime)); 1337 _TotalTime -= petsc_BaseTime; 1338 /* Open log file */ 1339 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1340 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank)); 1341 PetscCall(PetscFixFilename(file, fname)); 1342 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1343 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1344 /* Output totals */ 1345 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1346 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1347 /* Output actions */ 1348 if (petsc_logActions) { 1349 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1350 for (action = 0; action < petsc_numActions; action++) { 1351 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1352 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1353 } 1354 } 1355 /* Output objects */ 1356 if (petsc_logObjects) { 1357 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1358 for (object = 0; object < petsc_numObjects; object++) { 1359 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem)); 1360 if (!petsc_objects[object].name[0]) { 1361 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n")); 1362 } else { 1363 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1364 } 1365 if (petsc_objects[object].info[0] != 0) { 1366 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1367 } else { 1368 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1369 } 1370 } 1371 } 1372 /* Output events */ 1373 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1374 PetscCall(PetscLogGetStageLog(&stageLog)); 1375 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1376 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1377 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1378 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time; 1379 else flops = 0.0; 1380 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops)); 1381 } 1382 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1383 PetscFunctionReturn(PETSC_SUCCESS); 1384 } 1385 1386 /* 1387 PetscLogView_Detailed - Each process prints the times for its own events 1388 1389 */ 1390 static PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) 1391 { 1392 PetscStageLog stageLog; 1393 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1394 PetscLogDouble locTotalTime, numRed, maxMem; 1395 int numStages, numEvents, stage, event; 1396 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1397 PetscMPIInt rank, size; 1398 1399 PetscFunctionBegin; 1400 PetscCallMPI(MPI_Comm_size(comm, &size)); 1401 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1402 /* Must preserve reduction count before we go on */ 1403 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1404 /* Get the total elapsed time */ 1405 PetscCall(PetscTime(&locTotalTime)); 1406 locTotalTime -= petsc_BaseTime; 1407 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 1408 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 1409 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 1410 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 1411 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 1412 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 1413 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 1414 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 1415 PetscCall(PetscLogGetStageLog(&stageLog)); 1416 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1417 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1418 for (stage = 0; stage < numStages; stage++) { 1419 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name)); 1420 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name)); 1421 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1422 for (event = 0; event < numEvents; event++) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name)); 1423 } 1424 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1425 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1426 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1427 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1428 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1429 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1430 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1431 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects)); 1432 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1433 PetscCall(PetscViewerFlush(viewer)); 1434 for (stage = 0; stage < numStages; stage++) { 1435 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1436 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, 1437 stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1438 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1439 for (event = 0; event < numEvents; event++) { 1440 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1441 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1442 stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, 1443 eventInfo->flops)); 1444 if (eventInfo->dof[0] >= 0.) { 1445 PetscInt d, e; 1446 1447 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1448 for (d = 0; d < 8; ++d) { 1449 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1450 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1451 } 1452 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1453 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1454 for (e = 0; e < 8; ++e) { 1455 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1456 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1457 } 1458 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1459 } 1460 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1461 } 1462 } 1463 PetscCall(PetscViewerFlush(viewer)); 1464 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1465 PetscFunctionReturn(PETSC_SUCCESS); 1466 } 1467 1468 /* 1469 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1470 */ 1471 static PetscErrorCode PetscLogView_CSV(PetscViewer viewer) 1472 { 1473 PetscStageLog stageLog; 1474 PetscEventPerfInfo *eventInfo = NULL; 1475 PetscLogDouble locTotalTime, maxMem; 1476 int numStages, numEvents, stage, event; 1477 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1478 PetscMPIInt rank, size; 1479 1480 PetscFunctionBegin; 1481 PetscCallMPI(MPI_Comm_size(comm, &size)); 1482 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1483 /* Must preserve reduction count before we go on */ 1484 /* Get the total elapsed time */ 1485 PetscCall(PetscTime(&locTotalTime)); 1486 locTotalTime -= petsc_BaseTime; 1487 PetscCall(PetscLogGetStageLog(&stageLog)); 1488 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1489 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1490 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1491 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1492 PetscCall(PetscViewerFlush(viewer)); 1493 for (stage = 0; stage < numStages; stage++) { 1494 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1495 1496 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1497 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1498 for (event = 0; event < numEvents; event++) { 1499 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1500 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, 1501 eventInfo->numReductions, eventInfo->flops)); 1502 if (eventInfo->dof[0] >= 0.) { 1503 PetscInt d, e; 1504 1505 for (d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1506 for (e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1507 } 1508 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1509 } 1510 } 1511 PetscCall(PetscViewerFlush(viewer)); 1512 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1513 PetscFunctionReturn(PETSC_SUCCESS); 1514 } 1515 1516 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) 1517 { 1518 PetscFunctionBegin; 1519 if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS); 1520 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1521 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1522 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1523 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1524 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1525 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1526 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1527 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1528 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1529 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1530 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1531 PetscFunctionReturn(PETSC_SUCCESS); 1532 } 1533 1534 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) 1535 { 1536 PetscFunctionBegin; 1537 if (PetscDefined(USE_DEBUG)) { 1538 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1539 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1540 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1541 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1542 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1543 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1544 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1545 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1546 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1547 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1548 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1549 } 1550 PetscFunctionReturn(PETSC_SUCCESS); 1551 } 1552 1553 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) 1554 { 1555 #if defined(PETSC_HAVE_DEVICE) 1556 PetscMPIInt size; 1557 PetscBool deviceInitialized = PETSC_FALSE; 1558 1559 PetscFunctionBegin; 1560 PetscCallMPI(MPI_Comm_size(comm, &size)); 1561 for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) { 1562 const PetscDeviceType dtype = PetscDeviceTypeCast(i); 1563 if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */ 1564 deviceInitialized = PETSC_TRUE; 1565 break; 1566 } 1567 } 1568 /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */ 1569 if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS); 1570 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1571 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1572 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1573 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1574 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1575 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1576 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1577 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1578 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1579 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1580 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1581 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1582 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1583 PetscFunctionReturn(PETSC_SUCCESS); 1584 #else 1585 (void)comm; 1586 (void)fd; 1587 return PETSC_SUCCESS; 1588 #endif 1589 } 1590 1591 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) 1592 { 1593 #if defined(PETSC_HAVE_DEVICE) 1594 PetscFunctionBegin; 1595 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS); 1596 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1597 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1598 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1599 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1600 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1601 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1602 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1603 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1604 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1605 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1606 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1607 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1608 PetscFunctionReturn(PETSC_SUCCESS); 1609 #else 1610 (void)comm; 1611 (void)fd; 1612 return PETSC_SUCCESS; 1613 #endif 1614 } 1615 1616 static PetscErrorCode PetscLogView_Default(PetscViewer viewer) 1617 { 1618 FILE *fd; 1619 PetscLogDouble zero = 0.0; 1620 PetscStageLog stageLog; 1621 PetscStageInfo *stageInfo = NULL; 1622 PetscEventPerfInfo *eventInfo = NULL; 1623 PetscClassPerfInfo *classInfo; 1624 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1625 const char *name; 1626 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1627 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1628 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1629 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1630 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1631 PetscLogDouble min, max, tot, ratio, avg, x, y; 1632 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1633 #if defined(PETSC_HAVE_DEVICE) 1634 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1635 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1636 #endif 1637 PetscMPIInt minC, maxC; 1638 PetscMPIInt size, rank; 1639 PetscBool *localStageUsed, *stageUsed; 1640 PetscBool *localStageVisible, *stageVisible; 1641 int numStages, localNumEvents, numEvents; 1642 int stage, oclass; 1643 PetscLogEvent event; 1644 char version[256]; 1645 MPI_Comm comm; 1646 #if defined(PETSC_HAVE_DEVICE) 1647 PetscLogEvent eventid; 1648 PetscInt64 nas = 0x7FF0000000000002; 1649 #endif 1650 1651 PetscFunctionBegin; 1652 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1653 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1654 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1655 PetscCallMPI(MPI_Comm_size(comm, &size)); 1656 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1657 /* Get the total elapsed time */ 1658 PetscCall(PetscTime(&locTotalTime)); 1659 locTotalTime -= petsc_BaseTime; 1660 1661 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1662 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1663 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1664 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1665 PetscCall(PetscLogViewWarnSync(comm, fd)); 1666 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1667 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1668 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1669 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1670 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1671 PetscCall(PetscGetUserName(username, sizeof(username))); 1672 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1673 PetscCall(PetscGetDate(date, sizeof(date))); 1674 PetscCall(PetscGetVersion(version, sizeof(version))); 1675 if (size == 1) { 1676 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1677 } else { 1678 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1679 } 1680 #if defined(PETSC_HAVE_OPENMP) 1681 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1682 #endif 1683 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1684 1685 /* Must preserve reduction count before we go on */ 1686 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1687 1688 /* Calculate summary information */ 1689 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1690 /* Time */ 1691 PetscCall(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1692 PetscCall(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1693 PetscCall(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1694 avg = tot / ((PetscLogDouble)size); 1695 if (min != 0.0) ratio = max / min; 1696 else ratio = 0.0; 1697 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1698 TotalTime = tot; 1699 /* Objects */ 1700 avg = (PetscLogDouble)petsc_numObjects; 1701 PetscCall(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1702 PetscCall(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1703 PetscCall(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1704 avg = tot / ((PetscLogDouble)size); 1705 if (min != 0.0) ratio = max / min; 1706 else ratio = 0.0; 1707 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1708 /* Flops */ 1709 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1710 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1711 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1712 avg = tot / ((PetscLogDouble)size); 1713 if (min != 0.0) ratio = max / min; 1714 else ratio = 0.0; 1715 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1716 TotalFlops = tot; 1717 /* Flops/sec -- Must talk to Barry here */ 1718 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1719 else flops = 0.0; 1720 PetscCall(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1721 PetscCall(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1722 PetscCall(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1723 avg = tot / ((PetscLogDouble)size); 1724 if (min != 0.0) ratio = max / min; 1725 else ratio = 0.0; 1726 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1727 /* Memory */ 1728 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1729 if (mem > 0.0) { 1730 PetscCall(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1731 PetscCall(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1732 PetscCall(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1733 avg = tot / ((PetscLogDouble)size); 1734 if (min != 0.0) ratio = max / min; 1735 else ratio = 0.0; 1736 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1737 } 1738 /* Messages */ 1739 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1740 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1741 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1742 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1743 avg = tot / ((PetscLogDouble)size); 1744 if (min != 0.0) ratio = max / min; 1745 else ratio = 0.0; 1746 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1747 numMessages = tot; 1748 /* Message Lengths */ 1749 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1750 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1751 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1752 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1753 if (numMessages != 0) avg = tot / numMessages; 1754 else avg = 0.0; 1755 if (min != 0.0) ratio = max / min; 1756 else ratio = 0.0; 1757 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1758 messageLength = tot; 1759 /* Reductions */ 1760 PetscCall(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1761 PetscCall(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1762 PetscCall(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1763 if (min != 0.0) ratio = max / min; 1764 else ratio = 0.0; 1765 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1766 numReductions = red; /* wrong because uses count from process zero */ 1767 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1768 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1769 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1770 1771 /* Get total number of stages -- 1772 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1773 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1774 This seems best accomplished by assoicating a communicator with each stage. 1775 */ 1776 PetscCall(PetscLogGetStageLog(&stageLog)); 1777 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1778 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1779 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1780 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1781 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1782 if (numStages > 0) { 1783 stageInfo = stageLog->stageInfo; 1784 for (stage = 0; stage < numStages; stage++) { 1785 if (stage < stageLog->numStages) { 1786 localStageUsed[stage] = stageInfo[stage].used; 1787 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1788 } else { 1789 localStageUsed[stage] = PETSC_FALSE; 1790 localStageVisible[stage] = PETSC_TRUE; 1791 } 1792 } 1793 PetscCall(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1794 PetscCall(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1795 for (stage = 0; stage < numStages; stage++) { 1796 if (stageUsed[stage]) { 1797 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1798 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1799 break; 1800 } 1801 } 1802 for (stage = 0; stage < numStages; stage++) { 1803 if (!stageUsed[stage]) continue; 1804 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1805 if (localStageUsed[stage]) { 1806 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1807 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1808 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1809 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1810 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1811 name = stageInfo[stage].name; 1812 } else { 1813 PetscCall(MPIU_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1814 PetscCall(MPIU_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1815 PetscCall(MPIU_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1816 PetscCall(MPIU_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1817 PetscCall(MPIU_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1818 name = ""; 1819 } 1820 mess *= 0.5; 1821 messLen *= 0.5; 1822 red /= size; 1823 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1824 else fracTime = 0.0; 1825 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1826 else fracFlops = 0.0; 1827 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1828 if (numMessages != 0.0) fracMessages = mess / numMessages; 1829 else fracMessages = 0.0; 1830 if (mess != 0.0) avgMessLen = messLen / mess; 1831 else avgMessLen = 0.0; 1832 if (messageLength != 0.0) fracLength = messLen / messageLength; 1833 else fracLength = 0.0; 1834 if (numReductions != 0.0) fracReductions = red / numReductions; 1835 else fracReductions = 0.0; 1836 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1837 } 1838 } 1839 1840 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1841 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1842 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1843 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1844 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1845 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1846 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1847 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1848 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1849 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1850 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1851 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1852 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1853 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1854 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1855 if (PetscLogMemory) { 1856 PetscCall(PetscFPrintf(comm, fd, " Memory usage is summed over all MPI processes, it is given in mega-bytes\n")); 1857 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1858 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1859 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1860 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1861 } 1862 #if defined(PETSC_HAVE_DEVICE) 1863 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1864 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1865 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1866 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1867 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1868 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1869 #endif 1870 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1871 1872 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1873 1874 /* Report events */ 1875 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1876 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1877 #if defined(PETSC_HAVE_DEVICE) 1878 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1879 #endif 1880 PetscCall(PetscFPrintf(comm, fd, "\n")); 1881 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1882 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1883 #if defined(PETSC_HAVE_DEVICE) 1884 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1885 #endif 1886 PetscCall(PetscFPrintf(comm, fd, "\n")); 1887 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1888 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1889 #if defined(PETSC_HAVE_DEVICE) 1890 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1891 #endif 1892 PetscCall(PetscFPrintf(comm, fd, "\n")); 1893 1894 #if defined(PETSC_HAVE_DEVICE) 1895 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1896 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1897 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1898 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1899 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1900 #endif 1901 1902 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1903 for (stage = 0; stage < numStages; stage++) { 1904 if (!stageVisible[stage]) continue; 1905 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1906 if (localStageUsed[stage]) { 1907 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1908 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1909 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1910 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1911 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1912 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1913 } else { 1914 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1915 PetscCall(MPIU_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1916 PetscCall(MPIU_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1917 PetscCall(MPIU_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1918 PetscCall(MPIU_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1919 PetscCall(MPIU_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1920 } 1921 mess *= 0.5; 1922 messLen *= 0.5; 1923 red /= size; 1924 1925 /* Get total number of events in this stage -- 1926 Currently, a single processor can register more events than another, but events must all be registered in order, 1927 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1928 on the event ID. This seems best accomplished by associating a communicator with each stage. 1929 1930 Problem: If the event did not happen on proc 1, its name will not be available. 1931 Problem: Event visibility is not implemented 1932 */ 1933 if (localStageUsed[stage]) { 1934 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1935 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1936 } else localNumEvents = 0; 1937 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1938 for (event = 0; event < numEvents; event++) { 1939 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1940 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1941 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; 1942 else flopr = 0.0; 1943 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1944 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1945 PetscCall(MPIU_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1946 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1947 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1948 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1949 PetscCall(MPIU_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1950 PetscCall(MPIU_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1951 PetscCall(MPIU_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1952 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1953 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1954 if (PetscLogMemory) { 1955 PetscCall(MPIU_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1956 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1957 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1958 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1959 } 1960 #if defined(PETSC_HAVE_DEVICE) 1961 PetscCall(MPIU_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1962 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1963 PetscCall(MPIU_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1964 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1965 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1966 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1967 #endif 1968 name = stageLog->eventLog->eventInfo[event].name; 1969 } else { 1970 int ierr = 0; 1971 1972 flopr = 0.0; 1973 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1974 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1975 PetscCall(MPIU_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1976 PetscCall(MPIU_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1977 PetscCall(MPIU_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1978 PetscCall(MPIU_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1979 PetscCall(MPIU_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1980 PetscCall(MPIU_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1981 PetscCall(MPIU_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1982 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1983 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1984 if (PetscLogMemory) { 1985 PetscCall(MPIU_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1986 PetscCall(MPIU_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1987 PetscCall(MPIU_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1988 PetscCall(MPIU_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1989 } 1990 #if defined(PETSC_HAVE_DEVICE) 1991 PetscCall(MPIU_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1992 PetscCall(MPIU_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1993 PetscCall(MPIU_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1994 PetscCall(MPIU_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1995 PetscCall(MPIU_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1996 PetscCall(MPIU_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1997 #endif 1998 name = ""; 1999 } 2000 if (mint < 0.0) { 2001 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name)); 2002 mint = 0; 2003 } 2004 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, name); 2005 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 2006 #if defined(PETSC_HAVE_DEVICE) 2007 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 2008 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 2009 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 2010 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 2011 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 2012 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 2013 } 2014 } 2015 #endif 2016 totm *= 0.5; 2017 totml *= 0.5; 2018 totr /= size; 2019 2020 if (maxC != 0) { 2021 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 2022 else ratC = 0.0; 2023 if (mint != 0.0) ratt = maxt / mint; 2024 else ratt = 0.0; 2025 if (minf != 0.0) ratf = maxf / minf; 2026 else ratf = 0.0; 2027 if (TotalTime != 0.0) fracTime = tott / TotalTime; 2028 else fracTime = 0.0; 2029 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 2030 else fracFlops = 0.0; 2031 if (stageTime != 0.0) fracStageTime = tott / stageTime; 2032 else fracStageTime = 0.0; 2033 if (flops != 0.0) fracStageFlops = totf / flops; 2034 else fracStageFlops = 0.0; 2035 if (numMessages != 0.0) fracMess = totm / numMessages; 2036 else fracMess = 0.0; 2037 if (messageLength != 0.0) fracMessLen = totml / messageLength; 2038 else fracMessLen = 0.0; 2039 if (numReductions != 0.0) fracRed = totr / numReductions; 2040 else fracRed = 0.0; 2041 if (mess != 0.0) fracStageMess = totm / mess; 2042 else fracStageMess = 0.0; 2043 if (messLen != 0.0) fracStageMessLen = totml / messLen; 2044 else fracStageMessLen = 0.0; 2045 if (red != 0.0) fracStageRed = totr / red; 2046 else fracStageRed = 0.0; 2047 if (totm != 0.0) totml /= totm; 2048 else totml = 0.0; 2049 if (maxt != 0.0) flopr = totf / maxt; 2050 else flopr = 0.0; 2051 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) 2052 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 2053 else 2054 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 2055 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 2056 #if defined(PETSC_HAVE_DEVICE) 2057 if (totf != 0.0) fracgflops = gflops / totf; 2058 else fracgflops = 0.0; 2059 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 2060 else gflopr = 0.0; 2061 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 2062 #endif 2063 PetscCall(PetscFPrintf(comm, fd, "\n")); 2064 } 2065 } 2066 } 2067 2068 /* Memory usage and object creation */ 2069 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 2070 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 2071 #if defined(PETSC_HAVE_DEVICE) 2072 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 2073 #endif 2074 PetscCall(PetscFPrintf(comm, fd, "\n")); 2075 PetscCall(PetscFPrintf(comm, fd, "\n")); 2076 2077 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 2078 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 2079 stats for stages local to processor sets. 2080 */ 2081 /* We should figure out the longest object name here (now 20 characters) */ 2082 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 2083 for (stage = 0; stage < numStages; stage++) { 2084 if (localStageUsed[stage]) { 2085 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 2086 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 2087 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 2088 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 2089 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions)); 2090 } 2091 } 2092 } else { 2093 if (!localStageVisible[stage]) continue; 2094 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 2095 } 2096 } 2097 2098 PetscCall(PetscFree(localStageUsed)); 2099 PetscCall(PetscFree(stageUsed)); 2100 PetscCall(PetscFree(localStageVisible)); 2101 PetscCall(PetscFree(stageVisible)); 2102 2103 /* Information unrelated to this particular run */ 2104 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 2105 PetscCall(PetscTime(&y)); 2106 PetscCall(PetscTime(&x)); 2107 PetscCall(PetscTime(&y)); 2108 PetscCall(PetscTime(&y)); 2109 PetscCall(PetscTime(&y)); 2110 PetscCall(PetscTime(&y)); 2111 PetscCall(PetscTime(&y)); 2112 PetscCall(PetscTime(&y)); 2113 PetscCall(PetscTime(&y)); 2114 PetscCall(PetscTime(&y)); 2115 PetscCall(PetscTime(&y)); 2116 PetscCall(PetscTime(&y)); 2117 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 2118 /* MPI information */ 2119 if (size > 1) { 2120 MPI_Status status; 2121 PetscMPIInt tag; 2122 MPI_Comm newcomm; 2123 2124 PetscCallMPI(MPI_Barrier(comm)); 2125 PetscCall(PetscTime(&x)); 2126 PetscCallMPI(MPI_Barrier(comm)); 2127 PetscCallMPI(MPI_Barrier(comm)); 2128 PetscCallMPI(MPI_Barrier(comm)); 2129 PetscCallMPI(MPI_Barrier(comm)); 2130 PetscCallMPI(MPI_Barrier(comm)); 2131 PetscCall(PetscTime(&y)); 2132 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 2133 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 2134 PetscCallMPI(MPI_Barrier(comm)); 2135 if (rank) { 2136 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 2137 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 2138 } else { 2139 PetscCall(PetscTime(&x)); 2140 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 2141 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 2142 PetscCall(PetscTime(&y)); 2143 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 2144 } 2145 PetscCall(PetscCommDestroy(&newcomm)); 2146 } 2147 PetscCall(PetscOptionsView(NULL, viewer)); 2148 2149 /* Machine and compile information */ 2150 #if defined(PETSC_USE_FORTRAN_KERNELS) 2151 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 2152 #else 2153 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 2154 #endif 2155 #if defined(PETSC_USE_64BIT_INDICES) 2156 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64-bit PetscInt\n")); 2157 #elif defined(PETSC_USE___FLOAT128) 2158 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32-bit PetscInt\n")); 2159 #endif 2160 #if defined(PETSC_USE_REAL_SINGLE) 2161 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 2162 #elif defined(PETSC_USE___FLOAT128) 2163 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 2164 #endif 2165 #if defined(PETSC_USE_REAL_MAT_SINGLE) 2166 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 2167 #else 2168 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 2169 #endif 2170 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 2171 2172 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 2173 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 2174 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 2175 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 2176 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 2177 2178 /* Cleanup */ 2179 PetscCall(PetscFPrintf(comm, fd, "\n")); 2180 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 2181 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 2182 PetscCall(PetscFPTrapPop()); 2183 PetscFunctionReturn(PETSC_SUCCESS); 2184 } 2185 2186 /*@C 2187 PetscLogView - Prints a summary of the logging. 2188 2189 Collective over MPI_Comm 2190 2191 Input Parameter: 2192 . viewer - an ASCII viewer 2193 2194 Options Database Keys: 2195 + -log_view [:filename] - Prints summary of log information 2196 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2197 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2198 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2199 . -log_view_memory - Also display memory usage in each event 2200 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2201 . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation 2202 - -log_trace [filename] - Displays a trace of what each process is doing 2203 2204 Level: beginner 2205 2206 Notes: 2207 It is possible to control the logging programmatically but we recommend using the options database approach whenever possible 2208 By default the summary is printed to stdout. 2209 2210 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2211 2212 If PETSc is configured with --with-logging=0 then this functionality is not available 2213 2214 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2215 directory then open filename.xml with your browser. Specific notes for certain browsers 2216 $ Firefox and Internet explorer - simply open the file 2217 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2218 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2219 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2220 your browser. 2221 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2222 window and render the XML log file contents. 2223 2224 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2225 2226 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2227 or using speedscope (https://www.speedscope.app). 2228 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2229 2230 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogDump()` 2231 @*/ 2232 PetscErrorCode PetscLogView(PetscViewer viewer) 2233 { 2234 PetscBool isascii; 2235 PetscViewerFormat format; 2236 int stage, lastStage; 2237 PetscStageLog stageLog; 2238 2239 PetscFunctionBegin; 2240 PetscCheck(PetscLogPLB, PETSC_COMM_SELF, PETSC_ERR_SUP, "Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2241 /* Pop off any stages the user forgot to remove */ 2242 lastStage = 0; 2243 PetscCall(PetscLogGetStageLog(&stageLog)); 2244 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2245 while (stage >= 0) { 2246 lastStage = stage; 2247 PetscCall(PetscStageLogPop(stageLog)); 2248 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2249 } 2250 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2251 PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Currently can only view logging to ASCII"); 2252 PetscCall(PetscViewerGetFormat(viewer, &format)); 2253 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2254 PetscCall(PetscLogView_Default(viewer)); 2255 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2256 PetscCall(PetscLogView_Detailed(viewer)); 2257 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2258 PetscCall(PetscLogView_CSV(viewer)); 2259 } else if (format == PETSC_VIEWER_ASCII_XML) { 2260 PetscCall(PetscLogView_Nested(viewer)); 2261 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2262 PetscCall(PetscLogView_Flamegraph(viewer)); 2263 } 2264 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2265 PetscFunctionReturn(PETSC_SUCCESS); 2266 } 2267 2268 /*@C 2269 PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed. 2270 2271 Collective on `PETSC_COMM_WORLD` 2272 2273 Level: developer 2274 2275 .seealso: [](ch_profiling), `PetscLogView()` 2276 @*/ 2277 PetscErrorCode PetscLogViewFromOptions(void) 2278 { 2279 PetscViewer viewer; 2280 PetscBool flg; 2281 PetscViewerFormat format; 2282 2283 PetscFunctionBegin; 2284 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg)); 2285 if (flg) { 2286 PetscCall(PetscViewerPushFormat(viewer, format)); 2287 PetscCall(PetscLogView(viewer)); 2288 PetscCall(PetscViewerPopFormat(viewer)); 2289 PetscCall(PetscViewerDestroy(&viewer)); 2290 } 2291 PetscFunctionReturn(PETSC_SUCCESS); 2292 } 2293 2294 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2295 /*@C 2296 PetscGetFlops - Returns the number of flops used on this processor 2297 since the program began. 2298 2299 Not Collective 2300 2301 Output Parameter: 2302 . flops - number of floating point operations 2303 2304 Level: intermediate 2305 2306 Notes: 2307 A global counter logs all PETSc flop counts. The user can use 2308 `PetscLogFlops()` to increment this counter to include flops for the 2309 application code. 2310 2311 A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank 2312 2313 .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()` 2314 @*/ 2315 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) 2316 { 2317 PetscFunctionBegin; 2318 *flops = petsc_TotalFlops; 2319 PetscFunctionReturn(PETSC_SUCCESS); 2320 } 2321 2322 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2323 { 2324 size_t fullLength; 2325 va_list Argp; 2326 2327 PetscFunctionBegin; 2328 if (!petsc_logObjects) PetscFunctionReturn(PETSC_SUCCESS); 2329 va_start(Argp, format); 2330 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp)); 2331 va_end(Argp); 2332 PetscFunctionReturn(PETSC_SUCCESS); 2333 } 2334 2335 /*MC 2336 PetscLogFlops - Adds floating point operations to the global counter. 2337 2338 Synopsis: 2339 #include <petsclog.h> 2340 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2341 2342 Not Collective 2343 2344 Input Parameter: 2345 . f - flop counter 2346 2347 Example Usage: 2348 .vb 2349 PetscLogEvent USER_EVENT; 2350 2351 PetscLogEventRegister("User event", 0, &USER_EVENT); 2352 PetscLogEventBegin(USER_EVENT, 0, 0, 0, 0); 2353 [code segment to monitor] 2354 PetscLogFlops(user_flops) 2355 PetscLogEventEnd(USER_EVENT, 0, 0, 0, 0); 2356 .ve 2357 2358 Level: intermediate 2359 2360 Note: 2361 A global counter logs all PETSc flop counts. The user can use PetscLogFlops() to increment 2362 this counter to include flops for the application code. 2363 2364 .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2365 M*/ 2366 2367 /*MC 2368 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) to get accurate 2369 timings 2370 2371 Synopsis: 2372 #include <petsclog.h> 2373 void PetscPreLoadBegin(PetscBool flag, char *name); 2374 2375 Not Collective 2376 2377 Input Parameters: 2378 + flag - `PETSC_TRUE` to run twice, `PETSC_FALSE` to run once, may be overridden with command 2379 line option `-preload true|false` 2380 - name - name of first stage (lines of code timed separately with `-log_view`) to be preloaded 2381 2382 Example Usage: 2383 .vb 2384 PetscPreLoadBegin(PETSC_TRUE, "first stage"); 2385 // lines of code 2386 PetscPreLoadStage("second stage"); 2387 // lines of code 2388 PetscPreLoadEnd(); 2389 .ve 2390 2391 Level: intermediate 2392 2393 Note: 2394 Only works in C/C++, not Fortran 2395 2396 Flags available within the macro\: 2397 + PetscPreLoadingUsed - `PETSC_TRUE` if we are or have done preloading 2398 . PetscPreLoadingOn - `PETSC_TRUE` if it is CURRENTLY doing preload 2399 . PetscPreLoadIt - `0` for the first computation (with preloading turned off it is only 2400 `0`) `1` for the second 2401 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is 2402 turned on 2403 2404 The first two variables are available throughout the program, the second two only between the 2405 `PetscPreLoadBegin()` and `PetscPreLoadEnd()` 2406 2407 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2408 M*/ 2409 2410 /*MC 2411 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) to get accurate 2412 timings 2413 2414 Synopsis: 2415 #include <petsclog.h> 2416 void PetscPreLoadEnd(void); 2417 2418 Not Collective 2419 2420 Example Usage: 2421 .vb 2422 PetscPreLoadBegin(PETSC_TRUE, "first stage"); 2423 // lines of code 2424 PetscPreLoadStage("second stage"); 2425 // lines of code 2426 PetscPreLoadEnd(); 2427 .ve 2428 2429 Level: intermediate 2430 2431 Note: 2432 Only works in C/C++ not fortran 2433 2434 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2435 M*/ 2436 2437 /*MC 2438 PetscPreLoadStage - Start a new segment of code to be timed separately to get accurate timings 2439 2440 Synopsis: 2441 #include <petsclog.h> 2442 void PetscPreLoadStage(char *name); 2443 2444 Not Collective 2445 2446 Example Usage: 2447 .vb 2448 PetscPreLoadBegin(PETSC_TRUE,"first stage"); 2449 // lines of code 2450 PetscPreLoadStage("second stage"); 2451 // lines of code 2452 PetscPreLoadEnd(); 2453 .ve 2454 2455 Level: intermediate 2456 2457 Note: 2458 Only works in C/C++ not fortran 2459 2460 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2461 M*/ 2462 2463 #if PetscDefined(HAVE_DEVICE) 2464 #include <petsc/private/deviceimpl.h> 2465 2466 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2467 2468 /* 2469 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2470 because it will result in timing results that cannot be interpreted. 2471 */ 2472 static PetscErrorCode PetscLogGpuTime_Off(void) 2473 { 2474 PetscLogGpuTimeFlag = PETSC_FALSE; 2475 return PETSC_SUCCESS; 2476 } 2477 2478 /*@C 2479 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2480 2481 Options Database Key: 2482 . -log_view_gpu_time - provide the GPU times in the `-log_view` output 2483 2484 Level: advanced 2485 2486 Notes: 2487 Turning on the timing of the GPU kernels can slow down the entire computation and should only 2488 be used when studying the performance of operations on GPU such as vector operations and 2489 matrix-vector operations. 2490 2491 This routine should only be called once near the beginning of the program. Once it is started 2492 it cannot be turned off. 2493 2494 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2495 @*/ 2496 PetscErrorCode PetscLogGpuTime(void) 2497 { 2498 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2499 PetscLogGpuTimeFlag = PETSC_TRUE; 2500 return PETSC_SUCCESS; 2501 } 2502 2503 /*@C 2504 PetscLogGpuTimeBegin - Start timer for device 2505 2506 Level: intermediate 2507 2508 Notes: 2509 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time 2510 devoted to GPU computations (excluding kernel launch times). 2511 2512 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of 2513 time devoted to GPU computations (including kernel launch times). 2514 2515 There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and 2516 `PetscLogGpuTimeEnd()` 2517 2518 This timer should NOT include times for data transfers between the GPU and CPU, nor setup 2519 actions such as allocating space. 2520 2521 The regular logging captures the time for data transfers and any CPU activities during the 2522 event. It is used to compute the flop rate on the GPU as it is actively engaged in running a 2523 kernel. 2524 2525 Developer Notes: 2526 The GPU event timer captures the execution time of all the kernels launched in the default 2527 stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`. 2528 2529 `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the 2530 default stream (stream 0). The device will record a time stamp for the event when it reaches 2531 that event in the stream. The function xxxEventSynchronize() is called in 2532 `PetsLogGpuTimeEnd()` to block CPU execution, but not continued GPU execution, until the 2533 timer event is recorded. 2534 2535 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2536 @*/ 2537 PetscErrorCode PetscLogGpuTimeBegin(void) 2538 { 2539 PetscFunctionBegin; 2540 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(PETSC_SUCCESS); 2541 if (PetscDefined(HAVE_DEVICE)) { 2542 PetscDeviceContext dctx; 2543 2544 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2545 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2546 } else { 2547 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2548 } 2549 PetscFunctionReturn(PETSC_SUCCESS); 2550 } 2551 2552 /*@C 2553 PetscLogGpuTimeEnd - Stop timer for device 2554 2555 Level: intermediate 2556 2557 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2558 @*/ 2559 PetscErrorCode PetscLogGpuTimeEnd(void) 2560 { 2561 PetscFunctionBegin; 2562 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(PETSC_SUCCESS); 2563 if (PetscDefined(HAVE_DEVICE)) { 2564 PetscDeviceContext dctx; 2565 PetscLogDouble elapsed; 2566 2567 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2568 PetscCall(PetscDeviceContextEndTimer_Internal(dctx, &elapsed)); 2569 petsc_gtime += (elapsed / 1000.0); 2570 } else { 2571 PetscCall(PetscTimeAdd(&petsc_gtime)); 2572 } 2573 PetscFunctionReturn(PETSC_SUCCESS); 2574 } 2575 2576 #endif /* end of PETSC_HAVE_DEVICE */ 2577 2578 #else /* end of -DPETSC_USE_LOG section */ 2579 2580 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2581 { 2582 PetscFunctionBegin; 2583 PetscFunctionReturn(PETSC_SUCCESS); 2584 } 2585 2586 #endif /* PETSC_USE_LOG*/ 2587 2588 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2589 PetscClassId PETSC_OBJECT_CLASSID = 0; 2590 2591 /*@C 2592 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2593 2594 Not Collective 2595 2596 Input Parameter: 2597 . name - The class name 2598 2599 Output Parameter: 2600 . oclass - The class id or classid 2601 2602 Level: developer 2603 2604 .seealso: [](ch_profiling), `PetscLogEventRegister()` 2605 @*/ 2606 PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass) 2607 { 2608 #if defined(PETSC_USE_LOG) 2609 PetscStageLog stageLog; 2610 PetscInt stage; 2611 #endif 2612 2613 PetscFunctionBegin; 2614 *oclass = ++PETSC_LARGEST_CLASSID; 2615 #if defined(PETSC_USE_LOG) 2616 PetscCall(PetscLogGetStageLog(&stageLog)); 2617 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2618 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2619 #endif 2620 PetscFunctionReturn(PETSC_SUCCESS); 2621 } 2622 2623 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2624 #include <mpe.h> 2625 2626 PetscBool PetscBeganMPE = PETSC_FALSE; 2627 2628 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2629 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2630 2631 /*@C 2632 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files and slows the 2633 program down. 2634 2635 Collective over `PETSC_COMM_WORLD` 2636 2637 Options Database Key: 2638 . -log_mpe - Prints extensive log information 2639 2640 Level: advanced 2641 2642 Note: 2643 A related routine is `PetscLogDefaultBegin()` (with the options key `-log_view`), which is 2644 intended for production runs since it logs only flop rates and object creation (and should 2645 not significantly slow the programs). 2646 2647 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2648 `PetscLogEventDeactivate()` 2649 @*/ 2650 PetscErrorCode PetscLogMPEBegin(void) 2651 { 2652 PetscFunctionBegin; 2653 /* Do MPE initialization */ 2654 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2655 PetscCall(PetscInfo(0, "Initializing MPE.\n")); 2656 PetscCall(MPE_Init_log()); 2657 2658 PetscBeganMPE = PETSC_TRUE; 2659 } else { 2660 PetscCall(PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n")); 2661 } 2662 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2663 PetscFunctionReturn(PETSC_SUCCESS); 2664 } 2665 2666 /*@C 2667 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2668 2669 Input Parameter: 2670 . sname - The filename to dump to 2671 2672 Collective over `PETSC_COMM_WORLD` 2673 2674 Level: advanced 2675 2676 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2677 @*/ 2678 PetscErrorCode PetscLogMPEDump(const char sname[]) 2679 { 2680 char name[PETSC_MAX_PATH_LEN]; 2681 2682 PetscFunctionBegin; 2683 if (PetscBeganMPE) { 2684 PetscCall(PetscInfo(0, "Finalizing MPE.\n")); 2685 if (sname) { 2686 PetscCall(PetscStrncpy(name, sname, sizeof(name))); 2687 } else { 2688 PetscCall(PetscGetProgramName(name, sizeof(name))); 2689 } 2690 PetscCall(MPE_Finish_log(name)); 2691 } else { 2692 PetscCall(PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n")); 2693 } 2694 PetscFunctionReturn(PETSC_SUCCESS); 2695 } 2696 2697 #define PETSC_RGB_COLORS_MAX 39 2698 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ", 2699 "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ", 2700 "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ", 2701 "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ", 2702 "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "}; 2703 2704 /*@C 2705 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()` 2706 2707 Not collective. Maybe it should be? 2708 2709 Output Parameter: 2710 . str - character string representing the color 2711 2712 Level: developer 2713 2714 .seealso: [](ch_profiling), `PetscLogEventRegister()` 2715 @*/ 2716 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) 2717 { 2718 static int idx = 0; 2719 2720 PetscFunctionBegin; 2721 *str = PetscLogMPERGBColors[idx]; 2722 idx = (idx + 1) % PETSC_RGB_COLORS_MAX; 2723 PetscFunctionReturn(PETSC_SUCCESS); 2724 } 2725 2726 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2727