1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 #include <petscdevice.h> 15 #include <petsc/private/deviceimpl.h> 16 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 17 #include <../src/sys/perfstubs/timer.h> 18 #endif 19 20 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 21 22 #if defined(PETSC_USE_LOG) 23 #include <petscmachineinfo.h> 24 #include <petscconfiginfo.h> 25 26 #if defined(PETSC_HAVE_THREADSAFETY) 27 28 PetscInt petsc_log_gid = -1; /* Global threadId counter */ 29 PETSC_TLS PetscInt petsc_log_tid = -1; /* Local threadId */ 30 31 /* shared variables */ 32 PetscSpinlock PetscLogSpinLock; 33 PetscHMapEvent eventInfoMap_th = NULL; 34 35 #endif 36 37 /* used in the MPI_XXX() count macros in petsclog.h */ 38 39 /* Action and object logging variables */ 40 Action *petsc_actions = NULL; 41 Object *petsc_objects = NULL; 42 PetscBool petsc_logActions = PETSC_FALSE; 43 PetscBool petsc_logObjects = PETSC_FALSE; 44 int petsc_numActions = 0, petsc_maxActions = 100; 45 int petsc_numObjects = 0, petsc_maxObjects = 100; 46 int petsc_numObjectsDestroyed = 0; 47 48 /* Global counters */ 49 PetscLogDouble petsc_BaseTime = 0.0; 50 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 51 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 52 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 53 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 54 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 55 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 56 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 57 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 58 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 59 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 60 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 61 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 62 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 63 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 64 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 65 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 66 67 /* Thread Local storage */ 68 PETSC_TLS PetscLogDouble petsc_TotalFlops_th = 0.0; 69 PETSC_TLS PetscLogDouble petsc_send_ct_th = 0.0; 70 PETSC_TLS PetscLogDouble petsc_recv_ct_th = 0.0; 71 PETSC_TLS PetscLogDouble petsc_send_len_th = 0.0; 72 PETSC_TLS PetscLogDouble petsc_recv_len_th = 0.0; 73 PETSC_TLS PetscLogDouble petsc_isend_ct_th = 0.0; 74 PETSC_TLS PetscLogDouble petsc_irecv_ct_th = 0.0; 75 PETSC_TLS PetscLogDouble petsc_isend_len_th = 0.0; 76 PETSC_TLS PetscLogDouble petsc_irecv_len_th = 0.0; 77 PETSC_TLS PetscLogDouble petsc_wait_ct_th = 0.0; 78 PETSC_TLS PetscLogDouble petsc_wait_any_ct_th = 0.0; 79 PETSC_TLS PetscLogDouble petsc_wait_all_ct_th = 0.0; 80 PETSC_TLS PetscLogDouble petsc_sum_of_waits_ct_th = 0.0; 81 PETSC_TLS PetscLogDouble petsc_allreduce_ct_th = 0.0; 82 PETSC_TLS PetscLogDouble petsc_gather_ct_th = 0.0; 83 PETSC_TLS PetscLogDouble petsc_scatter_ct_th = 0.0; 84 85 #if defined(PETSC_HAVE_DEVICE) 86 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 87 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 88 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 89 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 90 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 91 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 92 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 93 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 94 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 95 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 96 97 PETSC_TLS PetscLogDouble petsc_ctog_ct_th = 0.0; 98 PETSC_TLS PetscLogDouble petsc_gtoc_ct_th = 0.0; 99 PETSC_TLS PetscLogDouble petsc_ctog_sz_th = 0.0; 100 PETSC_TLS PetscLogDouble petsc_gtoc_sz_th = 0.0; 101 PETSC_TLS PetscLogDouble petsc_ctog_ct_scalar_th = 0.0; 102 PETSC_TLS PetscLogDouble petsc_gtoc_ct_scalar_th = 0.0; 103 PETSC_TLS PetscLogDouble petsc_ctog_sz_scalar_th = 0.0; 104 PETSC_TLS PetscLogDouble petsc_gtoc_sz_scalar_th = 0.0; 105 PETSC_TLS PetscLogDouble petsc_gflops_th = 0.0; 106 PETSC_TLS PetscLogDouble petsc_gtime_th = 0.0; 107 #endif 108 109 #if defined(PETSC_HAVE_THREADSAFETY) 110 PetscErrorCode PetscAddLogDouble(PetscLogDouble *tot, PetscLogDouble *tot_th, PetscLogDouble tmp) 111 { 112 *tot_th += tmp; 113 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 114 *tot += tmp; 115 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 116 return PETSC_SUCCESS; 117 } 118 119 PetscErrorCode PetscAddLogDoubleCnt(PetscLogDouble *cnt, PetscLogDouble *tot, PetscLogDouble *cnt_th, PetscLogDouble *tot_th, PetscLogDouble tmp) 120 { 121 *cnt_th = *cnt_th + 1; 122 *tot_th += tmp; 123 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 124 *tot += (PetscLogDouble)(tmp); 125 *cnt += *cnt + 1; 126 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 127 return PETSC_SUCCESS; 128 } 129 130 PetscInt PetscLogGetTid(void) 131 { 132 if (petsc_log_tid < 0) { 133 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 134 petsc_log_tid = ++petsc_log_gid; 135 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 136 } 137 return petsc_log_tid; 138 } 139 140 #endif 141 142 /* Logging functions */ 143 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 144 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 145 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 146 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 147 148 /* Tracing event logging variables */ 149 FILE *petsc_tracefile = NULL; 150 int petsc_tracelevel = 0; 151 const char *petsc_traceblanks = " "; 152 char petsc_tracespace[128] = " "; 153 PetscLogDouble petsc_tracetime = 0.0; 154 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 155 156 static PetscIntStack current_log_event_stack = NULL; 157 158 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) 159 { 160 int stage; 161 PetscBool opt; 162 163 PetscFunctionBegin; 164 if (PetscLogInitializeCalled) PetscFunctionReturn(PETSC_SUCCESS); 165 PetscLogInitializeCalled = PETSC_TRUE; 166 167 PetscCall(PetscIntStackCreate(¤t_log_event_stack)); 168 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt)); 169 if (opt) petsc_logActions = PETSC_FALSE; 170 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt)); 171 if (opt) petsc_logObjects = PETSC_FALSE; 172 if (petsc_logActions) PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 173 if (petsc_logObjects) PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 174 PetscLogPHC = PetscLogObjCreateDefault; 175 PetscLogPHD = PetscLogObjDestroyDefault; 176 /* Setup default logging structures */ 177 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 178 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 179 180 PetscCall(PetscSpinlockCreate(&PetscLogSpinLock)); 181 #if defined(PETSC_HAVE_THREADSAFETY) 182 petsc_log_tid = 0; 183 petsc_log_gid = 0; 184 PetscCall(PetscHMapEventCreate(&eventInfoMap_th)); 185 #endif 186 187 /* All processors sync here for more consistent logging */ 188 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 189 PetscCall(PetscTime(&petsc_BaseTime)); 190 PetscCall(PetscLogStagePush(stage)); 191 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 192 PetscStackCallExternalVoid("ps_initialize_", ps_initialize_()); 193 #endif 194 PetscFunctionReturn(PETSC_SUCCESS); 195 } 196 197 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) 198 { 199 PetscStageLog stageLog; 200 201 PetscFunctionBegin; 202 #if defined(PETSC_HAVE_THREADSAFETY) 203 if (eventInfoMap_th) { 204 PetscEventPerfInfo **array; 205 PetscInt n, off = 0; 206 207 PetscCall(PetscHMapEventGetSize(eventInfoMap_th, &n)); 208 PetscCall(PetscMalloc1(n, &array)); 209 PetscCall(PetscHMapEventGetVals(eventInfoMap_th, &off, array)); 210 for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i])); 211 PetscCall(PetscFree(array)); 212 PetscCall(PetscHMapEventDestroy(&eventInfoMap_th)); 213 } 214 #endif 215 PetscCall(PetscFree(petsc_actions)); 216 PetscCall(PetscFree(petsc_objects)); 217 PetscCall(PetscLogNestedEnd()); 218 PetscCall(PetscLogSet(NULL, NULL)); 219 220 /* Resetting phase */ 221 PetscCall(PetscLogGetStageLog(&stageLog)); 222 PetscCall(PetscStageLogDestroy(stageLog)); 223 PetscCall(PetscIntStackDestroy(current_log_event_stack)); 224 current_log_event_stack = NULL; 225 226 petsc_TotalFlops = 0.0; 227 petsc_numActions = 0; 228 petsc_numObjects = 0; 229 petsc_numObjectsDestroyed = 0; 230 petsc_maxActions = 100; 231 petsc_maxObjects = 100; 232 petsc_actions = NULL; 233 petsc_objects = NULL; 234 petsc_logActions = PETSC_FALSE; 235 petsc_logObjects = PETSC_FALSE; 236 petsc_BaseTime = 0.0; 237 petsc_TotalFlops = 0.0; 238 petsc_send_ct = 0.0; 239 petsc_recv_ct = 0.0; 240 petsc_send_len = 0.0; 241 petsc_recv_len = 0.0; 242 petsc_isend_ct = 0.0; 243 petsc_irecv_ct = 0.0; 244 petsc_isend_len = 0.0; 245 petsc_irecv_len = 0.0; 246 petsc_wait_ct = 0.0; 247 petsc_wait_any_ct = 0.0; 248 petsc_wait_all_ct = 0.0; 249 petsc_sum_of_waits_ct = 0.0; 250 petsc_allreduce_ct = 0.0; 251 petsc_gather_ct = 0.0; 252 petsc_scatter_ct = 0.0; 253 petsc_TotalFlops_th = 0.0; 254 petsc_send_ct_th = 0.0; 255 petsc_recv_ct_th = 0.0; 256 petsc_send_len_th = 0.0; 257 petsc_recv_len_th = 0.0; 258 petsc_isend_ct_th = 0.0; 259 petsc_irecv_ct_th = 0.0; 260 petsc_isend_len_th = 0.0; 261 petsc_irecv_len_th = 0.0; 262 petsc_wait_ct_th = 0.0; 263 petsc_wait_any_ct_th = 0.0; 264 petsc_wait_all_ct_th = 0.0; 265 petsc_sum_of_waits_ct_th = 0.0; 266 petsc_allreduce_ct_th = 0.0; 267 petsc_gather_ct_th = 0.0; 268 petsc_scatter_ct_th = 0.0; 269 270 #if defined(PETSC_HAVE_DEVICE) 271 petsc_ctog_ct = 0.0; 272 petsc_gtoc_ct = 0.0; 273 petsc_ctog_sz = 0.0; 274 petsc_gtoc_sz = 0.0; 275 petsc_gflops = 0.0; 276 petsc_gtime = 0.0; 277 petsc_ctog_ct_th = 0.0; 278 petsc_gtoc_ct_th = 0.0; 279 petsc_ctog_sz_th = 0.0; 280 petsc_gtoc_sz_th = 0.0; 281 petsc_gflops_th = 0.0; 282 petsc_gtime_th = 0.0; 283 #endif 284 285 PETSC_LARGEST_EVENT = PETSC_EVENT; 286 PetscLogPHC = NULL; 287 PetscLogPHD = NULL; 288 petsc_tracefile = NULL; 289 petsc_tracelevel = 0; 290 petsc_traceblanks = " "; 291 petsc_tracespace[0] = ' '; 292 petsc_tracespace[1] = 0; 293 petsc_tracetime = 0.0; 294 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 295 PETSC_OBJECT_CLASSID = 0; 296 petsc_stageLog = NULL; 297 PetscLogInitializeCalled = PETSC_FALSE; 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 /*@C 302 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 303 304 Not Collective 305 306 Input Parameters: 307 + b - The function called at beginning of event 308 - e - The function called at end of event 309 310 Level: developer 311 312 Developer Note: 313 The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. 314 315 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()` 316 @*/ 317 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) 318 { 319 PetscFunctionBegin; 320 PetscLogPLB = b; 321 PetscLogPLE = e; 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 /*@C 326 PetscLogIsActive - Check if logging is currently in progress. 327 328 Not Collective 329 330 Output Parameter: 331 . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise 332 333 Level: beginner 334 335 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 336 @*/ 337 PetscErrorCode PetscLogIsActive(PetscBool *isActive) 338 { 339 PetscFunctionBegin; 340 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 341 PetscFunctionReturn(PETSC_SUCCESS); 342 } 343 344 /*@C 345 PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop 346 rates and object creation and should not slow programs down too much. 347 This routine may be called more than once. 348 349 Logically Collective over `PETSC_COMM_WORLD` 350 351 Options Database Key: 352 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 353 screen (for code configured with --with-log=1 (which is the default)) 354 355 Usage: 356 .vb 357 PetscInitialize(...); 358 PetscLogDefaultBegin(); 359 ... code ... 360 PetscLogView(viewer); or PetscLogDump(); 361 PetscFinalize(); 362 .ve 363 364 Level: advanced 365 366 Note: 367 `PetscLogView()` or `PetscLogDump()` actually cause the printing of 368 the logging information. 369 370 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 371 @*/ 372 PetscErrorCode PetscLogDefaultBegin(void) 373 { 374 PetscFunctionBegin; 375 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 376 PetscFunctionReturn(PETSC_SUCCESS); 377 } 378 379 /*@C 380 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 381 all events. This creates large log files and slows the program down. 382 383 Logically Collective on `PETSC_COMM_WORLD` 384 385 Options Database Key: 386 . -log_all - Prints extensive log information 387 388 Usage: 389 .vb 390 PetscInitialize(...); 391 PetscLogAllBegin(); 392 ... code ... 393 PetscLogDump(filename); 394 PetscFinalize(); 395 .ve 396 397 Level: advanced 398 399 Note: 400 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 401 intended for production runs since it logs only flop rates and object 402 creation (and shouldn't significantly slow the programs). 403 404 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 405 @*/ 406 PetscErrorCode PetscLogAllBegin(void) 407 { 408 PetscFunctionBegin; 409 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 410 PetscFunctionReturn(PETSC_SUCCESS); 411 } 412 413 /*@C 414 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 415 begins or ends, the event name is printed. 416 417 Logically Collective on `PETSC_COMM_WORLD` 418 419 Input Parameter: 420 . file - The file to print trace in (e.g. stdout) 421 422 Options Database Key: 423 . -log_trace [filename] - Activates `PetscLogTraceBegin()` 424 425 Level: intermediate 426 427 Notes: 428 `PetscLogTraceBegin()` prints the processor number, the execution time (sec), 429 then "Event begin:" or "Event end:" followed by the event name. 430 431 `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful 432 to determine where a program is hanging without running in the 433 debugger. Can be used in conjunction with the -info option. 434 435 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 436 @*/ 437 PetscErrorCode PetscLogTraceBegin(FILE *file) 438 { 439 PetscFunctionBegin; 440 petsc_tracefile = file; 441 442 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 443 PetscFunctionReturn(PETSC_SUCCESS); 444 } 445 446 /*@ 447 PetscLogActions - Determines whether actions are logged for the graphical viewer. 448 449 Not Collective 450 451 Input Parameter: 452 . flag - `PETSC_TRUE` if actions are to be logged 453 454 Options Database Key: 455 . -log_exclude_actions - Turns off actions logging 456 457 Level: intermediate 458 459 Note: 460 Logging of actions continues to consume more memory as the program 461 runs. Long running programs should consider turning this feature off. 462 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 463 @*/ 464 PetscErrorCode PetscLogActions(PetscBool flag) 465 { 466 PetscFunctionBegin; 467 petsc_logActions = flag; 468 PetscFunctionReturn(PETSC_SUCCESS); 469 } 470 471 /*@ 472 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 473 474 Not Collective 475 476 Input Parameter: 477 . flag - `PETSC_TRUE` if objects are to be logged 478 479 Options Database Key: 480 . -log_exclude_objects - Turns off objects logging 481 482 Level: intermediate 483 484 Note: 485 Logging of objects continues to consume more memory as the program 486 runs. Long running programs should consider turning this feature off. 487 488 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 489 @*/ 490 PetscErrorCode PetscLogObjects(PetscBool flag) 491 { 492 PetscFunctionBegin; 493 petsc_logObjects = flag; 494 PetscFunctionReturn(PETSC_SUCCESS); 495 } 496 497 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 498 /*@C 499 PetscLogStageRegister - Attaches a character string name to a logging stage. 500 501 Not Collective 502 503 Input Parameter: 504 . sname - The name to associate with that stage 505 506 Output Parameter: 507 . stage - The stage number 508 509 Level: intermediate 510 511 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 512 @*/ 513 PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage) 514 { 515 PetscStageLog stageLog; 516 PetscLogEvent event; 517 518 PetscFunctionBegin; 519 PetscCall(PetscLogGetStageLog(&stageLog)); 520 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 521 /* Copy events already changed in the main stage, this sucks */ 522 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 523 for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 524 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 525 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 526 if (perfstubs_initialized == PERFSTUBS_SUCCESS) PetscStackCallExternalVoid("ps_timer_create_", stageLog->stageInfo[*stage].timer = ps_timer_create_(sname)); 527 #endif 528 PetscFunctionReturn(PETSC_SUCCESS); 529 } 530 531 /*@C 532 PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage 533 534 Not Collective 535 536 Input Parameter: 537 . stage - The stage on which to log 538 539 Usage: 540 If the option -log_view is used to run the program containing the 541 following code, then 2 sets of summary data will be printed during 542 PetscFinalize(). 543 .vb 544 PetscInitialize(int *argc,char ***args,0,0); 545 [stage 0 of code] 546 PetscLogStagePush(1); 547 [stage 1 of code] 548 PetscLogStagePop(); 549 PetscBarrier(...); 550 [more stage 0 of code] 551 PetscFinalize(); 552 .ve 553 554 Level: intermediate 555 556 Note: 557 Use `PetscLogStageRegister()` to register a stage. 558 559 .seealso: [](ch_profiling), `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 560 @*/ 561 PetscErrorCode PetscLogStagePush(PetscLogStage stage) 562 { 563 PetscStageLog stageLog; 564 565 PetscFunctionBegin; 566 PetscCall(PetscLogGetStageLog(&stageLog)); 567 PetscCall(PetscStageLogPush(stageLog, stage)); 568 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 569 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stage].timer != NULL) PetscStackCallExternalVoid("ps_timer_start_", ps_timer_start_(stageLog->stageInfo[stage].timer)); 570 #endif 571 PetscFunctionReturn(PETSC_SUCCESS); 572 } 573 574 /*@C 575 PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()` 576 577 Not Collective 578 579 Usage: 580 If the option -log_view is used to run the program containing the 581 following code, then 2 sets of summary data will be printed during 582 PetscFinalize(). 583 .vb 584 PetscInitialize(int *argc,char ***args,0,0); 585 [stage 0 of code] 586 PetscLogStagePush(1); 587 [stage 1 of code] 588 PetscLogStagePop(); 589 PetscBarrier(...); 590 [more stage 0 of code] 591 PetscFinalize(); 592 .ve 593 594 Level: intermediate 595 596 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 597 @*/ 598 PetscErrorCode PetscLogStagePop(void) 599 { 600 PetscStageLog stageLog; 601 602 PetscFunctionBegin; 603 PetscCall(PetscLogGetStageLog(&stageLog)); 604 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 605 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stageLog->curStage].timer != NULL) PetscStackCallExternalVoid("ps_timer_stop_", ps_timer_stop_(stageLog->stageInfo[stageLog->curStage].timer)); 606 #endif 607 PetscCall(PetscStageLogPop(stageLog)); 608 PetscFunctionReturn(PETSC_SUCCESS); 609 } 610 611 /*@ 612 PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 613 614 Not Collective 615 616 Input Parameters: 617 + stage - The stage 618 - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 619 620 Level: intermediate 621 622 Note: 623 If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist 624 625 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 626 @*/ 627 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) 628 { 629 PetscStageLog stageLog; 630 631 PetscFunctionBegin; 632 PetscCall(PetscLogGetStageLog(&stageLog)); 633 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 634 PetscFunctionReturn(PETSC_SUCCESS); 635 } 636 637 /*@ 638 PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 639 640 Not Collective 641 642 Input Parameter: 643 . stage - The stage 644 645 Output Parameter: 646 . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 647 648 Level: intermediate 649 650 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 651 @*/ 652 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) 653 { 654 PetscStageLog stageLog; 655 656 PetscFunctionBegin; 657 PetscCall(PetscLogGetStageLog(&stageLog)); 658 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 659 PetscFunctionReturn(PETSC_SUCCESS); 660 } 661 662 /*@ 663 PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()` 664 665 Not Collective 666 667 Input Parameters: 668 + stage - The stage 669 - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 670 671 Level: intermediate 672 673 Developer Note: 674 What does visible mean, needs to be documented. 675 676 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 677 @*/ 678 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) 679 { 680 PetscStageLog stageLog; 681 682 PetscFunctionBegin; 683 PetscCall(PetscLogGetStageLog(&stageLog)); 684 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 685 PetscFunctionReturn(PETSC_SUCCESS); 686 } 687 688 /*@ 689 PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()` 690 691 Not Collective 692 693 Input Parameter: 694 . stage - The stage 695 696 Output Parameter: 697 . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 698 699 Level: intermediate 700 701 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 702 @*/ 703 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) 704 { 705 PetscStageLog stageLog; 706 707 PetscFunctionBegin; 708 PetscCall(PetscLogGetStageLog(&stageLog)); 709 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 710 PetscFunctionReturn(PETSC_SUCCESS); 711 } 712 713 /*@C 714 PetscLogStageGetId - Returns the stage id when given the stage name. 715 716 Not Collective 717 718 Input Parameter: 719 . name - The stage name 720 721 Output Parameter: 722 . stage - The stage, , or -1 if no stage with that name exists 723 724 Level: intermediate 725 726 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 727 @*/ 728 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) 729 { 730 PetscStageLog stageLog; 731 732 PetscFunctionBegin; 733 PetscCall(PetscLogGetStageLog(&stageLog)); 734 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 735 PetscFunctionReturn(PETSC_SUCCESS); 736 } 737 738 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 739 740 /*@C 741 PetscLogEventRegister - Registers an event name for logging operations 742 743 Not Collective 744 745 Input Parameters: 746 + name - The name associated with the event 747 - classid - The classid associated to the class for this event, obtain either with 748 `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones 749 are only available in C code 750 751 Output Parameter: 752 . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`. 753 754 Example of Usage: 755 .vb 756 PetscLogEvent USER_EVENT; 757 PetscClassId classid; 758 PetscLogDouble user_event_flops; 759 PetscClassIdRegister("class name",&classid); 760 PetscLogEventRegister("User event name",classid,&USER_EVENT); 761 PetscLogEventBegin(USER_EVENT,0,0,0,0); 762 [code segment to monitor] 763 PetscLogFlops(user_event_flops); 764 PetscLogEventEnd(USER_EVENT,0,0,0,0); 765 .ve 766 767 Level: intermediate 768 769 Notes: 770 PETSc automatically logs library events if the code has been 771 configured with --with-log (which is the default) and 772 -log_view or -log_all is specified. `PetscLogEventRegister()` is 773 intended for logging user events to supplement this PETSc 774 information. 775 776 PETSc can gather data for use with the utilities Jumpshot 777 (part of the MPICH distribution). If PETSc has been compiled 778 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 779 MPICH), the user can employ another command line option, -log_mpe, 780 to create a logfile, "mpe.log", which can be visualized 781 Jumpshot. 782 783 The classid is associated with each event so that classes of events 784 can be disabled simultaneously, such as all matrix events. The user 785 can either use an existing classid, such as `MAT_CLASSID`, or create 786 their own as shown in the example. 787 788 If an existing event with the same name exists, its event handle is 789 returned instead of creating a new event. 790 791 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 792 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 793 @*/ 794 PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event) 795 { 796 PetscStageLog stageLog; 797 int stage; 798 799 PetscFunctionBegin; 800 *event = PETSC_DECIDE; 801 PetscCall(PetscLogGetStageLog(&stageLog)); 802 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 803 if (*event > 0) PetscFunctionReturn(PETSC_SUCCESS); 804 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 805 for (stage = 0; stage < stageLog->numStages; stage++) { 806 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 807 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 808 } 809 PetscFunctionReturn(PETSC_SUCCESS); 810 } 811 812 /*@ 813 PetscLogEventSetCollective - Indicates that a particular event is collective. 814 815 Not Collective 816 817 Input Parameters: 818 + event - The event id 819 - collective - Boolean flag indicating whether a particular event is collective 820 821 Level: developer 822 823 Notes: 824 New events returned from `PetscLogEventRegister()` are collective by default. 825 826 Collective events are handled specially if the -log_sync is used. In that case the logging saves information about 827 two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication 828 to be performed. This option is useful to debug imbalance within the computations or communications 829 830 .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()` 831 @*/ 832 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective) 833 { 834 PetscStageLog stageLog; 835 PetscEventRegLog eventRegLog; 836 837 PetscFunctionBegin; 838 PetscCall(PetscLogGetStageLog(&stageLog)); 839 PetscCall(PetscStageLogGetEventRegLog(stageLog, &eventRegLog)); 840 PetscCheck(event >= 0 && event <= eventRegLog->numEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid event id"); 841 eventRegLog->eventInfo[event].collective = collective; 842 PetscFunctionReturn(PETSC_SUCCESS); 843 } 844 845 /*@ 846 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 847 848 Not Collective 849 850 Input Parameter: 851 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 852 853 Level: developer 854 855 .seealso: [](ch_profiling), `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 856 @*/ 857 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) 858 { 859 PetscStageLog stageLog; 860 int stage; 861 862 PetscFunctionBegin; 863 PetscCall(PetscLogGetStageLog(&stageLog)); 864 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 865 PetscFunctionReturn(PETSC_SUCCESS); 866 } 867 868 /*@ 869 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 870 871 Not Collective 872 873 Input Parameter: 874 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 875 876 Level: developer 877 878 Note: 879 If a class is excluded then events associated with that class are not logged. 880 881 .seealso: [](ch_profiling), `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 882 @*/ 883 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) 884 { 885 PetscStageLog stageLog; 886 int stage; 887 888 PetscFunctionBegin; 889 PetscCall(PetscLogGetStageLog(&stageLog)); 890 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 891 PetscFunctionReturn(PETSC_SUCCESS); 892 } 893 894 /*@ 895 PetscLogEventActivate - Indicates that a particular event should be logged. 896 897 Not Collective 898 899 Input Parameter: 900 . event - The event id 901 902 Usage: 903 .vb 904 PetscLogEventDeactivate(VEC_SetValues); 905 [code where you do not want to log VecSetValues()] 906 PetscLogEventActivate(VEC_SetValues); 907 [code where you do want to log VecSetValues()] 908 .ve 909 910 Level: advanced 911 912 Note: 913 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 914 or an event number obtained with `PetscLogEventRegister()`. 915 916 .seealso: [](ch_profiling), `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 917 @*/ 918 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) 919 { 920 PetscStageLog stageLog; 921 int stage; 922 923 PetscFunctionBegin; 924 PetscCall(PetscLogGetStageLog(&stageLog)); 925 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 926 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 927 PetscFunctionReturn(PETSC_SUCCESS); 928 } 929 930 /*@ 931 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 932 933 Not Collective 934 935 Input Parameter: 936 . event - The event id 937 938 Usage: 939 .vb 940 PetscLogEventDeactivate(VEC_SetValues); 941 [code where you do not want to log VecSetValues()] 942 PetscLogEventActivate(VEC_SetValues); 943 [code where you do want to log VecSetValues()] 944 .ve 945 946 Level: advanced 947 948 Note: 949 The event may be either a pre-defined PETSc event (found in 950 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 951 952 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 953 @*/ 954 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) 955 { 956 PetscStageLog stageLog; 957 int stage; 958 959 PetscFunctionBegin; 960 PetscCall(PetscLogGetStageLog(&stageLog)); 961 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 962 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 963 PetscFunctionReturn(PETSC_SUCCESS); 964 } 965 966 /*@ 967 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called 968 969 Not Collective 970 971 Input Parameter: 972 . event - The event id 973 974 Usage: 975 .vb 976 PetscLogEventDeactivatePush(VEC_SetValues); 977 [code where you do not want to log VecSetValues()] 978 PetscLogEventDeactivatePop(VEC_SetValues); 979 [code where you do want to log VecSetValues()] 980 .ve 981 982 Level: advanced 983 984 Note: 985 The event may be either a pre-defined PETSc event (found in 986 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 987 988 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()` 989 @*/ 990 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) 991 { 992 PetscStageLog stageLog; 993 int stage; 994 995 PetscFunctionBegin; 996 PetscCall(PetscLogGetStageLog(&stageLog)); 997 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 998 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 999 PetscFunctionReturn(PETSC_SUCCESS); 1000 } 1001 1002 /*@ 1003 PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()` 1004 1005 Not Collective 1006 1007 Input Parameter: 1008 . event - The event id 1009 1010 Usage: 1011 .vb 1012 PetscLogEventDeactivatePush(VEC_SetValues); 1013 [code where you do not want to log VecSetValues()] 1014 PetscLogEventDeactivatePop(VEC_SetValues); 1015 [code where you do want to log VecSetValues()] 1016 .ve 1017 1018 Level: advanced 1019 1020 Note: 1021 The event may be either a pre-defined PETSc event (found in 1022 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 1023 1024 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 1025 @*/ 1026 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) 1027 { 1028 PetscStageLog stageLog; 1029 int stage; 1030 1031 PetscFunctionBegin; 1032 PetscCall(PetscLogGetStageLog(&stageLog)); 1033 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1034 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 1035 PetscFunctionReturn(PETSC_SUCCESS); 1036 } 1037 1038 /*@ 1039 PetscLogEventSetActiveAll - Turns on logging of all events 1040 1041 Not Collective 1042 1043 Input Parameters: 1044 + event - The event id 1045 - isActive - The activity flag determining whether the event is logged 1046 1047 Level: advanced 1048 1049 .seealso: [](ch_profiling), `PlogEventActivate()`, `PlogEventDeactivate()` 1050 @*/ 1051 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) 1052 { 1053 PetscStageLog stageLog; 1054 int stage; 1055 1056 PetscFunctionBegin; 1057 PetscCall(PetscLogGetStageLog(&stageLog)); 1058 for (stage = 0; stage < stageLog->numStages; stage++) { 1059 if (isActive) { 1060 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 1061 } else { 1062 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 1063 } 1064 } 1065 PetscFunctionReturn(PETSC_SUCCESS); 1066 } 1067 1068 /*@ 1069 PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage 1070 1071 Not Collective 1072 1073 Input Parameter: 1074 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 1075 1076 Level: developer 1077 1078 .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 1079 @*/ 1080 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) 1081 { 1082 PetscStageLog stageLog; 1083 int stage; 1084 1085 PetscFunctionBegin; 1086 PetscCall(PetscLogGetStageLog(&stageLog)); 1087 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1088 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1089 PetscFunctionReturn(PETSC_SUCCESS); 1090 } 1091 1092 /*@ 1093 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage 1094 1095 Not Collective 1096 1097 Input Parameter: 1098 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 1099 1100 Level: developer 1101 1102 .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 1103 @*/ 1104 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) 1105 { 1106 PetscStageLog stageLog; 1107 int stage; 1108 1109 PetscFunctionBegin; 1110 PetscCall(PetscLogGetStageLog(&stageLog)); 1111 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1112 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1113 PetscFunctionReturn(PETSC_SUCCESS); 1114 } 1115 1116 /*MC 1117 PetscLogEventSync - Synchronizes the beginning of a user event. 1118 1119 Synopsis: 1120 #include <petsclog.h> 1121 PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm) 1122 1123 Collective 1124 1125 Input Parameters: 1126 + e - integer associated with the event obtained from PetscLogEventRegister() 1127 - comm - an MPI communicator 1128 1129 Usage: 1130 .vb 1131 PetscLogEvent USER_EVENT; 1132 PetscLogEventRegister("User event",0,&USER_EVENT); 1133 PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD); 1134 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1135 [code segment to monitor] 1136 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1137 .ve 1138 1139 Level: developer 1140 1141 Note: 1142 This routine should be called only if there is not a 1143 `PetscObject` available to pass to `PetscLogEventBegin()`. 1144 1145 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1146 M*/ 1147 1148 /*MC 1149 PetscLogEventBegin - Logs the beginning of a user event. 1150 1151 Synopsis: 1152 #include <petsclog.h> 1153 PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1154 1155 Not Collective 1156 1157 Input Parameters: 1158 + e - integer associated with the event obtained from PetscLogEventRegister() 1159 - o1,o2,o3,o4 - objects associated with the event, or 0 1160 1161 Fortran Synopsis: 1162 void PetscLogEventBegin(int e,PetscErrorCode ierr) 1163 1164 Usage: 1165 .vb 1166 PetscLogEvent USER_EVENT; 1167 PetscLogDouble user_event_flops; 1168 PetscLogEventRegister("User event",0,&USER_EVENT); 1169 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1170 [code segment to monitor] 1171 PetscLogFlops(user_event_flops); 1172 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1173 .ve 1174 1175 Level: intermediate 1176 1177 Developer Note: 1178 `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the 1179 errors that occur in the macro directly because other packages that use this macros have used them in their 1180 own functions or methods that do not return error codes and it would be disruptive to change the current 1181 behavior. 1182 1183 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1184 M*/ 1185 1186 /*MC 1187 PetscLogEventEnd - Log the end of a user event. 1188 1189 Synopsis: 1190 #include <petsclog.h> 1191 PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1192 1193 Not Collective 1194 1195 Input Parameters: 1196 + e - integer associated with the event obtained with PetscLogEventRegister() 1197 - o1,o2,o3,o4 - objects associated with the event, or 0 1198 1199 Fortran Synopsis: 1200 void PetscLogEventEnd(int e,PetscErrorCode ierr) 1201 1202 Usage: 1203 .vb 1204 PetscLogEvent USER_EVENT; 1205 PetscLogDouble user_event_flops; 1206 PetscLogEventRegister("User event",0,&USER_EVENT,); 1207 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1208 [code segment to monitor] 1209 PetscLogFlops(user_event_flops); 1210 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1211 .ve 1212 1213 Level: intermediate 1214 1215 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1216 M*/ 1217 1218 /*@C 1219 PetscLogEventGetId - Returns the event id when given the event name. 1220 1221 Not Collective 1222 1223 Input Parameter: 1224 . name - The event name 1225 1226 Output Parameter: 1227 . event - The event, or -1 if no event with that name exists 1228 1229 Level: intermediate 1230 1231 .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1232 @*/ 1233 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) 1234 { 1235 PetscStageLog stageLog; 1236 1237 PetscFunctionBegin; 1238 PetscCall(PetscLogGetStageLog(&stageLog)); 1239 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1240 PetscFunctionReturn(PETSC_SUCCESS); 1241 } 1242 1243 PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event) 1244 { 1245 PetscFunctionBegin; 1246 if (!PetscDefined(HAVE_THREADSAFETY)) PetscCall(PetscIntStackPush(current_log_event_stack, event)); 1247 PetscFunctionReturn(PETSC_SUCCESS); 1248 } 1249 1250 PetscErrorCode PetscLogPopCurrentEvent_Internal(void) 1251 { 1252 PetscFunctionBegin; 1253 if (!PetscDefined(HAVE_THREADSAFETY)) PetscCall(PetscIntStackPop(current_log_event_stack, NULL)); 1254 PetscFunctionReturn(PETSC_SUCCESS); 1255 } 1256 1257 PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event) 1258 { 1259 PetscBool empty; 1260 1261 PetscFunctionBegin; 1262 PetscValidIntPointer(event, 1); 1263 *event = PETSC_DECIDE; 1264 PetscCall(PetscIntStackEmpty(current_log_event_stack, &empty)); 1265 if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack, event)); 1266 PetscFunctionReturn(PETSC_SUCCESS); 1267 } 1268 1269 PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event) 1270 { 1271 PetscFunctionBegin; 1272 if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event, NULL, NULL, NULL, NULL)); 1273 PetscFunctionReturn(PETSC_SUCCESS); 1274 } 1275 1276 PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event) 1277 { 1278 PetscStageLog stageLog; 1279 PetscEventPerfLog eventLog; 1280 int stage; 1281 1282 PetscFunctionBegin; 1283 if (event == PETSC_DECIDE) PetscFunctionReturn(PETSC_SUCCESS); 1284 PetscCall(PetscLogEventBegin(event, NULL, NULL, NULL, NULL)); 1285 PetscCall(PetscLogGetStageLog(&stageLog)); 1286 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1287 PetscCall(PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog)); 1288 eventLog->eventInfo[event].count--; 1289 PetscFunctionReturn(PETSC_SUCCESS); 1290 } 1291 1292 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1293 /*@C 1294 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1295 be read by bin/petscview. This program no longer exists. 1296 1297 Collective on `PETSC_COMM_WORLD` 1298 1299 Input Parameter: 1300 . name - an optional file name 1301 1302 Usage: 1303 .vb 1304 PetscInitialize(...); 1305 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1306 ... code ... 1307 PetscLogDump(filename); 1308 PetscFinalize(); 1309 .ve 1310 1311 Level: advanced 1312 1313 Note: 1314 The default file name is 1315 $ Log.<rank> 1316 where <rank> is the processor number. If no name is specified, 1317 this file will be used. 1318 1319 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1320 @*/ 1321 PetscErrorCode PetscLogDump(const char sname[]) 1322 { 1323 PetscStageLog stageLog; 1324 PetscEventPerfInfo *eventInfo; 1325 FILE *fd; 1326 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1327 PetscLogDouble flops, _TotalTime; 1328 PetscMPIInt rank; 1329 int action, object, curStage; 1330 PetscLogEvent event; 1331 1332 PetscFunctionBegin; 1333 /* Calculate the total elapsed time */ 1334 PetscCall(PetscTime(&_TotalTime)); 1335 _TotalTime -= petsc_BaseTime; 1336 /* Open log file */ 1337 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1338 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank)); 1339 PetscCall(PetscFixFilename(file, fname)); 1340 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1341 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1342 /* Output totals */ 1343 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1344 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1345 /* Output actions */ 1346 if (petsc_logActions) { 1347 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1348 for (action = 0; action < petsc_numActions; action++) { 1349 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1350 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1351 } 1352 } 1353 /* Output objects */ 1354 if (petsc_logObjects) { 1355 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1356 for (object = 0; object < petsc_numObjects; object++) { 1357 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem)); 1358 if (!petsc_objects[object].name[0]) { 1359 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n")); 1360 } else { 1361 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1362 } 1363 if (petsc_objects[object].info[0] != 0) { 1364 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1365 } else { 1366 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1367 } 1368 } 1369 } 1370 /* Output events */ 1371 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1372 PetscCall(PetscLogGetStageLog(&stageLog)); 1373 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1374 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1375 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1376 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time; 1377 else flops = 0.0; 1378 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops)); 1379 } 1380 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1381 PetscFunctionReturn(PETSC_SUCCESS); 1382 } 1383 1384 /* 1385 PetscLogView_Detailed - Each process prints the times for its own events 1386 1387 */ 1388 PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) 1389 { 1390 PetscStageLog stageLog; 1391 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1392 PetscLogDouble locTotalTime, numRed, maxMem; 1393 int numStages, numEvents, stage, event; 1394 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1395 PetscMPIInt rank, size; 1396 1397 PetscFunctionBegin; 1398 PetscCallMPI(MPI_Comm_size(comm, &size)); 1399 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1400 /* Must preserve reduction count before we go on */ 1401 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1402 /* Get the total elapsed time */ 1403 PetscCall(PetscTime(&locTotalTime)); 1404 locTotalTime -= petsc_BaseTime; 1405 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 1406 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 1407 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 1408 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 1409 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 1410 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 1411 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 1412 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 1413 PetscCall(PetscLogGetStageLog(&stageLog)); 1414 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1415 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1416 for (stage = 0; stage < numStages; stage++) { 1417 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name)); 1418 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name)); 1419 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1420 for (event = 0; event < numEvents; event++) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name)); 1421 } 1422 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1423 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1424 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1425 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1426 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1427 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1428 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1429 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects)); 1430 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1431 PetscCall(PetscViewerFlush(viewer)); 1432 for (stage = 0; stage < numStages; stage++) { 1433 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1434 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, 1435 stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1436 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1437 for (event = 0; event < numEvents; event++) { 1438 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1439 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1440 stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, 1441 eventInfo->flops)); 1442 if (eventInfo->dof[0] >= 0.) { 1443 PetscInt d, e; 1444 1445 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1446 for (d = 0; d < 8; ++d) { 1447 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1448 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1449 } 1450 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1451 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1452 for (e = 0; e < 8; ++e) { 1453 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1454 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1455 } 1456 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1457 } 1458 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1459 } 1460 } 1461 PetscCall(PetscViewerFlush(viewer)); 1462 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1463 PetscFunctionReturn(PETSC_SUCCESS); 1464 } 1465 1466 /* 1467 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1468 */ 1469 PetscErrorCode PetscLogView_CSV(PetscViewer viewer) 1470 { 1471 PetscStageLog stageLog; 1472 PetscEventPerfInfo *eventInfo = NULL; 1473 PetscLogDouble locTotalTime, maxMem; 1474 int numStages, numEvents, stage, event; 1475 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1476 PetscMPIInt rank, size; 1477 1478 PetscFunctionBegin; 1479 PetscCallMPI(MPI_Comm_size(comm, &size)); 1480 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1481 /* Must preserve reduction count before we go on */ 1482 /* Get the total elapsed time */ 1483 PetscCall(PetscTime(&locTotalTime)); 1484 locTotalTime -= petsc_BaseTime; 1485 PetscCall(PetscLogGetStageLog(&stageLog)); 1486 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1487 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1488 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1489 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1490 PetscCall(PetscViewerFlush(viewer)); 1491 for (stage = 0; stage < numStages; stage++) { 1492 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1493 1494 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1495 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1496 for (event = 0; event < numEvents; event++) { 1497 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1498 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, 1499 eventInfo->numReductions, eventInfo->flops)); 1500 if (eventInfo->dof[0] >= 0.) { 1501 PetscInt d, e; 1502 1503 for (d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1504 for (e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1505 } 1506 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1507 } 1508 } 1509 PetscCall(PetscViewerFlush(viewer)); 1510 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1511 PetscFunctionReturn(PETSC_SUCCESS); 1512 } 1513 1514 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) 1515 { 1516 PetscFunctionBegin; 1517 if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS); 1518 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1519 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1520 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1521 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1522 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1523 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1524 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1525 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1526 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1527 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1528 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1529 PetscFunctionReturn(PETSC_SUCCESS); 1530 } 1531 1532 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) 1533 { 1534 PetscFunctionBegin; 1535 if (PetscDefined(USE_DEBUG)) { 1536 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1537 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1538 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1539 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1540 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1541 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1542 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1543 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1544 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1545 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1546 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1547 } 1548 PetscFunctionReturn(PETSC_SUCCESS); 1549 } 1550 1551 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) 1552 { 1553 #if defined(PETSC_HAVE_DEVICE) 1554 PetscMPIInt size; 1555 PetscBool deviceInitialized = PETSC_FALSE; 1556 1557 PetscFunctionBegin; 1558 PetscCallMPI(MPI_Comm_size(comm, &size)); 1559 for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) { 1560 const PetscDeviceType dtype = PetscDeviceTypeCast(i); 1561 if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */ 1562 deviceInitialized = PETSC_TRUE; 1563 break; 1564 } 1565 } 1566 /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */ 1567 if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS); 1568 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1569 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1570 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1571 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1572 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1573 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1574 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1575 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1576 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1577 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1578 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1579 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1580 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1581 PetscFunctionReturn(PETSC_SUCCESS); 1582 #else 1583 return PETSC_SUCCESS; 1584 #endif 1585 } 1586 1587 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) 1588 { 1589 #if defined(PETSC_HAVE_DEVICE) 1590 1591 PetscFunctionBegin; 1592 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS); 1593 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1594 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1595 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1596 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1597 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1598 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1599 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1600 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1601 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1602 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1603 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1604 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1605 PetscFunctionReturn(PETSC_SUCCESS); 1606 #else 1607 return PETSC_SUCCESS; 1608 #endif 1609 } 1610 1611 PetscErrorCode PetscLogView_Default(PetscViewer viewer) 1612 { 1613 FILE *fd; 1614 PetscLogDouble zero = 0.0; 1615 PetscStageLog stageLog; 1616 PetscStageInfo *stageInfo = NULL; 1617 PetscEventPerfInfo *eventInfo = NULL; 1618 PetscClassPerfInfo *classInfo; 1619 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1620 const char *name; 1621 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1622 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1623 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1624 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1625 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1626 PetscLogDouble min, max, tot, ratio, avg, x, y; 1627 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1628 #if defined(PETSC_HAVE_DEVICE) 1629 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1630 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1631 #endif 1632 PetscMPIInt minC, maxC; 1633 PetscMPIInt size, rank; 1634 PetscBool *localStageUsed, *stageUsed; 1635 PetscBool *localStageVisible, *stageVisible; 1636 int numStages, localNumEvents, numEvents; 1637 int stage, oclass; 1638 PetscLogEvent event; 1639 char version[256]; 1640 MPI_Comm comm; 1641 #if defined(PETSC_HAVE_DEVICE) 1642 PetscLogEvent eventid; 1643 PetscInt64 nas = 0x7FF0000000000002; 1644 #endif 1645 1646 PetscFunctionBegin; 1647 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1648 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1649 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1650 PetscCallMPI(MPI_Comm_size(comm, &size)); 1651 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1652 /* Get the total elapsed time */ 1653 PetscCall(PetscTime(&locTotalTime)); 1654 locTotalTime -= petsc_BaseTime; 1655 1656 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1657 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1658 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1659 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1660 PetscCall(PetscLogViewWarnSync(comm, fd)); 1661 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1662 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1663 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1664 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1665 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1666 PetscCall(PetscGetUserName(username, sizeof(username))); 1667 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1668 PetscCall(PetscGetDate(date, sizeof(date))); 1669 PetscCall(PetscGetVersion(version, sizeof(version))); 1670 if (size == 1) { 1671 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1672 } else { 1673 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1674 } 1675 #if defined(PETSC_HAVE_OPENMP) 1676 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1677 #endif 1678 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1679 1680 /* Must preserve reduction count before we go on */ 1681 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1682 1683 /* Calculate summary information */ 1684 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1685 /* Time */ 1686 PetscCall(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1687 PetscCall(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1688 PetscCall(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1689 avg = tot / ((PetscLogDouble)size); 1690 if (min != 0.0) ratio = max / min; 1691 else ratio = 0.0; 1692 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1693 TotalTime = tot; 1694 /* Objects */ 1695 avg = (PetscLogDouble)petsc_numObjects; 1696 PetscCall(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1697 PetscCall(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1698 PetscCall(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1699 avg = tot / ((PetscLogDouble)size); 1700 if (min != 0.0) ratio = max / min; 1701 else ratio = 0.0; 1702 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1703 /* Flops */ 1704 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1705 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1706 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1707 avg = tot / ((PetscLogDouble)size); 1708 if (min != 0.0) ratio = max / min; 1709 else ratio = 0.0; 1710 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1711 TotalFlops = tot; 1712 /* Flops/sec -- Must talk to Barry here */ 1713 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1714 else flops = 0.0; 1715 PetscCall(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1716 PetscCall(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1717 PetscCall(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1718 avg = tot / ((PetscLogDouble)size); 1719 if (min != 0.0) ratio = max / min; 1720 else ratio = 0.0; 1721 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1722 /* Memory */ 1723 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1724 if (mem > 0.0) { 1725 PetscCall(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1726 PetscCall(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1727 PetscCall(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1728 avg = tot / ((PetscLogDouble)size); 1729 if (min != 0.0) ratio = max / min; 1730 else ratio = 0.0; 1731 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1732 } 1733 /* Messages */ 1734 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1735 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1736 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1737 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1738 avg = tot / ((PetscLogDouble)size); 1739 if (min != 0.0) ratio = max / min; 1740 else ratio = 0.0; 1741 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1742 numMessages = tot; 1743 /* Message Lengths */ 1744 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1745 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1746 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1747 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1748 if (numMessages != 0) avg = tot / numMessages; 1749 else avg = 0.0; 1750 if (min != 0.0) ratio = max / min; 1751 else ratio = 0.0; 1752 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1753 messageLength = tot; 1754 /* Reductions */ 1755 PetscCall(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1756 PetscCall(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1757 PetscCall(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1758 if (min != 0.0) ratio = max / min; 1759 else ratio = 0.0; 1760 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1761 numReductions = red; /* wrong because uses count from process zero */ 1762 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1763 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1764 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1765 1766 /* Get total number of stages -- 1767 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1768 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1769 This seems best accomplished by assoicating a communicator with each stage. 1770 */ 1771 PetscCall(PetscLogGetStageLog(&stageLog)); 1772 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1773 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1774 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1775 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1776 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1777 if (numStages > 0) { 1778 stageInfo = stageLog->stageInfo; 1779 for (stage = 0; stage < numStages; stage++) { 1780 if (stage < stageLog->numStages) { 1781 localStageUsed[stage] = stageInfo[stage].used; 1782 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1783 } else { 1784 localStageUsed[stage] = PETSC_FALSE; 1785 localStageVisible[stage] = PETSC_TRUE; 1786 } 1787 } 1788 PetscCall(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1789 PetscCall(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1790 for (stage = 0; stage < numStages; stage++) { 1791 if (stageUsed[stage]) { 1792 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1793 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1794 break; 1795 } 1796 } 1797 for (stage = 0; stage < numStages; stage++) { 1798 if (!stageUsed[stage]) continue; 1799 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1800 if (localStageUsed[stage]) { 1801 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1802 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1803 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1804 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1805 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1806 name = stageInfo[stage].name; 1807 } else { 1808 PetscCall(MPIU_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1809 PetscCall(MPIU_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1810 PetscCall(MPIU_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1811 PetscCall(MPIU_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1812 PetscCall(MPIU_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1813 name = ""; 1814 } 1815 mess *= 0.5; 1816 messLen *= 0.5; 1817 red /= size; 1818 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1819 else fracTime = 0.0; 1820 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1821 else fracFlops = 0.0; 1822 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1823 if (numMessages != 0.0) fracMessages = mess / numMessages; 1824 else fracMessages = 0.0; 1825 if (mess != 0.0) avgMessLen = messLen / mess; 1826 else avgMessLen = 0.0; 1827 if (messageLength != 0.0) fracLength = messLen / messageLength; 1828 else fracLength = 0.0; 1829 if (numReductions != 0.0) fracReductions = red / numReductions; 1830 else fracReductions = 0.0; 1831 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1832 } 1833 } 1834 1835 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1836 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1837 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1838 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1839 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1840 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1841 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1842 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1843 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1844 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1845 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1846 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1847 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1848 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1849 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1850 if (PetscLogMemory) { 1851 PetscCall(PetscFPrintf(comm, fd, " Memory usage is summed over all MPI processes, it is given in mega-bytes\n")); 1852 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1853 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1854 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1855 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1856 } 1857 #if defined(PETSC_HAVE_DEVICE) 1858 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1859 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1860 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1861 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1862 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1863 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1864 #endif 1865 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1866 1867 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1868 1869 /* Report events */ 1870 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1871 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1872 #if defined(PETSC_HAVE_DEVICE) 1873 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1874 #endif 1875 PetscCall(PetscFPrintf(comm, fd, "\n")); 1876 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1877 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1878 #if defined(PETSC_HAVE_DEVICE) 1879 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1880 #endif 1881 PetscCall(PetscFPrintf(comm, fd, "\n")); 1882 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1883 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1884 #if defined(PETSC_HAVE_DEVICE) 1885 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1886 #endif 1887 PetscCall(PetscFPrintf(comm, fd, "\n")); 1888 1889 #if defined(PETSC_HAVE_DEVICE) 1890 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1891 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1892 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1893 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1894 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1895 #endif 1896 1897 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1898 for (stage = 0; stage < numStages; stage++) { 1899 if (!stageVisible[stage]) continue; 1900 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1901 if (localStageUsed[stage]) { 1902 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1903 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1904 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1905 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1906 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1907 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1908 } else { 1909 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1910 PetscCall(MPIU_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1911 PetscCall(MPIU_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1912 PetscCall(MPIU_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1913 PetscCall(MPIU_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1914 PetscCall(MPIU_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1915 } 1916 mess *= 0.5; 1917 messLen *= 0.5; 1918 red /= size; 1919 1920 /* Get total number of events in this stage -- 1921 Currently, a single processor can register more events than another, but events must all be registered in order, 1922 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1923 on the event ID. This seems best accomplished by associating a communicator with each stage. 1924 1925 Problem: If the event did not happen on proc 1, its name will not be available. 1926 Problem: Event visibility is not implemented 1927 */ 1928 if (localStageUsed[stage]) { 1929 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1930 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1931 } else localNumEvents = 0; 1932 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1933 for (event = 0; event < numEvents; event++) { 1934 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1935 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1936 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; 1937 else flopr = 0.0; 1938 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1939 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1940 PetscCall(MPIU_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1941 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1942 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1943 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1944 PetscCall(MPIU_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1945 PetscCall(MPIU_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1946 PetscCall(MPIU_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1947 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1948 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1949 if (PetscLogMemory) { 1950 PetscCall(MPIU_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1951 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1952 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1953 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1954 } 1955 #if defined(PETSC_HAVE_DEVICE) 1956 PetscCall(MPIU_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1957 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1958 PetscCall(MPIU_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1959 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1960 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1961 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1962 #endif 1963 name = stageLog->eventLog->eventInfo[event].name; 1964 } else { 1965 int ierr = 0; 1966 1967 flopr = 0.0; 1968 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1969 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1970 PetscCall(MPIU_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1971 PetscCall(MPIU_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1972 PetscCall(MPIU_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1973 PetscCall(MPIU_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1974 PetscCall(MPIU_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1975 PetscCall(MPIU_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1976 PetscCall(MPIU_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1977 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1978 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1979 if (PetscLogMemory) { 1980 PetscCall(MPIU_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1981 PetscCall(MPIU_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1982 PetscCall(MPIU_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1983 PetscCall(MPIU_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1984 } 1985 #if defined(PETSC_HAVE_DEVICE) 1986 PetscCall(MPIU_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1987 PetscCall(MPIU_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1988 PetscCall(MPIU_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1989 PetscCall(MPIU_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1990 PetscCall(MPIU_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1991 PetscCall(MPIU_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1992 #endif 1993 name = ""; 1994 } 1995 if (mint < 0.0) { 1996 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name)); 1997 mint = 0; 1998 } 1999 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, name); 2000 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 2001 #if defined(PETSC_HAVE_DEVICE) 2002 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 2003 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 2004 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 2005 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 2006 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 2007 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 2008 } 2009 } 2010 #endif 2011 totm *= 0.5; 2012 totml *= 0.5; 2013 totr /= size; 2014 2015 if (maxC != 0) { 2016 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 2017 else ratC = 0.0; 2018 if (mint != 0.0) ratt = maxt / mint; 2019 else ratt = 0.0; 2020 if (minf != 0.0) ratf = maxf / minf; 2021 else ratf = 0.0; 2022 if (TotalTime != 0.0) fracTime = tott / TotalTime; 2023 else fracTime = 0.0; 2024 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 2025 else fracFlops = 0.0; 2026 if (stageTime != 0.0) fracStageTime = tott / stageTime; 2027 else fracStageTime = 0.0; 2028 if (flops != 0.0) fracStageFlops = totf / flops; 2029 else fracStageFlops = 0.0; 2030 if (numMessages != 0.0) fracMess = totm / numMessages; 2031 else fracMess = 0.0; 2032 if (messageLength != 0.0) fracMessLen = totml / messageLength; 2033 else fracMessLen = 0.0; 2034 if (numReductions != 0.0) fracRed = totr / numReductions; 2035 else fracRed = 0.0; 2036 if (mess != 0.0) fracStageMess = totm / mess; 2037 else fracStageMess = 0.0; 2038 if (messLen != 0.0) fracStageMessLen = totml / messLen; 2039 else fracStageMessLen = 0.0; 2040 if (red != 0.0) fracStageRed = totr / red; 2041 else fracStageRed = 0.0; 2042 if (totm != 0.0) totml /= totm; 2043 else totml = 0.0; 2044 if (maxt != 0.0) flopr = totf / maxt; 2045 else flopr = 0.0; 2046 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) 2047 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 2048 else 2049 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 2050 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 2051 #if defined(PETSC_HAVE_DEVICE) 2052 if (totf != 0.0) fracgflops = gflops / totf; 2053 else fracgflops = 0.0; 2054 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 2055 else gflopr = 0.0; 2056 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 2057 #endif 2058 PetscCall(PetscFPrintf(comm, fd, "\n")); 2059 } 2060 } 2061 } 2062 2063 /* Memory usage and object creation */ 2064 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 2065 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 2066 #if defined(PETSC_HAVE_DEVICE) 2067 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 2068 #endif 2069 PetscCall(PetscFPrintf(comm, fd, "\n")); 2070 PetscCall(PetscFPrintf(comm, fd, "\n")); 2071 2072 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 2073 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 2074 stats for stages local to processor sets. 2075 */ 2076 /* We should figure out the longest object name here (now 20 characters) */ 2077 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 2078 for (stage = 0; stage < numStages; stage++) { 2079 if (localStageUsed[stage]) { 2080 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 2081 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 2082 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 2083 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 2084 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions)); 2085 } 2086 } 2087 } else { 2088 if (!localStageVisible[stage]) continue; 2089 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 2090 } 2091 } 2092 2093 PetscCall(PetscFree(localStageUsed)); 2094 PetscCall(PetscFree(stageUsed)); 2095 PetscCall(PetscFree(localStageVisible)); 2096 PetscCall(PetscFree(stageVisible)); 2097 2098 /* Information unrelated to this particular run */ 2099 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 2100 PetscCall(PetscTime(&y)); 2101 PetscCall(PetscTime(&x)); 2102 PetscCall(PetscTime(&y)); 2103 PetscCall(PetscTime(&y)); 2104 PetscCall(PetscTime(&y)); 2105 PetscCall(PetscTime(&y)); 2106 PetscCall(PetscTime(&y)); 2107 PetscCall(PetscTime(&y)); 2108 PetscCall(PetscTime(&y)); 2109 PetscCall(PetscTime(&y)); 2110 PetscCall(PetscTime(&y)); 2111 PetscCall(PetscTime(&y)); 2112 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 2113 /* MPI information */ 2114 if (size > 1) { 2115 MPI_Status status; 2116 PetscMPIInt tag; 2117 MPI_Comm newcomm; 2118 2119 PetscCallMPI(MPI_Barrier(comm)); 2120 PetscCall(PetscTime(&x)); 2121 PetscCallMPI(MPI_Barrier(comm)); 2122 PetscCallMPI(MPI_Barrier(comm)); 2123 PetscCallMPI(MPI_Barrier(comm)); 2124 PetscCallMPI(MPI_Barrier(comm)); 2125 PetscCallMPI(MPI_Barrier(comm)); 2126 PetscCall(PetscTime(&y)); 2127 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 2128 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 2129 PetscCallMPI(MPI_Barrier(comm)); 2130 if (rank) { 2131 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 2132 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 2133 } else { 2134 PetscCall(PetscTime(&x)); 2135 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 2136 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 2137 PetscCall(PetscTime(&y)); 2138 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 2139 } 2140 PetscCall(PetscCommDestroy(&newcomm)); 2141 } 2142 PetscCall(PetscOptionsView(NULL, viewer)); 2143 2144 /* Machine and compile information */ 2145 #if defined(PETSC_USE_FORTRAN_KERNELS) 2146 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 2147 #else 2148 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 2149 #endif 2150 #if defined(PETSC_USE_64BIT_INDICES) 2151 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64-bit PetscInt\n")); 2152 #elif defined(PETSC_USE___FLOAT128) 2153 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32-bit PetscInt\n")); 2154 #endif 2155 #if defined(PETSC_USE_REAL_SINGLE) 2156 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 2157 #elif defined(PETSC_USE___FLOAT128) 2158 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 2159 #endif 2160 #if defined(PETSC_USE_REAL_MAT_SINGLE) 2161 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 2162 #else 2163 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 2164 #endif 2165 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 2166 2167 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 2168 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 2169 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 2170 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 2171 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 2172 2173 /* Cleanup */ 2174 PetscCall(PetscFPrintf(comm, fd, "\n")); 2175 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 2176 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 2177 PetscCall(PetscFPTrapPop()); 2178 PetscFunctionReturn(PETSC_SUCCESS); 2179 } 2180 2181 /*@C 2182 PetscLogView - Prints a summary of the logging. 2183 2184 Collective over MPI_Comm 2185 2186 Input Parameter: 2187 . viewer - an ASCII viewer 2188 2189 Options Database Keys: 2190 + -log_view [:filename] - Prints summary of log information 2191 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2192 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2193 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2194 . -log_view_memory - Also display memory usage in each event 2195 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2196 . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation 2197 - -log_trace [filename] - Displays a trace of what each process is doing 2198 2199 Level: beginner 2200 2201 Notes: 2202 It is possible to control the logging programmatically but we recommend using the options database approach whenever possible 2203 By default the summary is printed to stdout. 2204 2205 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2206 2207 If PETSc is configured with --with-logging=0 then this functionality is not available 2208 2209 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2210 directory then open filename.xml with your browser. Specific notes for certain browsers 2211 $ Firefox and Internet explorer - simply open the file 2212 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2213 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2214 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2215 your browser. 2216 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2217 window and render the XML log file contents. 2218 2219 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2220 2221 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2222 or using speedscope (https://www.speedscope.app). 2223 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2224 2225 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogDump()` 2226 @*/ 2227 PetscErrorCode PetscLogView(PetscViewer viewer) 2228 { 2229 PetscBool isascii; 2230 PetscViewerFormat format; 2231 int stage, lastStage; 2232 PetscStageLog stageLog; 2233 2234 PetscFunctionBegin; 2235 PetscCheck(PetscLogPLB, PETSC_COMM_SELF, PETSC_ERR_SUP, "Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2236 /* Pop off any stages the user forgot to remove */ 2237 lastStage = 0; 2238 PetscCall(PetscLogGetStageLog(&stageLog)); 2239 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2240 while (stage >= 0) { 2241 lastStage = stage; 2242 PetscCall(PetscStageLogPop(stageLog)); 2243 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2244 } 2245 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2246 PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Currently can only view logging to ASCII"); 2247 PetscCall(PetscViewerGetFormat(viewer, &format)); 2248 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2249 PetscCall(PetscLogView_Default(viewer)); 2250 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2251 PetscCall(PetscLogView_Detailed(viewer)); 2252 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2253 PetscCall(PetscLogView_CSV(viewer)); 2254 } else if (format == PETSC_VIEWER_ASCII_XML) { 2255 PetscCall(PetscLogView_Nested(viewer)); 2256 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2257 PetscCall(PetscLogView_Flamegraph(viewer)); 2258 } 2259 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2260 PetscFunctionReturn(PETSC_SUCCESS); 2261 } 2262 2263 /*@C 2264 PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed. 2265 2266 Collective on `PETSC_COMM_WORLD` 2267 2268 Level: developer 2269 2270 .seealso: [](ch_profiling), `PetscLogView()` 2271 @*/ 2272 PetscErrorCode PetscLogViewFromOptions(void) 2273 { 2274 PetscViewer viewer; 2275 PetscBool flg; 2276 PetscViewerFormat format; 2277 2278 PetscFunctionBegin; 2279 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg)); 2280 if (flg) { 2281 PetscCall(PetscViewerPushFormat(viewer, format)); 2282 PetscCall(PetscLogView(viewer)); 2283 PetscCall(PetscViewerPopFormat(viewer)); 2284 PetscCall(PetscViewerDestroy(&viewer)); 2285 } 2286 PetscFunctionReturn(PETSC_SUCCESS); 2287 } 2288 2289 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2290 /*@C 2291 PetscGetFlops - Returns the number of flops used on this processor 2292 since the program began. 2293 2294 Not Collective 2295 2296 Output Parameter: 2297 flops - number of floating point operations 2298 2299 Level: intermediate 2300 2301 Notes: 2302 A global counter logs all PETSc flop counts. The user can use 2303 `PetscLogFlops()` to increment this counter to include flops for the 2304 application code. 2305 2306 A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank 2307 2308 .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()` 2309 @*/ 2310 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) 2311 { 2312 PetscFunctionBegin; 2313 *flops = petsc_TotalFlops; 2314 PetscFunctionReturn(PETSC_SUCCESS); 2315 } 2316 2317 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2318 { 2319 size_t fullLength; 2320 va_list Argp; 2321 2322 PetscFunctionBegin; 2323 if (!petsc_logObjects) PetscFunctionReturn(PETSC_SUCCESS); 2324 va_start(Argp, format); 2325 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp)); 2326 va_end(Argp); 2327 PetscFunctionReturn(PETSC_SUCCESS); 2328 } 2329 2330 /*MC 2331 PetscLogFlops - Adds floating point operations to the global counter. 2332 2333 Synopsis: 2334 #include <petsclog.h> 2335 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2336 2337 Not Collective 2338 2339 Input Parameter: 2340 . f - flop counter 2341 2342 Usage: 2343 .vb 2344 PetscLogEvent USER_EVENT; 2345 PetscLogEventRegister("User event",0,&USER_EVENT); 2346 PetscLogEventBegin(USER_EVENT,0,0,0,0); 2347 [code segment to monitor] 2348 PetscLogFlops(user_flops) 2349 PetscLogEventEnd(USER_EVENT,0,0,0,0); 2350 .ve 2351 2352 Level: intermediate 2353 2354 Note: 2355 A global counter logs all PETSc flop counts. The user can use 2356 PetscLogFlops() to increment this counter to include flops for the 2357 application code. 2358 2359 .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2360 M*/ 2361 2362 /*MC 2363 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) 2364 to get accurate timings 2365 2366 Synopsis: 2367 #include <petsclog.h> 2368 void PetscPreLoadBegin(PetscBool flag,char *name); 2369 2370 Not Collective 2371 2372 Input Parameters: 2373 + flag - `PETSC_TRUE` to run twice, `PETSC_FALSE` to run once, may be overridden 2374 with command line option -preload true or -preload false 2375 - name - name of first stage (lines of code timed separately with `-log_view`) to 2376 be preloaded 2377 2378 Usage: 2379 .vb 2380 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2381 lines of code 2382 PetscPreLoadStage("second stage"); 2383 lines of code 2384 PetscPreLoadEnd(); 2385 .ve 2386 2387 Level: intermediate 2388 2389 Note: 2390 Only works in C/C++, not Fortran 2391 2392 Flags available within the macro. 2393 + PetscPreLoadingUsed - true if we are or have done preloading 2394 . PetscPreLoadingOn - true if it is CURRENTLY doing preload 2395 . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second 2396 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on 2397 The first two variables are available throughout the program, the second two only between the `PetscPreLoadBegin()` 2398 and `PetscPreLoadEnd()` 2399 2400 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2401 M*/ 2402 2403 /*MC 2404 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) 2405 to get accurate timings 2406 2407 Synopsis: 2408 #include <petsclog.h> 2409 void PetscPreLoadEnd(void); 2410 2411 Not Collective 2412 2413 Usage: 2414 .vb 2415 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2416 lines of code 2417 PetscPreLoadStage("second stage"); 2418 lines of code 2419 PetscPreLoadEnd(); 2420 .ve 2421 2422 Level: intermediate 2423 2424 Note: 2425 Only works in C/C++ not fortran 2426 2427 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2428 M*/ 2429 2430 /*MC 2431 PetscPreLoadStage - Start a new segment of code to be timed separately. 2432 to get accurate timings 2433 2434 Synopsis: 2435 #include <petsclog.h> 2436 void PetscPreLoadStage(char *name); 2437 2438 Not Collective 2439 2440 Usage: 2441 .vb 2442 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2443 lines of code 2444 PetscPreLoadStage("second stage"); 2445 lines of code 2446 PetscPreLoadEnd(); 2447 .ve 2448 2449 Level: intermediate 2450 2451 Note: 2452 Only works in C/C++ not fortran 2453 2454 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2455 M*/ 2456 2457 #if PetscDefined(HAVE_DEVICE) 2458 #include <petsc/private/deviceimpl.h> 2459 2460 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2461 2462 /* 2463 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2464 because it will result in timing results that cannot be interpreted. 2465 */ 2466 static PetscErrorCode PetscLogGpuTime_Off(void) 2467 { 2468 PetscLogGpuTimeFlag = PETSC_FALSE; 2469 return PETSC_SUCCESS; 2470 } 2471 2472 /*@C 2473 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2474 2475 Options Database Key: 2476 . -log_view_gpu_time - provide the GPU times in the -log_view output 2477 2478 Level: advanced 2479 2480 Notes: 2481 Turning on the timing of the 2482 GPU kernels can slow down the entire computation and should only be used when studying the performance 2483 of operations on GPU such as vector operations and matrix-vector operations. 2484 2485 This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off. 2486 2487 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2488 @*/ 2489 PetscErrorCode PetscLogGpuTime(void) 2490 { 2491 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2492 PetscLogGpuTimeFlag = PETSC_TRUE; 2493 return PETSC_SUCCESS; 2494 } 2495 2496 /*@C 2497 PetscLogGpuTimeBegin - Start timer for device 2498 2499 Level: intermediate 2500 2501 Notes: 2502 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times). 2503 2504 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times). 2505 2506 There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()` 2507 2508 This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space. 2509 2510 The regular logging captures the time for data transfers and any CPU activities during the event 2511 2512 It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel. 2513 2514 Developer Notes: 2515 The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`. 2516 2517 `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the 2518 event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution, 2519 but not continued GPU execution, until the timer event is recorded. 2520 2521 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2522 @*/ 2523 PetscErrorCode PetscLogGpuTimeBegin(void) 2524 { 2525 PetscFunctionBegin; 2526 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(PETSC_SUCCESS); 2527 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2528 PetscDeviceContext dctx; 2529 2530 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2531 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2532 } else { 2533 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2534 } 2535 PetscFunctionReturn(PETSC_SUCCESS); 2536 } 2537 2538 /*@C 2539 PetscLogGpuTimeEnd - Stop timer for device 2540 2541 Level: intermediate 2542 2543 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2544 @*/ 2545 PetscErrorCode PetscLogGpuTimeEnd(void) 2546 { 2547 PetscFunctionBegin; 2548 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(PETSC_SUCCESS); 2549 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2550 PetscDeviceContext dctx; 2551 PetscLogDouble elapsed; 2552 2553 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2554 PetscCall(PetscDeviceContextEndTimer_Internal(dctx, &elapsed)); 2555 petsc_gtime += (elapsed / 1000.0); 2556 } else { 2557 PetscCall(PetscTimeAdd(&petsc_gtime)); 2558 } 2559 PetscFunctionReturn(PETSC_SUCCESS); 2560 } 2561 2562 #endif /* end of PETSC_HAVE_DEVICE */ 2563 2564 #else /* end of -DPETSC_USE_LOG section */ 2565 2566 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2567 { 2568 PetscFunctionBegin; 2569 PetscFunctionReturn(PETSC_SUCCESS); 2570 } 2571 2572 #endif /* PETSC_USE_LOG*/ 2573 2574 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2575 PetscClassId PETSC_OBJECT_CLASSID = 0; 2576 2577 /*@C 2578 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2579 2580 Not Collective 2581 2582 Input Parameter: 2583 . name - The class name 2584 2585 Output Parameter: 2586 . oclass - The class id or classid 2587 2588 Level: developer 2589 2590 .seealso: [](ch_profiling), `PetscLogEventRegister()` 2591 @*/ 2592 PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass) 2593 { 2594 #if defined(PETSC_USE_LOG) 2595 PetscStageLog stageLog; 2596 PetscInt stage; 2597 #endif 2598 2599 PetscFunctionBegin; 2600 *oclass = ++PETSC_LARGEST_CLASSID; 2601 #if defined(PETSC_USE_LOG) 2602 PetscCall(PetscLogGetStageLog(&stageLog)); 2603 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2604 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2605 #endif 2606 PetscFunctionReturn(PETSC_SUCCESS); 2607 } 2608 2609 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2610 #include <mpe.h> 2611 2612 PetscBool PetscBeganMPE = PETSC_FALSE; 2613 2614 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2615 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2616 2617 /*@C 2618 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files 2619 and slows the program down. 2620 2621 Collective over `PETSC_COMM_WORLD` 2622 2623 Options Database Key: 2624 . -log_mpe - Prints extensive log information 2625 2626 Level: advanced 2627 2628 Note: 2629 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 2630 intended for production runs since it logs only flop rates and object 2631 creation (and should not significantly slow the programs). 2632 2633 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2634 `PetscLogEventDeactivate()` 2635 @*/ 2636 PetscErrorCode PetscLogMPEBegin(void) 2637 { 2638 PetscFunctionBegin; 2639 /* Do MPE initialization */ 2640 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2641 PetscCall(PetscInfo(0, "Initializing MPE.\n")); 2642 PetscCall(MPE_Init_log()); 2643 2644 PetscBeganMPE = PETSC_TRUE; 2645 } else { 2646 PetscCall(PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n")); 2647 } 2648 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2649 PetscFunctionReturn(PETSC_SUCCESS); 2650 } 2651 2652 /*@C 2653 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2654 2655 Collective over `PETSC_COMM_WORLD` 2656 2657 Level: advanced 2658 2659 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2660 @*/ 2661 PetscErrorCode PetscLogMPEDump(const char sname[]) 2662 { 2663 char name[PETSC_MAX_PATH_LEN]; 2664 2665 PetscFunctionBegin; 2666 if (PetscBeganMPE) { 2667 PetscCall(PetscInfo(0, "Finalizing MPE.\n")); 2668 if (sname) { 2669 PetscCall(PetscStrncpy(name, sname, sizeof(name))); 2670 } else { 2671 PetscCall(PetscGetProgramName(name, sizeof(name))); 2672 } 2673 PetscCall(MPE_Finish_log(name)); 2674 } else { 2675 PetscCall(PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n")); 2676 } 2677 PetscFunctionReturn(PETSC_SUCCESS); 2678 } 2679 2680 #define PETSC_RGB_COLORS_MAX 39 2681 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ", 2682 "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ", 2683 "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ", 2684 "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ", 2685 "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "}; 2686 2687 /*@C 2688 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()` 2689 2690 Not collective. Maybe it should be? 2691 2692 Output Parameter: 2693 . str - character string representing the color 2694 2695 Level: developer 2696 2697 .seealso: [](ch_profiling), `PetscLogEventRegister()` 2698 @*/ 2699 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) 2700 { 2701 static int idx = 0; 2702 2703 PetscFunctionBegin; 2704 *str = PetscLogMPERGBColors[idx]; 2705 idx = (idx + 1) % PETSC_RGB_COLORS_MAX; 2706 PetscFunctionReturn(PETSC_SUCCESS); 2707 } 2708 2709 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2710