1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 #include <petscdevice.h> 15 #include <petsc/private/deviceimpl.h> 16 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 17 #include <../src/sys/perfstubs/timer.h> 18 #endif 19 20 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 21 22 #if defined(PETSC_USE_LOG) 23 #include <petscmachineinfo.h> 24 #include <petscconfiginfo.h> 25 26 #if defined(PETSC_HAVE_THREADSAFETY) 27 28 PetscInt petsc_log_gid = -1; /* Global threadId counter */ 29 PETSC_TLS PetscInt petsc_log_tid = -1; /* Local threadId */ 30 31 /* shared variables */ 32 PetscSpinlock PetscLogSpinLock; 33 PetscHMapEvent eventInfoMap_th = NULL; 34 35 #endif 36 37 /* used in the MPI_XXX() count macros in petsclog.h */ 38 39 /* Action and object logging variables */ 40 Action *petsc_actions = NULL; 41 Object *petsc_objects = NULL; 42 PetscBool petsc_logActions = PETSC_FALSE; 43 PetscBool petsc_logObjects = PETSC_FALSE; 44 int petsc_numActions = 0, petsc_maxActions = 100; 45 int petsc_numObjects = 0, petsc_maxObjects = 100; 46 int petsc_numObjectsDestroyed = 0; 47 48 /* Global counters */ 49 PetscLogDouble petsc_BaseTime = 0.0; 50 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 51 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 52 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 53 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 54 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 55 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 56 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 57 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 58 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 59 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 60 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 61 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 62 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 63 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 64 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 65 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 66 67 /* Thread Local storage */ 68 PETSC_TLS PetscLogDouble petsc_TotalFlops_th = 0.0; 69 PETSC_TLS PetscLogDouble petsc_send_ct_th = 0.0; 70 PETSC_TLS PetscLogDouble petsc_recv_ct_th = 0.0; 71 PETSC_TLS PetscLogDouble petsc_send_len_th = 0.0; 72 PETSC_TLS PetscLogDouble petsc_recv_len_th = 0.0; 73 PETSC_TLS PetscLogDouble petsc_isend_ct_th = 0.0; 74 PETSC_TLS PetscLogDouble petsc_irecv_ct_th = 0.0; 75 PETSC_TLS PetscLogDouble petsc_isend_len_th = 0.0; 76 PETSC_TLS PetscLogDouble petsc_irecv_len_th = 0.0; 77 PETSC_TLS PetscLogDouble petsc_wait_ct_th = 0.0; 78 PETSC_TLS PetscLogDouble petsc_wait_any_ct_th = 0.0; 79 PETSC_TLS PetscLogDouble petsc_wait_all_ct_th = 0.0; 80 PETSC_TLS PetscLogDouble petsc_sum_of_waits_ct_th = 0.0; 81 PETSC_TLS PetscLogDouble petsc_allreduce_ct_th = 0.0; 82 PETSC_TLS PetscLogDouble petsc_gather_ct_th = 0.0; 83 PETSC_TLS PetscLogDouble petsc_scatter_ct_th = 0.0; 84 85 #if defined(PETSC_HAVE_DEVICE) 86 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 87 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 88 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 89 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 90 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 91 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 92 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 93 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 94 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 95 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 96 97 PETSC_TLS PetscLogDouble petsc_ctog_ct_th = 0.0; 98 PETSC_TLS PetscLogDouble petsc_gtoc_ct_th = 0.0; 99 PETSC_TLS PetscLogDouble petsc_ctog_sz_th = 0.0; 100 PETSC_TLS PetscLogDouble petsc_gtoc_sz_th = 0.0; 101 PETSC_TLS PetscLogDouble petsc_ctog_ct_scalar_th = 0.0; 102 PETSC_TLS PetscLogDouble petsc_gtoc_ct_scalar_th = 0.0; 103 PETSC_TLS PetscLogDouble petsc_ctog_sz_scalar_th = 0.0; 104 PETSC_TLS PetscLogDouble petsc_gtoc_sz_scalar_th = 0.0; 105 PETSC_TLS PetscLogDouble petsc_gflops_th = 0.0; 106 PETSC_TLS PetscLogDouble petsc_gtime_th = 0.0; 107 #endif 108 109 #if defined(PETSC_HAVE_THREADSAFETY) 110 PetscErrorCode PetscAddLogDouble(PetscLogDouble *tot, PetscLogDouble *tot_th, PetscLogDouble tmp) 111 { 112 *tot_th += tmp; 113 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 114 *tot += tmp; 115 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 116 return PETSC_SUCCESS; 117 } 118 119 PetscErrorCode PetscAddLogDoubleCnt(PetscLogDouble *cnt, PetscLogDouble *tot, PetscLogDouble *cnt_th, PetscLogDouble *tot_th, PetscLogDouble tmp) 120 { 121 *cnt_th = *cnt_th + 1; 122 *tot_th += tmp; 123 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 124 *tot += (PetscLogDouble)(tmp); 125 *cnt += *cnt + 1; 126 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 127 return PETSC_SUCCESS; 128 } 129 130 PetscInt PetscLogGetTid(void) 131 { 132 if (petsc_log_tid < 0) { 133 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 134 petsc_log_tid = ++petsc_log_gid; 135 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 136 } 137 return petsc_log_tid; 138 } 139 140 #endif 141 142 /* Logging functions */ 143 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 144 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 145 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 146 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 147 148 /* Tracing event logging variables */ 149 FILE *petsc_tracefile = NULL; 150 int petsc_tracelevel = 0; 151 const char *petsc_traceblanks = " "; 152 char petsc_tracespace[128] = " "; 153 PetscLogDouble petsc_tracetime = 0.0; 154 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 155 156 static PetscIntStack current_log_event_stack = NULL; 157 158 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) 159 { 160 int stage; 161 PetscBool opt; 162 163 PetscFunctionBegin; 164 if (PetscLogInitializeCalled) PetscFunctionReturn(PETSC_SUCCESS); 165 PetscLogInitializeCalled = PETSC_TRUE; 166 167 PetscCall(PetscIntStackCreate(¤t_log_event_stack)); 168 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt)); 169 if (opt) petsc_logActions = PETSC_FALSE; 170 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt)); 171 if (opt) petsc_logObjects = PETSC_FALSE; 172 if (petsc_logActions) PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 173 if (petsc_logObjects) PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 174 PetscLogPHC = PetscLogObjCreateDefault; 175 PetscLogPHD = PetscLogObjDestroyDefault; 176 /* Setup default logging structures */ 177 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 178 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 179 180 PetscCall(PetscSpinlockCreate(&PetscLogSpinLock)); 181 #if defined(PETSC_HAVE_THREADSAFETY) 182 petsc_log_tid = 0; 183 petsc_log_gid = 0; 184 PetscCall(PetscHMapEventCreate(&eventInfoMap_th)); 185 #endif 186 187 /* All processors sync here for more consistent logging */ 188 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 189 PetscCall(PetscTime(&petsc_BaseTime)); 190 PetscCall(PetscLogStagePush(stage)); 191 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 192 PetscStackCallExternalVoid("ps_initialize_", ps_initialize_()); 193 #endif 194 PetscFunctionReturn(PETSC_SUCCESS); 195 } 196 197 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) 198 { 199 PetscStageLog stageLog; 200 201 PetscFunctionBegin; 202 #if defined(PETSC_HAVE_THREADSAFETY) 203 if (eventInfoMap_th) { 204 PetscEventPerfInfo **array; 205 PetscInt n, off = 0; 206 207 PetscCall(PetscHMapEventGetSize(eventInfoMap_th, &n)); 208 PetscCall(PetscMalloc1(n, &array)); 209 PetscCall(PetscHMapEventGetVals(eventInfoMap_th, &off, array)); 210 for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i])); 211 PetscCall(PetscFree(array)); 212 PetscCall(PetscHMapEventDestroy(&eventInfoMap_th)); 213 } 214 #endif 215 PetscCall(PetscFree(petsc_actions)); 216 PetscCall(PetscFree(petsc_objects)); 217 PetscCall(PetscLogNestedEnd()); 218 PetscCall(PetscLogSet(NULL, NULL)); 219 220 /* Resetting phase */ 221 PetscCall(PetscLogGetStageLog(&stageLog)); 222 PetscCall(PetscStageLogDestroy(stageLog)); 223 PetscCall(PetscIntStackDestroy(current_log_event_stack)); 224 current_log_event_stack = NULL; 225 226 petsc_TotalFlops = 0.0; 227 petsc_numActions = 0; 228 petsc_numObjects = 0; 229 petsc_numObjectsDestroyed = 0; 230 petsc_maxActions = 100; 231 petsc_maxObjects = 100; 232 petsc_actions = NULL; 233 petsc_objects = NULL; 234 petsc_logActions = PETSC_FALSE; 235 petsc_logObjects = PETSC_FALSE; 236 petsc_BaseTime = 0.0; 237 petsc_TotalFlops = 0.0; 238 petsc_send_ct = 0.0; 239 petsc_recv_ct = 0.0; 240 petsc_send_len = 0.0; 241 petsc_recv_len = 0.0; 242 petsc_isend_ct = 0.0; 243 petsc_irecv_ct = 0.0; 244 petsc_isend_len = 0.0; 245 petsc_irecv_len = 0.0; 246 petsc_wait_ct = 0.0; 247 petsc_wait_any_ct = 0.0; 248 petsc_wait_all_ct = 0.0; 249 petsc_sum_of_waits_ct = 0.0; 250 petsc_allreduce_ct = 0.0; 251 petsc_gather_ct = 0.0; 252 petsc_scatter_ct = 0.0; 253 petsc_TotalFlops_th = 0.0; 254 petsc_send_ct_th = 0.0; 255 petsc_recv_ct_th = 0.0; 256 petsc_send_len_th = 0.0; 257 petsc_recv_len_th = 0.0; 258 petsc_isend_ct_th = 0.0; 259 petsc_irecv_ct_th = 0.0; 260 petsc_isend_len_th = 0.0; 261 petsc_irecv_len_th = 0.0; 262 petsc_wait_ct_th = 0.0; 263 petsc_wait_any_ct_th = 0.0; 264 petsc_wait_all_ct_th = 0.0; 265 petsc_sum_of_waits_ct_th = 0.0; 266 petsc_allreduce_ct_th = 0.0; 267 petsc_gather_ct_th = 0.0; 268 petsc_scatter_ct_th = 0.0; 269 270 #if defined(PETSC_HAVE_DEVICE) 271 petsc_ctog_ct = 0.0; 272 petsc_gtoc_ct = 0.0; 273 petsc_ctog_sz = 0.0; 274 petsc_gtoc_sz = 0.0; 275 petsc_gflops = 0.0; 276 petsc_gtime = 0.0; 277 petsc_ctog_ct_th = 0.0; 278 petsc_gtoc_ct_th = 0.0; 279 petsc_ctog_sz_th = 0.0; 280 petsc_gtoc_sz_th = 0.0; 281 petsc_gflops_th = 0.0; 282 petsc_gtime_th = 0.0; 283 #endif 284 285 PETSC_LARGEST_EVENT = PETSC_EVENT; 286 PetscLogPHC = NULL; 287 PetscLogPHD = NULL; 288 petsc_tracefile = NULL; 289 petsc_tracelevel = 0; 290 petsc_traceblanks = " "; 291 petsc_tracespace[0] = ' '; 292 petsc_tracespace[1] = 0; 293 petsc_tracetime = 0.0; 294 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 295 PETSC_OBJECT_CLASSID = 0; 296 petsc_stageLog = NULL; 297 PetscLogInitializeCalled = PETSC_FALSE; 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 /*@C 302 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 303 304 Not Collective 305 306 Input Parameters: 307 + b - The function called at beginning of event 308 - e - The function called at end of event 309 310 Level: developer 311 312 Developer Note: 313 The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. 314 315 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()` 316 @*/ 317 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) 318 { 319 PetscFunctionBegin; 320 PetscLogPLB = b; 321 PetscLogPLE = e; 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 /*@C 326 PetscLogIsActive - Check if logging is currently in progress. 327 328 Not Collective 329 330 Output Parameter: 331 . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise 332 333 Level: beginner 334 335 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 336 @*/ 337 PetscErrorCode PetscLogIsActive(PetscBool *isActive) 338 { 339 PetscFunctionBegin; 340 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 341 PetscFunctionReturn(PETSC_SUCCESS); 342 } 343 344 /*@C 345 PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop 346 rates and object creation and should not slow programs down too much. 347 This routine may be called more than once. 348 349 Logically Collective over `PETSC_COMM_WORLD` 350 351 Options Database Key: 352 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 353 screen (for code configured with --with-log=1 (which is the default)) 354 355 Usage: 356 .vb 357 PetscInitialize(...); 358 PetscLogDefaultBegin(); 359 ... code ... 360 PetscLogView(viewer); or PetscLogDump(); 361 PetscFinalize(); 362 .ve 363 364 Level: advanced 365 366 Note: 367 `PetscLogView()` or `PetscLogDump()` actually cause the printing of 368 the logging information. 369 370 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 371 @*/ 372 PetscErrorCode PetscLogDefaultBegin(void) 373 { 374 PetscFunctionBegin; 375 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 376 PetscFunctionReturn(PETSC_SUCCESS); 377 } 378 379 /*@C 380 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 381 all events. This creates large log files and slows the program down. 382 383 Logically Collective on `PETSC_COMM_WORLD` 384 385 Options Database Key: 386 . -log_all - Prints extensive log information 387 388 Usage: 389 .vb 390 PetscInitialize(...); 391 PetscLogAllBegin(); 392 ... code ... 393 PetscLogDump(filename); 394 PetscFinalize(); 395 .ve 396 397 Level: advanced 398 399 Note: 400 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 401 intended for production runs since it logs only flop rates and object 402 creation (and shouldn't significantly slow the programs). 403 404 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 405 @*/ 406 PetscErrorCode PetscLogAllBegin(void) 407 { 408 PetscFunctionBegin; 409 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 410 PetscFunctionReturn(PETSC_SUCCESS); 411 } 412 413 /*@C 414 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 415 begins or ends, the event name is printed. 416 417 Logically Collective on `PETSC_COMM_WORLD` 418 419 Input Parameter: 420 . file - The file to print trace in (e.g. stdout) 421 422 Options Database Key: 423 . -log_trace [filename] - Activates `PetscLogTraceBegin()` 424 425 Level: intermediate 426 427 Notes: 428 `PetscLogTraceBegin()` prints the processor number, the execution time (sec), 429 then "Event begin:" or "Event end:" followed by the event name. 430 431 `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful 432 to determine where a program is hanging without running in the 433 debugger. Can be used in conjunction with the -info option. 434 435 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 436 @*/ 437 PetscErrorCode PetscLogTraceBegin(FILE *file) 438 { 439 PetscFunctionBegin; 440 petsc_tracefile = file; 441 442 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 443 PetscFunctionReturn(PETSC_SUCCESS); 444 } 445 446 /*@ 447 PetscLogActions - Determines whether actions are logged for the graphical viewer. 448 449 Not Collective 450 451 Input Parameter: 452 . flag - `PETSC_TRUE` if actions are to be logged 453 454 Options Database Key: 455 . -log_exclude_actions - Turns off actions logging 456 457 Level: intermediate 458 459 Note: 460 Logging of actions continues to consume more memory as the program 461 runs. Long running programs should consider turning this feature off. 462 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 463 @*/ 464 PetscErrorCode PetscLogActions(PetscBool flag) 465 { 466 PetscFunctionBegin; 467 petsc_logActions = flag; 468 PetscFunctionReturn(PETSC_SUCCESS); 469 } 470 471 /*@ 472 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 473 474 Not Collective 475 476 Input Parameter: 477 . flag - `PETSC_TRUE` if objects are to be logged 478 479 Options Database Key: 480 . -log_exclude_objects - Turns off objects logging 481 482 Level: intermediate 483 484 Note: 485 Logging of objects continues to consume more memory as the program 486 runs. Long running programs should consider turning this feature off. 487 488 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 489 @*/ 490 PetscErrorCode PetscLogObjects(PetscBool flag) 491 { 492 PetscFunctionBegin; 493 petsc_logObjects = flag; 494 PetscFunctionReturn(PETSC_SUCCESS); 495 } 496 497 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 498 /*@C 499 PetscLogStageRegister - Attaches a character string name to a logging stage. 500 501 Not Collective 502 503 Input Parameter: 504 . sname - The name to associate with that stage 505 506 Output Parameter: 507 . stage - The stage number 508 509 Level: intermediate 510 511 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 512 @*/ 513 PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage) 514 { 515 PetscStageLog stageLog; 516 PetscLogEvent event; 517 518 PetscFunctionBegin; 519 PetscCall(PetscLogGetStageLog(&stageLog)); 520 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 521 /* Copy events already changed in the main stage, this sucks */ 522 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 523 for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 524 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 525 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 526 if (perfstubs_initialized == PERFSTUBS_SUCCESS) PetscStackCallExternalVoid("ps_timer_create_", stageLog->stageInfo[*stage].timer = ps_timer_create_(sname)); 527 #endif 528 PetscFunctionReturn(PETSC_SUCCESS); 529 } 530 531 /*@C 532 PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage 533 534 Not Collective 535 536 Input Parameter: 537 . stage - The stage on which to log 538 539 Usage: 540 If the option -log_view is used to run the program containing the 541 following code, then 2 sets of summary data will be printed during 542 PetscFinalize(). 543 .vb 544 PetscInitialize(int *argc,char ***args,0,0); 545 [stage 0 of code] 546 PetscLogStagePush(1); 547 [stage 1 of code] 548 PetscLogStagePop(); 549 PetscBarrier(...); 550 [more stage 0 of code] 551 PetscFinalize(); 552 .ve 553 554 Level: intermediate 555 556 Note: 557 Use `PetscLogStageRegister()` to register a stage. 558 559 .seealso: [](ch_profiling), `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 560 @*/ 561 PetscErrorCode PetscLogStagePush(PetscLogStage stage) 562 { 563 PetscStageLog stageLog; 564 565 PetscFunctionBegin; 566 PetscCall(PetscLogGetStageLog(&stageLog)); 567 PetscCall(PetscStageLogPush(stageLog, stage)); 568 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 569 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stage].timer != NULL) PetscStackCallExternalVoid("ps_timer_start_", ps_timer_start_(stageLog->stageInfo[stage].timer)); 570 #endif 571 PetscFunctionReturn(PETSC_SUCCESS); 572 } 573 574 /*@C 575 PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()` 576 577 Not Collective 578 579 Usage: 580 If the option -log_view is used to run the program containing the 581 following code, then 2 sets of summary data will be printed during 582 PetscFinalize(). 583 .vb 584 PetscInitialize(int *argc,char ***args,0,0); 585 [stage 0 of code] 586 PetscLogStagePush(1); 587 [stage 1 of code] 588 PetscLogStagePop(); 589 PetscBarrier(...); 590 [more stage 0 of code] 591 PetscFinalize(); 592 .ve 593 594 Level: intermediate 595 596 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 597 @*/ 598 PetscErrorCode PetscLogStagePop(void) 599 { 600 PetscStageLog stageLog; 601 602 PetscFunctionBegin; 603 PetscCall(PetscLogGetStageLog(&stageLog)); 604 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 605 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stageLog->curStage].timer != NULL) PetscStackCallExternalVoid("ps_timer_stop_", ps_timer_stop_(stageLog->stageInfo[stageLog->curStage].timer)); 606 #endif 607 PetscCall(PetscStageLogPop(stageLog)); 608 PetscFunctionReturn(PETSC_SUCCESS); 609 } 610 611 /*@ 612 PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 613 614 Not Collective 615 616 Input Parameters: 617 + stage - The stage 618 - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 619 620 Level: intermediate 621 622 Note: 623 If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist 624 625 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 626 @*/ 627 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) 628 { 629 PetscStageLog stageLog; 630 631 PetscFunctionBegin; 632 PetscCall(PetscLogGetStageLog(&stageLog)); 633 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 634 PetscFunctionReturn(PETSC_SUCCESS); 635 } 636 637 /*@ 638 PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 639 640 Not Collective 641 642 Input Parameter: 643 . stage - The stage 644 645 Output Parameter: 646 . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 647 648 Level: intermediate 649 650 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 651 @*/ 652 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) 653 { 654 PetscStageLog stageLog; 655 656 PetscFunctionBegin; 657 PetscCall(PetscLogGetStageLog(&stageLog)); 658 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 659 PetscFunctionReturn(PETSC_SUCCESS); 660 } 661 662 /*@ 663 PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()` 664 665 Not Collective 666 667 Input Parameters: 668 + stage - The stage 669 - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 670 671 Level: intermediate 672 673 Developer Note: 674 What does visible mean, needs to be documented. 675 676 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 677 @*/ 678 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) 679 { 680 PetscStageLog stageLog; 681 682 PetscFunctionBegin; 683 PetscCall(PetscLogGetStageLog(&stageLog)); 684 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 685 PetscFunctionReturn(PETSC_SUCCESS); 686 } 687 688 /*@ 689 PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()` 690 691 Not Collective 692 693 Input Parameter: 694 . stage - The stage 695 696 Output Parameter: 697 . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 698 699 Level: intermediate 700 701 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 702 @*/ 703 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) 704 { 705 PetscStageLog stageLog; 706 707 PetscFunctionBegin; 708 PetscCall(PetscLogGetStageLog(&stageLog)); 709 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 710 PetscFunctionReturn(PETSC_SUCCESS); 711 } 712 713 /*@C 714 PetscLogStageGetId - Returns the stage id when given the stage name. 715 716 Not Collective 717 718 Input Parameter: 719 . name - The stage name 720 721 Output Parameter: 722 . stage - The stage, , or -1 if no stage with that name exists 723 724 Level: intermediate 725 726 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 727 @*/ 728 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) 729 { 730 PetscStageLog stageLog; 731 732 PetscFunctionBegin; 733 PetscCall(PetscLogGetStageLog(&stageLog)); 734 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 735 PetscFunctionReturn(PETSC_SUCCESS); 736 } 737 738 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 739 740 /*@C 741 PetscLogEventRegister - Registers an event name for logging operations 742 743 Not Collective 744 745 Input Parameters: 746 + name - The name associated with the event 747 - classid - The classid associated to the class for this event, obtain either with 748 `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones 749 are only available in C code 750 751 Output Parameter: 752 . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`. 753 754 Example of Usage: 755 .vb 756 PetscLogEvent USER_EVENT; 757 PetscClassId classid; 758 PetscLogDouble user_event_flops; 759 PetscClassIdRegister("class name",&classid); 760 PetscLogEventRegister("User event name",classid,&USER_EVENT); 761 PetscLogEventBegin(USER_EVENT,0,0,0,0); 762 [code segment to monitor] 763 PetscLogFlops(user_event_flops); 764 PetscLogEventEnd(USER_EVENT,0,0,0,0); 765 .ve 766 767 Level: intermediate 768 769 Notes: 770 PETSc automatically logs library events if the code has been 771 configured with --with-log (which is the default) and 772 -log_view or -log_all is specified. `PetscLogEventRegister()` is 773 intended for logging user events to supplement this PETSc 774 information. 775 776 PETSc can gather data for use with the utilities Jumpshot 777 (part of the MPICH distribution). If PETSc has been compiled 778 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 779 MPICH), the user can employ another command line option, -log_mpe, 780 to create a logfile, "mpe.log", which can be visualized 781 Jumpshot. 782 783 The classid is associated with each event so that classes of events 784 can be disabled simultaneously, such as all matrix events. The user 785 can either use an existing classid, such as `MAT_CLASSID`, or create 786 their own as shown in the example. 787 788 If an existing event with the same name exists, its event handle is 789 returned instead of creating a new event. 790 791 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 792 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 793 @*/ 794 PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event) 795 { 796 PetscStageLog stageLog; 797 int stage; 798 799 PetscFunctionBegin; 800 *event = PETSC_DECIDE; 801 PetscCall(PetscLogGetStageLog(&stageLog)); 802 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 803 if (*event > 0) PetscFunctionReturn(PETSC_SUCCESS); 804 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 805 for (stage = 0; stage < stageLog->numStages; stage++) { 806 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 807 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 808 } 809 PetscFunctionReturn(PETSC_SUCCESS); 810 } 811 812 /*@ 813 PetscLogEventSetCollective - Indicates that a particular event is collective. 814 815 Not Collective 816 817 Input Parameters: 818 + event - The event id 819 - collective - Boolean flag indicating whether a particular event is collective 820 821 Level: developer 822 823 Notes: 824 New events returned from `PetscLogEventRegister()` are collective by default. 825 826 Collective events are handled specially if the -log_sync is used. In that case the logging saves information about 827 two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication 828 to be performed. This option is useful to debug imbalance within the computations or communications 829 830 .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()` 831 @*/ 832 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective) 833 { 834 PetscStageLog stageLog; 835 PetscEventRegLog eventRegLog; 836 837 PetscFunctionBegin; 838 PetscCall(PetscLogGetStageLog(&stageLog)); 839 PetscCall(PetscStageLogGetEventRegLog(stageLog, &eventRegLog)); 840 PetscCheck(event >= 0 && event <= eventRegLog->numEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid event id"); 841 eventRegLog->eventInfo[event].collective = collective; 842 PetscFunctionReturn(PETSC_SUCCESS); 843 } 844 845 /*@ 846 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 847 848 Not Collective 849 850 Input Parameter: 851 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 852 853 Level: developer 854 855 .seealso: [](ch_profiling), `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 856 @*/ 857 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) 858 { 859 PetscStageLog stageLog; 860 int stage; 861 862 PetscFunctionBegin; 863 PetscCall(PetscLogGetStageLog(&stageLog)); 864 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 865 PetscFunctionReturn(PETSC_SUCCESS); 866 } 867 868 /*@ 869 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 870 871 Not Collective 872 873 Input Parameter: 874 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 875 876 Level: developer 877 878 Note: 879 If a class is excluded then events associated with that class are not logged. 880 881 .seealso: [](ch_profiling), `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 882 @*/ 883 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) 884 { 885 PetscStageLog stageLog; 886 int stage; 887 888 PetscFunctionBegin; 889 PetscCall(PetscLogGetStageLog(&stageLog)); 890 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 891 PetscFunctionReturn(PETSC_SUCCESS); 892 } 893 894 /*@ 895 PetscLogEventActivate - Indicates that a particular event should be logged. 896 897 Not Collective 898 899 Input Parameter: 900 . event - The event id 901 902 Usage: 903 .vb 904 PetscLogEventDeactivate(VEC_SetValues); 905 [code where you do not want to log VecSetValues()] 906 PetscLogEventActivate(VEC_SetValues); 907 [code where you do want to log VecSetValues()] 908 .ve 909 910 Level: advanced 911 912 Note: 913 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 914 or an event number obtained with `PetscLogEventRegister()`. 915 916 .seealso: [](ch_profiling), `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 917 @*/ 918 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) 919 { 920 PetscStageLog stageLog; 921 int stage; 922 923 PetscFunctionBegin; 924 PetscCall(PetscLogGetStageLog(&stageLog)); 925 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 926 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 927 PetscFunctionReturn(PETSC_SUCCESS); 928 } 929 930 /*@ 931 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 932 933 Not Collective 934 935 Input Parameter: 936 . event - The event id 937 938 Usage: 939 .vb 940 PetscLogEventDeactivate(VEC_SetValues); 941 [code where you do not want to log VecSetValues()] 942 PetscLogEventActivate(VEC_SetValues); 943 [code where you do want to log VecSetValues()] 944 .ve 945 946 Level: advanced 947 948 Note: 949 The event may be either a pre-defined PETSc event (found in 950 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 951 952 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 953 @*/ 954 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) 955 { 956 PetscStageLog stageLog; 957 int stage; 958 959 PetscFunctionBegin; 960 PetscCall(PetscLogGetStageLog(&stageLog)); 961 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 962 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 963 PetscFunctionReturn(PETSC_SUCCESS); 964 } 965 966 /*@ 967 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called 968 969 Not Collective 970 971 Input Parameter: 972 . event - The event id 973 974 Usage: 975 .vb 976 PetscLogEventDeactivatePush(VEC_SetValues); 977 [code where you do not want to log VecSetValues()] 978 PetscLogEventDeactivatePop(VEC_SetValues); 979 [code where you do want to log VecSetValues()] 980 .ve 981 982 Level: advanced 983 984 Note: 985 The event may be either a pre-defined PETSc event (found in 986 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 987 988 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()` 989 @*/ 990 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) 991 { 992 PetscStageLog stageLog; 993 int stage; 994 995 PetscFunctionBegin; 996 PetscCall(PetscLogGetStageLog(&stageLog)); 997 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 998 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 999 PetscFunctionReturn(PETSC_SUCCESS); 1000 } 1001 1002 /*@ 1003 PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()` 1004 1005 Not Collective 1006 1007 Input Parameter: 1008 . event - The event id 1009 1010 Usage: 1011 .vb 1012 PetscLogEventDeactivatePush(VEC_SetValues); 1013 [code where you do not want to log VecSetValues()] 1014 PetscLogEventDeactivatePop(VEC_SetValues); 1015 [code where you do want to log VecSetValues()] 1016 .ve 1017 1018 Level: advanced 1019 1020 Note: 1021 The event may be either a pre-defined PETSc event (found in 1022 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 1023 1024 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 1025 @*/ 1026 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) 1027 { 1028 PetscStageLog stageLog; 1029 int stage; 1030 1031 PetscFunctionBegin; 1032 PetscCall(PetscLogGetStageLog(&stageLog)); 1033 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1034 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 1035 PetscFunctionReturn(PETSC_SUCCESS); 1036 } 1037 1038 /*@ 1039 PetscLogEventSetActiveAll - Turns on logging of all events 1040 1041 Not Collective 1042 1043 Input Parameters: 1044 + event - The event id 1045 - isActive - The activity flag determining whether the event is logged 1046 1047 Level: advanced 1048 1049 .seealso: [](ch_profiling), `PlogEventActivate()`, `PlogEventDeactivate()` 1050 @*/ 1051 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) 1052 { 1053 PetscStageLog stageLog; 1054 int stage; 1055 1056 PetscFunctionBegin; 1057 PetscCall(PetscLogGetStageLog(&stageLog)); 1058 for (stage = 0; stage < stageLog->numStages; stage++) { 1059 if (isActive) { 1060 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 1061 } else { 1062 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 1063 } 1064 } 1065 PetscFunctionReturn(PETSC_SUCCESS); 1066 } 1067 1068 /*@ 1069 PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage 1070 1071 Not Collective 1072 1073 Input Parameter: 1074 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 1075 1076 Level: developer 1077 1078 .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 1079 @*/ 1080 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) 1081 { 1082 PetscStageLog stageLog; 1083 int stage; 1084 1085 PetscFunctionBegin; 1086 PetscCall(PetscLogGetStageLog(&stageLog)); 1087 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1088 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1089 PetscFunctionReturn(PETSC_SUCCESS); 1090 } 1091 1092 /*@ 1093 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage 1094 1095 Not Collective 1096 1097 Input Parameter: 1098 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 1099 1100 Level: developer 1101 1102 .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 1103 @*/ 1104 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) 1105 { 1106 PetscStageLog stageLog; 1107 int stage; 1108 1109 PetscFunctionBegin; 1110 PetscCall(PetscLogGetStageLog(&stageLog)); 1111 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1112 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1113 PetscFunctionReturn(PETSC_SUCCESS); 1114 } 1115 1116 /*MC 1117 PetscLogEventSync - Synchronizes the beginning of a user event. 1118 1119 Synopsis: 1120 #include <petsclog.h> 1121 PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm) 1122 1123 Collective 1124 1125 Input Parameters: 1126 + e - integer associated with the event obtained from PetscLogEventRegister() 1127 - comm - an MPI communicator 1128 1129 Usage: 1130 .vb 1131 PetscLogEvent USER_EVENT; 1132 PetscLogEventRegister("User event",0,&USER_EVENT); 1133 PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD); 1134 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1135 [code segment to monitor] 1136 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1137 .ve 1138 1139 Level: developer 1140 1141 Note: 1142 This routine should be called only if there is not a 1143 `PetscObject` available to pass to `PetscLogEventBegin()`. 1144 1145 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1146 M*/ 1147 1148 /*MC 1149 PetscLogEventBegin - Logs the beginning of a user event. 1150 1151 Synopsis: 1152 #include <petsclog.h> 1153 PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1154 1155 Not Collective 1156 1157 Input Parameters: 1158 + e - integer associated with the event obtained from PetscLogEventRegister() 1159 - o1,o2,o3,o4 - objects associated with the event, or 0 1160 1161 Fortran Synopsis: 1162 void PetscLogEventBegin(int e,PetscErrorCode ierr) 1163 1164 Usage: 1165 .vb 1166 PetscLogEvent USER_EVENT; 1167 PetscLogDouble user_event_flops; 1168 PetscLogEventRegister("User event",0,&USER_EVENT); 1169 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1170 [code segment to monitor] 1171 PetscLogFlops(user_event_flops); 1172 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1173 .ve 1174 1175 Level: intermediate 1176 1177 Developer Note: 1178 `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the 1179 errors that occur in the macro directly because other packages that use this macros have used them in their 1180 own functions or methods that do not return error codes and it would be disruptive to change the current 1181 behavior. 1182 1183 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1184 M*/ 1185 1186 /*MC 1187 PetscLogEventEnd - Log the end of a user event. 1188 1189 Synopsis: 1190 #include <petsclog.h> 1191 PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1192 1193 Not Collective 1194 1195 Input Parameters: 1196 + e - integer associated with the event obtained with PetscLogEventRegister() 1197 - o1,o2,o3,o4 - objects associated with the event, or 0 1198 1199 Fortran Synopsis: 1200 void PetscLogEventEnd(int e,PetscErrorCode ierr) 1201 1202 Usage: 1203 .vb 1204 PetscLogEvent USER_EVENT; 1205 PetscLogDouble user_event_flops; 1206 PetscLogEventRegister("User event",0,&USER_EVENT,); 1207 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1208 [code segment to monitor] 1209 PetscLogFlops(user_event_flops); 1210 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1211 .ve 1212 1213 Level: intermediate 1214 1215 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1216 M*/ 1217 1218 /*@C 1219 PetscLogEventGetId - Returns the event id when given the event name. 1220 1221 Not Collective 1222 1223 Input Parameter: 1224 . name - The event name 1225 1226 Output Parameter: 1227 . event - The event, or -1 if no event with that name exists 1228 1229 Level: intermediate 1230 1231 .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1232 @*/ 1233 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) 1234 { 1235 PetscStageLog stageLog; 1236 1237 PetscFunctionBegin; 1238 PetscCall(PetscLogGetStageLog(&stageLog)); 1239 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1240 PetscFunctionReturn(PETSC_SUCCESS); 1241 } 1242 1243 PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event) 1244 { 1245 PetscFunctionBegin; 1246 if (!PetscDefined(HAVE_THREADSAFETY)) PetscCall(PetscIntStackPush(current_log_event_stack, event)); 1247 PetscFunctionReturn(PETSC_SUCCESS); 1248 } 1249 1250 PetscErrorCode PetscLogPopCurrentEvent_Internal(void) 1251 { 1252 PetscFunctionBegin; 1253 if (!PetscDefined(HAVE_THREADSAFETY)) PetscCall(PetscIntStackPop(current_log_event_stack, NULL)); 1254 PetscFunctionReturn(PETSC_SUCCESS); 1255 } 1256 1257 PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event) 1258 { 1259 PetscBool empty; 1260 1261 PetscFunctionBegin; 1262 PetscValidIntPointer(event, 1); 1263 *event = PETSC_DECIDE; 1264 PetscCall(PetscIntStackEmpty(current_log_event_stack, &empty)); 1265 if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack, event)); 1266 PetscFunctionReturn(PETSC_SUCCESS); 1267 } 1268 1269 PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event) 1270 { 1271 PetscFunctionBegin; 1272 if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event, NULL, NULL, NULL, NULL)); 1273 PetscFunctionReturn(PETSC_SUCCESS); 1274 } 1275 1276 PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event) 1277 { 1278 PetscStageLog stageLog; 1279 PetscEventPerfLog eventLog; 1280 int stage; 1281 1282 PetscFunctionBegin; 1283 if (event == PETSC_DECIDE) PetscFunctionReturn(PETSC_SUCCESS); 1284 PetscCall(PetscLogEventBegin(event, NULL, NULL, NULL, NULL)); 1285 PetscCall(PetscLogGetStageLog(&stageLog)); 1286 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1287 PetscCall(PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog)); 1288 eventLog->eventInfo[event].count--; 1289 PetscFunctionReturn(PETSC_SUCCESS); 1290 } 1291 1292 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1293 /*@C 1294 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1295 be read by bin/petscview. This program no longer exists. 1296 1297 Collective on `PETSC_COMM_WORLD` 1298 1299 Input Parameter: 1300 . name - an optional file name 1301 1302 Usage: 1303 .vb 1304 PetscInitialize(...); 1305 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1306 ... code ... 1307 PetscLogDump(filename); 1308 PetscFinalize(); 1309 .ve 1310 1311 Level: advanced 1312 1313 Note: 1314 The default file name is Log.<rank> where <rank> is the MPI process rank. If no name is specified, 1315 this file will be used. 1316 1317 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1318 @*/ 1319 PetscErrorCode PetscLogDump(const char sname[]) 1320 { 1321 PetscStageLog stageLog; 1322 PetscEventPerfInfo *eventInfo; 1323 FILE *fd; 1324 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1325 PetscLogDouble flops, _TotalTime; 1326 PetscMPIInt rank; 1327 int action, object, curStage; 1328 PetscLogEvent event; 1329 1330 PetscFunctionBegin; 1331 /* Calculate the total elapsed time */ 1332 PetscCall(PetscTime(&_TotalTime)); 1333 _TotalTime -= petsc_BaseTime; 1334 /* Open log file */ 1335 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1336 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank)); 1337 PetscCall(PetscFixFilename(file, fname)); 1338 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1339 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1340 /* Output totals */ 1341 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1342 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1343 /* Output actions */ 1344 if (petsc_logActions) { 1345 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1346 for (action = 0; action < petsc_numActions; action++) { 1347 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1348 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1349 } 1350 } 1351 /* Output objects */ 1352 if (petsc_logObjects) { 1353 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1354 for (object = 0; object < petsc_numObjects; object++) { 1355 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem)); 1356 if (!petsc_objects[object].name[0]) { 1357 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n")); 1358 } else { 1359 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1360 } 1361 if (petsc_objects[object].info[0] != 0) { 1362 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1363 } else { 1364 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1365 } 1366 } 1367 } 1368 /* Output events */ 1369 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1370 PetscCall(PetscLogGetStageLog(&stageLog)); 1371 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1372 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1373 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1374 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time; 1375 else flops = 0.0; 1376 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops)); 1377 } 1378 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1379 PetscFunctionReturn(PETSC_SUCCESS); 1380 } 1381 1382 /* 1383 PetscLogView_Detailed - Each process prints the times for its own events 1384 1385 */ 1386 PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) 1387 { 1388 PetscStageLog stageLog; 1389 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1390 PetscLogDouble locTotalTime, numRed, maxMem; 1391 int numStages, numEvents, stage, event; 1392 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1393 PetscMPIInt rank, size; 1394 1395 PetscFunctionBegin; 1396 PetscCallMPI(MPI_Comm_size(comm, &size)); 1397 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1398 /* Must preserve reduction count before we go on */ 1399 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1400 /* Get the total elapsed time */ 1401 PetscCall(PetscTime(&locTotalTime)); 1402 locTotalTime -= petsc_BaseTime; 1403 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 1404 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 1405 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 1406 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 1407 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 1408 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 1409 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 1410 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 1411 PetscCall(PetscLogGetStageLog(&stageLog)); 1412 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1413 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1414 for (stage = 0; stage < numStages; stage++) { 1415 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name)); 1416 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name)); 1417 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1418 for (event = 0; event < numEvents; event++) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name)); 1419 } 1420 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1421 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1422 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1423 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1424 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1425 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1426 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1427 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects)); 1428 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1429 PetscCall(PetscViewerFlush(viewer)); 1430 for (stage = 0; stage < numStages; stage++) { 1431 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1432 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, 1433 stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1434 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1435 for (event = 0; event < numEvents; event++) { 1436 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1437 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1438 stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, 1439 eventInfo->flops)); 1440 if (eventInfo->dof[0] >= 0.) { 1441 PetscInt d, e; 1442 1443 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1444 for (d = 0; d < 8; ++d) { 1445 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1446 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1447 } 1448 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1449 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1450 for (e = 0; e < 8; ++e) { 1451 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1452 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1453 } 1454 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1455 } 1456 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1457 } 1458 } 1459 PetscCall(PetscViewerFlush(viewer)); 1460 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1461 PetscFunctionReturn(PETSC_SUCCESS); 1462 } 1463 1464 /* 1465 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1466 */ 1467 PetscErrorCode PetscLogView_CSV(PetscViewer viewer) 1468 { 1469 PetscStageLog stageLog; 1470 PetscEventPerfInfo *eventInfo = NULL; 1471 PetscLogDouble locTotalTime, maxMem; 1472 int numStages, numEvents, stage, event; 1473 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1474 PetscMPIInt rank, size; 1475 1476 PetscFunctionBegin; 1477 PetscCallMPI(MPI_Comm_size(comm, &size)); 1478 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1479 /* Must preserve reduction count before we go on */ 1480 /* Get the total elapsed time */ 1481 PetscCall(PetscTime(&locTotalTime)); 1482 locTotalTime -= petsc_BaseTime; 1483 PetscCall(PetscLogGetStageLog(&stageLog)); 1484 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1485 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1486 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1487 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1488 PetscCall(PetscViewerFlush(viewer)); 1489 for (stage = 0; stage < numStages; stage++) { 1490 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1491 1492 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1493 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1494 for (event = 0; event < numEvents; event++) { 1495 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1496 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, 1497 eventInfo->numReductions, eventInfo->flops)); 1498 if (eventInfo->dof[0] >= 0.) { 1499 PetscInt d, e; 1500 1501 for (d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1502 for (e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1503 } 1504 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1505 } 1506 } 1507 PetscCall(PetscViewerFlush(viewer)); 1508 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1509 PetscFunctionReturn(PETSC_SUCCESS); 1510 } 1511 1512 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) 1513 { 1514 PetscFunctionBegin; 1515 if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS); 1516 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1517 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1518 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1519 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1520 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1521 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1522 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1523 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1524 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1525 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1526 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1527 PetscFunctionReturn(PETSC_SUCCESS); 1528 } 1529 1530 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) 1531 { 1532 PetscFunctionBegin; 1533 if (PetscDefined(USE_DEBUG)) { 1534 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1535 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1536 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1537 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1538 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1539 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1540 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1541 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1542 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1543 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1544 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1545 } 1546 PetscFunctionReturn(PETSC_SUCCESS); 1547 } 1548 1549 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) 1550 { 1551 #if defined(PETSC_HAVE_DEVICE) 1552 PetscMPIInt size; 1553 PetscBool deviceInitialized = PETSC_FALSE; 1554 1555 PetscFunctionBegin; 1556 PetscCallMPI(MPI_Comm_size(comm, &size)); 1557 for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) { 1558 const PetscDeviceType dtype = PetscDeviceTypeCast(i); 1559 if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */ 1560 deviceInitialized = PETSC_TRUE; 1561 break; 1562 } 1563 } 1564 /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */ 1565 if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS); 1566 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1567 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1568 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1569 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1570 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1571 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1572 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1573 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1574 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1575 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1576 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1577 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1578 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1579 PetscFunctionReturn(PETSC_SUCCESS); 1580 #else 1581 return PETSC_SUCCESS; 1582 #endif 1583 } 1584 1585 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) 1586 { 1587 #if defined(PETSC_HAVE_DEVICE) 1588 1589 PetscFunctionBegin; 1590 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS); 1591 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1592 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1593 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1594 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1595 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1596 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1597 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1598 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1599 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1600 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1601 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1602 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1603 PetscFunctionReturn(PETSC_SUCCESS); 1604 #else 1605 return PETSC_SUCCESS; 1606 #endif 1607 } 1608 1609 PetscErrorCode PetscLogView_Default(PetscViewer viewer) 1610 { 1611 FILE *fd; 1612 PetscLogDouble zero = 0.0; 1613 PetscStageLog stageLog; 1614 PetscStageInfo *stageInfo = NULL; 1615 PetscEventPerfInfo *eventInfo = NULL; 1616 PetscClassPerfInfo *classInfo; 1617 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1618 const char *name; 1619 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1620 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1621 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1622 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1623 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1624 PetscLogDouble min, max, tot, ratio, avg, x, y; 1625 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1626 #if defined(PETSC_HAVE_DEVICE) 1627 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1628 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1629 #endif 1630 PetscMPIInt minC, maxC; 1631 PetscMPIInt size, rank; 1632 PetscBool *localStageUsed, *stageUsed; 1633 PetscBool *localStageVisible, *stageVisible; 1634 int numStages, localNumEvents, numEvents; 1635 int stage, oclass; 1636 PetscLogEvent event; 1637 char version[256]; 1638 MPI_Comm comm; 1639 #if defined(PETSC_HAVE_DEVICE) 1640 PetscLogEvent eventid; 1641 PetscInt64 nas = 0x7FF0000000000002; 1642 #endif 1643 1644 PetscFunctionBegin; 1645 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1646 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1647 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1648 PetscCallMPI(MPI_Comm_size(comm, &size)); 1649 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1650 /* Get the total elapsed time */ 1651 PetscCall(PetscTime(&locTotalTime)); 1652 locTotalTime -= petsc_BaseTime; 1653 1654 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1655 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1656 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1657 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1658 PetscCall(PetscLogViewWarnSync(comm, fd)); 1659 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1660 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1661 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1662 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1663 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1664 PetscCall(PetscGetUserName(username, sizeof(username))); 1665 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1666 PetscCall(PetscGetDate(date, sizeof(date))); 1667 PetscCall(PetscGetVersion(version, sizeof(version))); 1668 if (size == 1) { 1669 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1670 } else { 1671 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1672 } 1673 #if defined(PETSC_HAVE_OPENMP) 1674 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1675 #endif 1676 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1677 1678 /* Must preserve reduction count before we go on */ 1679 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1680 1681 /* Calculate summary information */ 1682 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1683 /* Time */ 1684 PetscCall(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1685 PetscCall(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1686 PetscCall(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1687 avg = tot / ((PetscLogDouble)size); 1688 if (min != 0.0) ratio = max / min; 1689 else ratio = 0.0; 1690 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1691 TotalTime = tot; 1692 /* Objects */ 1693 avg = (PetscLogDouble)petsc_numObjects; 1694 PetscCall(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1695 PetscCall(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1696 PetscCall(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1697 avg = tot / ((PetscLogDouble)size); 1698 if (min != 0.0) ratio = max / min; 1699 else ratio = 0.0; 1700 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1701 /* Flops */ 1702 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1703 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1704 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1705 avg = tot / ((PetscLogDouble)size); 1706 if (min != 0.0) ratio = max / min; 1707 else ratio = 0.0; 1708 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1709 TotalFlops = tot; 1710 /* Flops/sec -- Must talk to Barry here */ 1711 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1712 else flops = 0.0; 1713 PetscCall(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1714 PetscCall(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1715 PetscCall(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1716 avg = tot / ((PetscLogDouble)size); 1717 if (min != 0.0) ratio = max / min; 1718 else ratio = 0.0; 1719 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1720 /* Memory */ 1721 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1722 if (mem > 0.0) { 1723 PetscCall(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1724 PetscCall(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1725 PetscCall(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1726 avg = tot / ((PetscLogDouble)size); 1727 if (min != 0.0) ratio = max / min; 1728 else ratio = 0.0; 1729 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1730 } 1731 /* Messages */ 1732 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1733 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1734 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1735 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1736 avg = tot / ((PetscLogDouble)size); 1737 if (min != 0.0) ratio = max / min; 1738 else ratio = 0.0; 1739 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1740 numMessages = tot; 1741 /* Message Lengths */ 1742 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1743 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1744 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1745 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1746 if (numMessages != 0) avg = tot / numMessages; 1747 else avg = 0.0; 1748 if (min != 0.0) ratio = max / min; 1749 else ratio = 0.0; 1750 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1751 messageLength = tot; 1752 /* Reductions */ 1753 PetscCall(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1754 PetscCall(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1755 PetscCall(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1756 if (min != 0.0) ratio = max / min; 1757 else ratio = 0.0; 1758 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1759 numReductions = red; /* wrong because uses count from process zero */ 1760 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1761 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1762 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1763 1764 /* Get total number of stages -- 1765 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1766 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1767 This seems best accomplished by assoicating a communicator with each stage. 1768 */ 1769 PetscCall(PetscLogGetStageLog(&stageLog)); 1770 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1771 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1772 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1773 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1774 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1775 if (numStages > 0) { 1776 stageInfo = stageLog->stageInfo; 1777 for (stage = 0; stage < numStages; stage++) { 1778 if (stage < stageLog->numStages) { 1779 localStageUsed[stage] = stageInfo[stage].used; 1780 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1781 } else { 1782 localStageUsed[stage] = PETSC_FALSE; 1783 localStageVisible[stage] = PETSC_TRUE; 1784 } 1785 } 1786 PetscCall(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1787 PetscCall(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1788 for (stage = 0; stage < numStages; stage++) { 1789 if (stageUsed[stage]) { 1790 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1791 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1792 break; 1793 } 1794 } 1795 for (stage = 0; stage < numStages; stage++) { 1796 if (!stageUsed[stage]) continue; 1797 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1798 if (localStageUsed[stage]) { 1799 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1800 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1801 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1802 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1803 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1804 name = stageInfo[stage].name; 1805 } else { 1806 PetscCall(MPIU_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1807 PetscCall(MPIU_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1808 PetscCall(MPIU_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1809 PetscCall(MPIU_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1810 PetscCall(MPIU_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1811 name = ""; 1812 } 1813 mess *= 0.5; 1814 messLen *= 0.5; 1815 red /= size; 1816 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1817 else fracTime = 0.0; 1818 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1819 else fracFlops = 0.0; 1820 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1821 if (numMessages != 0.0) fracMessages = mess / numMessages; 1822 else fracMessages = 0.0; 1823 if (mess != 0.0) avgMessLen = messLen / mess; 1824 else avgMessLen = 0.0; 1825 if (messageLength != 0.0) fracLength = messLen / messageLength; 1826 else fracLength = 0.0; 1827 if (numReductions != 0.0) fracReductions = red / numReductions; 1828 else fracReductions = 0.0; 1829 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1830 } 1831 } 1832 1833 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1834 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1835 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1836 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1837 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1838 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1839 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1840 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1841 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1842 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1843 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1844 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1845 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1846 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1847 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1848 if (PetscLogMemory) { 1849 PetscCall(PetscFPrintf(comm, fd, " Memory usage is summed over all MPI processes, it is given in mega-bytes\n")); 1850 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1851 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1852 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1853 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1854 } 1855 #if defined(PETSC_HAVE_DEVICE) 1856 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1857 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1858 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1859 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1860 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1861 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1862 #endif 1863 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1864 1865 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1866 1867 /* Report events */ 1868 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1869 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1870 #if defined(PETSC_HAVE_DEVICE) 1871 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1872 #endif 1873 PetscCall(PetscFPrintf(comm, fd, "\n")); 1874 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1875 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1876 #if defined(PETSC_HAVE_DEVICE) 1877 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1878 #endif 1879 PetscCall(PetscFPrintf(comm, fd, "\n")); 1880 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1881 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1882 #if defined(PETSC_HAVE_DEVICE) 1883 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1884 #endif 1885 PetscCall(PetscFPrintf(comm, fd, "\n")); 1886 1887 #if defined(PETSC_HAVE_DEVICE) 1888 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1889 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1890 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1891 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1892 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1893 #endif 1894 1895 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1896 for (stage = 0; stage < numStages; stage++) { 1897 if (!stageVisible[stage]) continue; 1898 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1899 if (localStageUsed[stage]) { 1900 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1901 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1902 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1903 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1904 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1905 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1906 } else { 1907 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1908 PetscCall(MPIU_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1909 PetscCall(MPIU_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1910 PetscCall(MPIU_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1911 PetscCall(MPIU_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1912 PetscCall(MPIU_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1913 } 1914 mess *= 0.5; 1915 messLen *= 0.5; 1916 red /= size; 1917 1918 /* Get total number of events in this stage -- 1919 Currently, a single processor can register more events than another, but events must all be registered in order, 1920 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1921 on the event ID. This seems best accomplished by associating a communicator with each stage. 1922 1923 Problem: If the event did not happen on proc 1, its name will not be available. 1924 Problem: Event visibility is not implemented 1925 */ 1926 if (localStageUsed[stage]) { 1927 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1928 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1929 } else localNumEvents = 0; 1930 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1931 for (event = 0; event < numEvents; event++) { 1932 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1933 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1934 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; 1935 else flopr = 0.0; 1936 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1937 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1938 PetscCall(MPIU_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1939 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1940 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1941 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1942 PetscCall(MPIU_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1943 PetscCall(MPIU_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1944 PetscCall(MPIU_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1945 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1946 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1947 if (PetscLogMemory) { 1948 PetscCall(MPIU_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1949 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1950 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1951 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1952 } 1953 #if defined(PETSC_HAVE_DEVICE) 1954 PetscCall(MPIU_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1955 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1956 PetscCall(MPIU_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1957 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1958 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1959 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1960 #endif 1961 name = stageLog->eventLog->eventInfo[event].name; 1962 } else { 1963 int ierr = 0; 1964 1965 flopr = 0.0; 1966 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1967 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1968 PetscCall(MPIU_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1969 PetscCall(MPIU_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1970 PetscCall(MPIU_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1971 PetscCall(MPIU_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1972 PetscCall(MPIU_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1973 PetscCall(MPIU_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1974 PetscCall(MPIU_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1975 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1976 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1977 if (PetscLogMemory) { 1978 PetscCall(MPIU_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1979 PetscCall(MPIU_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1980 PetscCall(MPIU_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1981 PetscCall(MPIU_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1982 } 1983 #if defined(PETSC_HAVE_DEVICE) 1984 PetscCall(MPIU_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1985 PetscCall(MPIU_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1986 PetscCall(MPIU_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1987 PetscCall(MPIU_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1988 PetscCall(MPIU_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1989 PetscCall(MPIU_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1990 #endif 1991 name = ""; 1992 } 1993 if (mint < 0.0) { 1994 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name)); 1995 mint = 0; 1996 } 1997 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, name); 1998 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1999 #if defined(PETSC_HAVE_DEVICE) 2000 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 2001 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 2002 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 2003 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 2004 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 2005 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 2006 } 2007 } 2008 #endif 2009 totm *= 0.5; 2010 totml *= 0.5; 2011 totr /= size; 2012 2013 if (maxC != 0) { 2014 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 2015 else ratC = 0.0; 2016 if (mint != 0.0) ratt = maxt / mint; 2017 else ratt = 0.0; 2018 if (minf != 0.0) ratf = maxf / minf; 2019 else ratf = 0.0; 2020 if (TotalTime != 0.0) fracTime = tott / TotalTime; 2021 else fracTime = 0.0; 2022 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 2023 else fracFlops = 0.0; 2024 if (stageTime != 0.0) fracStageTime = tott / stageTime; 2025 else fracStageTime = 0.0; 2026 if (flops != 0.0) fracStageFlops = totf / flops; 2027 else fracStageFlops = 0.0; 2028 if (numMessages != 0.0) fracMess = totm / numMessages; 2029 else fracMess = 0.0; 2030 if (messageLength != 0.0) fracMessLen = totml / messageLength; 2031 else fracMessLen = 0.0; 2032 if (numReductions != 0.0) fracRed = totr / numReductions; 2033 else fracRed = 0.0; 2034 if (mess != 0.0) fracStageMess = totm / mess; 2035 else fracStageMess = 0.0; 2036 if (messLen != 0.0) fracStageMessLen = totml / messLen; 2037 else fracStageMessLen = 0.0; 2038 if (red != 0.0) fracStageRed = totr / red; 2039 else fracStageRed = 0.0; 2040 if (totm != 0.0) totml /= totm; 2041 else totml = 0.0; 2042 if (maxt != 0.0) flopr = totf / maxt; 2043 else flopr = 0.0; 2044 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) 2045 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 2046 else 2047 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 2048 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 2049 #if defined(PETSC_HAVE_DEVICE) 2050 if (totf != 0.0) fracgflops = gflops / totf; 2051 else fracgflops = 0.0; 2052 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 2053 else gflopr = 0.0; 2054 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 2055 #endif 2056 PetscCall(PetscFPrintf(comm, fd, "\n")); 2057 } 2058 } 2059 } 2060 2061 /* Memory usage and object creation */ 2062 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 2063 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 2064 #if defined(PETSC_HAVE_DEVICE) 2065 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 2066 #endif 2067 PetscCall(PetscFPrintf(comm, fd, "\n")); 2068 PetscCall(PetscFPrintf(comm, fd, "\n")); 2069 2070 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 2071 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 2072 stats for stages local to processor sets. 2073 */ 2074 /* We should figure out the longest object name here (now 20 characters) */ 2075 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 2076 for (stage = 0; stage < numStages; stage++) { 2077 if (localStageUsed[stage]) { 2078 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 2079 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 2080 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 2081 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 2082 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions)); 2083 } 2084 } 2085 } else { 2086 if (!localStageVisible[stage]) continue; 2087 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 2088 } 2089 } 2090 2091 PetscCall(PetscFree(localStageUsed)); 2092 PetscCall(PetscFree(stageUsed)); 2093 PetscCall(PetscFree(localStageVisible)); 2094 PetscCall(PetscFree(stageVisible)); 2095 2096 /* Information unrelated to this particular run */ 2097 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 2098 PetscCall(PetscTime(&y)); 2099 PetscCall(PetscTime(&x)); 2100 PetscCall(PetscTime(&y)); 2101 PetscCall(PetscTime(&y)); 2102 PetscCall(PetscTime(&y)); 2103 PetscCall(PetscTime(&y)); 2104 PetscCall(PetscTime(&y)); 2105 PetscCall(PetscTime(&y)); 2106 PetscCall(PetscTime(&y)); 2107 PetscCall(PetscTime(&y)); 2108 PetscCall(PetscTime(&y)); 2109 PetscCall(PetscTime(&y)); 2110 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 2111 /* MPI information */ 2112 if (size > 1) { 2113 MPI_Status status; 2114 PetscMPIInt tag; 2115 MPI_Comm newcomm; 2116 2117 PetscCallMPI(MPI_Barrier(comm)); 2118 PetscCall(PetscTime(&x)); 2119 PetscCallMPI(MPI_Barrier(comm)); 2120 PetscCallMPI(MPI_Barrier(comm)); 2121 PetscCallMPI(MPI_Barrier(comm)); 2122 PetscCallMPI(MPI_Barrier(comm)); 2123 PetscCallMPI(MPI_Barrier(comm)); 2124 PetscCall(PetscTime(&y)); 2125 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 2126 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 2127 PetscCallMPI(MPI_Barrier(comm)); 2128 if (rank) { 2129 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 2130 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 2131 } else { 2132 PetscCall(PetscTime(&x)); 2133 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 2134 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 2135 PetscCall(PetscTime(&y)); 2136 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 2137 } 2138 PetscCall(PetscCommDestroy(&newcomm)); 2139 } 2140 PetscCall(PetscOptionsView(NULL, viewer)); 2141 2142 /* Machine and compile information */ 2143 #if defined(PETSC_USE_FORTRAN_KERNELS) 2144 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 2145 #else 2146 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 2147 #endif 2148 #if defined(PETSC_USE_64BIT_INDICES) 2149 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64-bit PetscInt\n")); 2150 #elif defined(PETSC_USE___FLOAT128) 2151 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32-bit PetscInt\n")); 2152 #endif 2153 #if defined(PETSC_USE_REAL_SINGLE) 2154 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 2155 #elif defined(PETSC_USE___FLOAT128) 2156 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 2157 #endif 2158 #if defined(PETSC_USE_REAL_MAT_SINGLE) 2159 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 2160 #else 2161 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 2162 #endif 2163 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 2164 2165 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 2166 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 2167 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 2168 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 2169 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 2170 2171 /* Cleanup */ 2172 PetscCall(PetscFPrintf(comm, fd, "\n")); 2173 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 2174 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 2175 PetscCall(PetscFPTrapPop()); 2176 PetscFunctionReturn(PETSC_SUCCESS); 2177 } 2178 2179 /*@C 2180 PetscLogView - Prints a summary of the logging. 2181 2182 Collective over MPI_Comm 2183 2184 Input Parameter: 2185 . viewer - an ASCII viewer 2186 2187 Options Database Keys: 2188 + -log_view [:filename] - Prints summary of log information 2189 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2190 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2191 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2192 . -log_view_memory - Also display memory usage in each event 2193 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2194 . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation 2195 - -log_trace [filename] - Displays a trace of what each process is doing 2196 2197 Level: beginner 2198 2199 Notes: 2200 It is possible to control the logging programmatically but we recommend using the options database approach whenever possible 2201 By default the summary is printed to stdout. 2202 2203 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2204 2205 If PETSc is configured with --with-logging=0 then this functionality is not available 2206 2207 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2208 directory then open filename.xml with your browser. Specific notes for certain browsers 2209 $ Firefox and Internet explorer - simply open the file 2210 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2211 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2212 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2213 your browser. 2214 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2215 window and render the XML log file contents. 2216 2217 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2218 2219 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2220 or using speedscope (https://www.speedscope.app). 2221 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2222 2223 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogDump()` 2224 @*/ 2225 PetscErrorCode PetscLogView(PetscViewer viewer) 2226 { 2227 PetscBool isascii; 2228 PetscViewerFormat format; 2229 int stage, lastStage; 2230 PetscStageLog stageLog; 2231 2232 PetscFunctionBegin; 2233 PetscCheck(PetscLogPLB, PETSC_COMM_SELF, PETSC_ERR_SUP, "Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2234 /* Pop off any stages the user forgot to remove */ 2235 lastStage = 0; 2236 PetscCall(PetscLogGetStageLog(&stageLog)); 2237 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2238 while (stage >= 0) { 2239 lastStage = stage; 2240 PetscCall(PetscStageLogPop(stageLog)); 2241 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2242 } 2243 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2244 PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Currently can only view logging to ASCII"); 2245 PetscCall(PetscViewerGetFormat(viewer, &format)); 2246 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2247 PetscCall(PetscLogView_Default(viewer)); 2248 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2249 PetscCall(PetscLogView_Detailed(viewer)); 2250 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2251 PetscCall(PetscLogView_CSV(viewer)); 2252 } else if (format == PETSC_VIEWER_ASCII_XML) { 2253 PetscCall(PetscLogView_Nested(viewer)); 2254 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2255 PetscCall(PetscLogView_Flamegraph(viewer)); 2256 } 2257 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2258 PetscFunctionReturn(PETSC_SUCCESS); 2259 } 2260 2261 /*@C 2262 PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed. 2263 2264 Collective on `PETSC_COMM_WORLD` 2265 2266 Level: developer 2267 2268 .seealso: [](ch_profiling), `PetscLogView()` 2269 @*/ 2270 PetscErrorCode PetscLogViewFromOptions(void) 2271 { 2272 PetscViewer viewer; 2273 PetscBool flg; 2274 PetscViewerFormat format; 2275 2276 PetscFunctionBegin; 2277 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg)); 2278 if (flg) { 2279 PetscCall(PetscViewerPushFormat(viewer, format)); 2280 PetscCall(PetscLogView(viewer)); 2281 PetscCall(PetscViewerPopFormat(viewer)); 2282 PetscCall(PetscViewerDestroy(&viewer)); 2283 } 2284 PetscFunctionReturn(PETSC_SUCCESS); 2285 } 2286 2287 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2288 /*@C 2289 PetscGetFlops - Returns the number of flops used on this processor 2290 since the program began. 2291 2292 Not Collective 2293 2294 Output Parameter: 2295 flops - number of floating point operations 2296 2297 Level: intermediate 2298 2299 Notes: 2300 A global counter logs all PETSc flop counts. The user can use 2301 `PetscLogFlops()` to increment this counter to include flops for the 2302 application code. 2303 2304 A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank 2305 2306 .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()` 2307 @*/ 2308 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) 2309 { 2310 PetscFunctionBegin; 2311 *flops = petsc_TotalFlops; 2312 PetscFunctionReturn(PETSC_SUCCESS); 2313 } 2314 2315 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2316 { 2317 size_t fullLength; 2318 va_list Argp; 2319 2320 PetscFunctionBegin; 2321 if (!petsc_logObjects) PetscFunctionReturn(PETSC_SUCCESS); 2322 va_start(Argp, format); 2323 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp)); 2324 va_end(Argp); 2325 PetscFunctionReturn(PETSC_SUCCESS); 2326 } 2327 2328 /*MC 2329 PetscLogFlops - Adds floating point operations to the global counter. 2330 2331 Synopsis: 2332 #include <petsclog.h> 2333 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2334 2335 Not Collective 2336 2337 Input Parameter: 2338 . f - flop counter 2339 2340 Usage: 2341 .vb 2342 PetscLogEvent USER_EVENT; 2343 PetscLogEventRegister("User event",0,&USER_EVENT); 2344 PetscLogEventBegin(USER_EVENT,0,0,0,0); 2345 [code segment to monitor] 2346 PetscLogFlops(user_flops) 2347 PetscLogEventEnd(USER_EVENT,0,0,0,0); 2348 .ve 2349 2350 Level: intermediate 2351 2352 Note: 2353 A global counter logs all PETSc flop counts. The user can use 2354 PetscLogFlops() to increment this counter to include flops for the 2355 application code. 2356 2357 .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2358 M*/ 2359 2360 /*MC 2361 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) 2362 to get accurate timings 2363 2364 Synopsis: 2365 #include <petsclog.h> 2366 void PetscPreLoadBegin(PetscBool flag,char *name); 2367 2368 Not Collective 2369 2370 Input Parameters: 2371 + flag - `PETSC_TRUE` to run twice, `PETSC_FALSE` to run once, may be overridden 2372 with command line option -preload true or -preload false 2373 - name - name of first stage (lines of code timed separately with `-log_view`) to 2374 be preloaded 2375 2376 Usage: 2377 .vb 2378 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2379 lines of code 2380 PetscPreLoadStage("second stage"); 2381 lines of code 2382 PetscPreLoadEnd(); 2383 .ve 2384 2385 Level: intermediate 2386 2387 Note: 2388 Only works in C/C++, not Fortran 2389 2390 Flags available within the macro. 2391 + PetscPreLoadingUsed - true if we are or have done preloading 2392 . PetscPreLoadingOn - true if it is CURRENTLY doing preload 2393 . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second 2394 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on 2395 The first two variables are available throughout the program, the second two only between the `PetscPreLoadBegin()` 2396 and `PetscPreLoadEnd()` 2397 2398 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2399 M*/ 2400 2401 /*MC 2402 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) 2403 to get accurate timings 2404 2405 Synopsis: 2406 #include <petsclog.h> 2407 void PetscPreLoadEnd(void); 2408 2409 Not Collective 2410 2411 Usage: 2412 .vb 2413 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2414 lines of code 2415 PetscPreLoadStage("second stage"); 2416 lines of code 2417 PetscPreLoadEnd(); 2418 .ve 2419 2420 Level: intermediate 2421 2422 Note: 2423 Only works in C/C++ not fortran 2424 2425 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2426 M*/ 2427 2428 /*MC 2429 PetscPreLoadStage - Start a new segment of code to be timed separately. 2430 to get accurate timings 2431 2432 Synopsis: 2433 #include <petsclog.h> 2434 void PetscPreLoadStage(char *name); 2435 2436 Not Collective 2437 2438 Usage: 2439 .vb 2440 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2441 lines of code 2442 PetscPreLoadStage("second stage"); 2443 lines of code 2444 PetscPreLoadEnd(); 2445 .ve 2446 2447 Level: intermediate 2448 2449 Note: 2450 Only works in C/C++ not fortran 2451 2452 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2453 M*/ 2454 2455 #if PetscDefined(HAVE_DEVICE) 2456 #include <petsc/private/deviceimpl.h> 2457 2458 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2459 2460 /* 2461 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2462 because it will result in timing results that cannot be interpreted. 2463 */ 2464 static PetscErrorCode PetscLogGpuTime_Off(void) 2465 { 2466 PetscLogGpuTimeFlag = PETSC_FALSE; 2467 return PETSC_SUCCESS; 2468 } 2469 2470 /*@C 2471 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2472 2473 Options Database Key: 2474 . -log_view_gpu_time - provide the GPU times in the -log_view output 2475 2476 Level: advanced 2477 2478 Notes: 2479 Turning on the timing of the 2480 GPU kernels can slow down the entire computation and should only be used when studying the performance 2481 of operations on GPU such as vector operations and matrix-vector operations. 2482 2483 This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off. 2484 2485 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2486 @*/ 2487 PetscErrorCode PetscLogGpuTime(void) 2488 { 2489 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2490 PetscLogGpuTimeFlag = PETSC_TRUE; 2491 return PETSC_SUCCESS; 2492 } 2493 2494 /*@C 2495 PetscLogGpuTimeBegin - Start timer for device 2496 2497 Level: intermediate 2498 2499 Notes: 2500 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times). 2501 2502 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times). 2503 2504 There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()` 2505 2506 This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space. 2507 2508 The regular logging captures the time for data transfers and any CPU activities during the event 2509 2510 It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel. 2511 2512 Developer Notes: 2513 The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`. 2514 2515 `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the 2516 event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution, 2517 but not continued GPU execution, until the timer event is recorded. 2518 2519 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2520 @*/ 2521 PetscErrorCode PetscLogGpuTimeBegin(void) 2522 { 2523 PetscFunctionBegin; 2524 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(PETSC_SUCCESS); 2525 if (PetscDefined(HAVE_DEVICE)) { 2526 PetscDeviceContext dctx; 2527 2528 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2529 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2530 } else { 2531 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2532 } 2533 PetscFunctionReturn(PETSC_SUCCESS); 2534 } 2535 2536 /*@C 2537 PetscLogGpuTimeEnd - Stop timer for device 2538 2539 Level: intermediate 2540 2541 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2542 @*/ 2543 PetscErrorCode PetscLogGpuTimeEnd(void) 2544 { 2545 PetscFunctionBegin; 2546 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(PETSC_SUCCESS); 2547 if (PetscDefined(HAVE_DEVICE)) { 2548 PetscDeviceContext dctx; 2549 PetscLogDouble elapsed; 2550 2551 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2552 PetscCall(PetscDeviceContextEndTimer_Internal(dctx, &elapsed)); 2553 petsc_gtime += (elapsed / 1000.0); 2554 } else { 2555 PetscCall(PetscTimeAdd(&petsc_gtime)); 2556 } 2557 PetscFunctionReturn(PETSC_SUCCESS); 2558 } 2559 2560 #endif /* end of PETSC_HAVE_DEVICE */ 2561 2562 #else /* end of -DPETSC_USE_LOG section */ 2563 2564 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2565 { 2566 PetscFunctionBegin; 2567 PetscFunctionReturn(PETSC_SUCCESS); 2568 } 2569 2570 #endif /* PETSC_USE_LOG*/ 2571 2572 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2573 PetscClassId PETSC_OBJECT_CLASSID = 0; 2574 2575 /*@C 2576 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2577 2578 Not Collective 2579 2580 Input Parameter: 2581 . name - The class name 2582 2583 Output Parameter: 2584 . oclass - The class id or classid 2585 2586 Level: developer 2587 2588 .seealso: [](ch_profiling), `PetscLogEventRegister()` 2589 @*/ 2590 PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass) 2591 { 2592 #if defined(PETSC_USE_LOG) 2593 PetscStageLog stageLog; 2594 PetscInt stage; 2595 #endif 2596 2597 PetscFunctionBegin; 2598 *oclass = ++PETSC_LARGEST_CLASSID; 2599 #if defined(PETSC_USE_LOG) 2600 PetscCall(PetscLogGetStageLog(&stageLog)); 2601 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2602 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2603 #endif 2604 PetscFunctionReturn(PETSC_SUCCESS); 2605 } 2606 2607 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2608 #include <mpe.h> 2609 2610 PetscBool PetscBeganMPE = PETSC_FALSE; 2611 2612 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2613 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2614 2615 /*@C 2616 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files 2617 and slows the program down. 2618 2619 Collective over `PETSC_COMM_WORLD` 2620 2621 Options Database Key: 2622 . -log_mpe - Prints extensive log information 2623 2624 Level: advanced 2625 2626 Note: 2627 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 2628 intended for production runs since it logs only flop rates and object 2629 creation (and should not significantly slow the programs). 2630 2631 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2632 `PetscLogEventDeactivate()` 2633 @*/ 2634 PetscErrorCode PetscLogMPEBegin(void) 2635 { 2636 PetscFunctionBegin; 2637 /* Do MPE initialization */ 2638 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2639 PetscCall(PetscInfo(0, "Initializing MPE.\n")); 2640 PetscCall(MPE_Init_log()); 2641 2642 PetscBeganMPE = PETSC_TRUE; 2643 } else { 2644 PetscCall(PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n")); 2645 } 2646 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2647 PetscFunctionReturn(PETSC_SUCCESS); 2648 } 2649 2650 /*@C 2651 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2652 2653 Collective over `PETSC_COMM_WORLD` 2654 2655 Level: advanced 2656 2657 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2658 @*/ 2659 PetscErrorCode PetscLogMPEDump(const char sname[]) 2660 { 2661 char name[PETSC_MAX_PATH_LEN]; 2662 2663 PetscFunctionBegin; 2664 if (PetscBeganMPE) { 2665 PetscCall(PetscInfo(0, "Finalizing MPE.\n")); 2666 if (sname) { 2667 PetscCall(PetscStrncpy(name, sname, sizeof(name))); 2668 } else { 2669 PetscCall(PetscGetProgramName(name, sizeof(name))); 2670 } 2671 PetscCall(MPE_Finish_log(name)); 2672 } else { 2673 PetscCall(PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n")); 2674 } 2675 PetscFunctionReturn(PETSC_SUCCESS); 2676 } 2677 2678 #define PETSC_RGB_COLORS_MAX 39 2679 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ", 2680 "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ", 2681 "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ", 2682 "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ", 2683 "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "}; 2684 2685 /*@C 2686 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()` 2687 2688 Not collective. Maybe it should be? 2689 2690 Output Parameter: 2691 . str - character string representing the color 2692 2693 Level: developer 2694 2695 .seealso: [](ch_profiling), `PetscLogEventRegister()` 2696 @*/ 2697 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) 2698 { 2699 static int idx = 0; 2700 2701 PetscFunctionBegin; 2702 *str = PetscLogMPERGBColors[idx]; 2703 idx = (idx + 1) % PETSC_RGB_COLORS_MAX; 2704 PetscFunctionReturn(PETSC_SUCCESS); 2705 } 2706 2707 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2708