1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 #include <petscdevice.h> 15 #include <petsc/private/deviceimpl.h> 16 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 17 #include <../src/sys/perfstubs/timer.h> 18 #endif 19 20 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 21 22 #if defined(PETSC_USE_LOG) 23 #include <petscmachineinfo.h> 24 #include <petscconfiginfo.h> 25 26 #if defined(PETSC_HAVE_THREADSAFETY) 27 28 PetscInt petsc_log_gid = -1; /* Global threadId counter */ 29 PETSC_TLS PetscInt petsc_log_tid = -1; /* Local threadId */ 30 31 /* shared variables */ 32 PetscSpinlock PetscLogSpinLock; 33 PetscHMapEvent eventInfoMap_th = NULL; 34 35 #endif 36 37 /* used in the MPI_XXX() count macros in petsclog.h */ 38 39 /* Action and object logging variables */ 40 Action *petsc_actions = NULL; 41 Object *petsc_objects = NULL; 42 PetscBool petsc_logActions = PETSC_FALSE; 43 PetscBool petsc_logObjects = PETSC_FALSE; 44 int petsc_numActions = 0, petsc_maxActions = 100; 45 int petsc_numObjects = 0, petsc_maxObjects = 100; 46 int petsc_numObjectsDestroyed = 0; 47 48 /* Global counters */ 49 PetscLogDouble petsc_BaseTime = 0.0; 50 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 51 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 52 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 53 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 54 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 55 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 56 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 57 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 58 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 59 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 60 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 61 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 62 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 63 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 64 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 65 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 66 67 /* Thread Local storage */ 68 PETSC_TLS PetscLogDouble petsc_TotalFlops_th = 0.0; 69 PETSC_TLS PetscLogDouble petsc_send_ct_th = 0.0; 70 PETSC_TLS PetscLogDouble petsc_recv_ct_th = 0.0; 71 PETSC_TLS PetscLogDouble petsc_send_len_th = 0.0; 72 PETSC_TLS PetscLogDouble petsc_recv_len_th = 0.0; 73 PETSC_TLS PetscLogDouble petsc_isend_ct_th = 0.0; 74 PETSC_TLS PetscLogDouble petsc_irecv_ct_th = 0.0; 75 PETSC_TLS PetscLogDouble petsc_isend_len_th = 0.0; 76 PETSC_TLS PetscLogDouble petsc_irecv_len_th = 0.0; 77 PETSC_TLS PetscLogDouble petsc_wait_ct_th = 0.0; 78 PETSC_TLS PetscLogDouble petsc_wait_any_ct_th = 0.0; 79 PETSC_TLS PetscLogDouble petsc_wait_all_ct_th = 0.0; 80 PETSC_TLS PetscLogDouble petsc_sum_of_waits_ct_th = 0.0; 81 PETSC_TLS PetscLogDouble petsc_allreduce_ct_th = 0.0; 82 PETSC_TLS PetscLogDouble petsc_gather_ct_th = 0.0; 83 PETSC_TLS PetscLogDouble petsc_scatter_ct_th = 0.0; 84 85 #if defined(PETSC_HAVE_DEVICE) 86 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 87 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 88 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 89 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 90 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 91 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 92 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 93 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 94 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 95 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 96 97 PETSC_TLS PetscLogDouble petsc_ctog_ct_th = 0.0; 98 PETSC_TLS PetscLogDouble petsc_gtoc_ct_th = 0.0; 99 PETSC_TLS PetscLogDouble petsc_ctog_sz_th = 0.0; 100 PETSC_TLS PetscLogDouble petsc_gtoc_sz_th = 0.0; 101 PETSC_TLS PetscLogDouble petsc_ctog_ct_scalar_th = 0.0; 102 PETSC_TLS PetscLogDouble petsc_gtoc_ct_scalar_th = 0.0; 103 PETSC_TLS PetscLogDouble petsc_ctog_sz_scalar_th = 0.0; 104 PETSC_TLS PetscLogDouble petsc_gtoc_sz_scalar_th = 0.0; 105 PETSC_TLS PetscLogDouble petsc_gflops_th = 0.0; 106 PETSC_TLS PetscLogDouble petsc_gtime_th = 0.0; 107 #endif 108 109 #if defined(PETSC_HAVE_THREADSAFETY) 110 PetscErrorCode PetscAddLogDouble(PetscLogDouble *tot, PetscLogDouble *tot_th, PetscLogDouble tmp) 111 { 112 *tot_th += tmp; 113 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 114 *tot += tmp; 115 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 116 return PETSC_SUCCESS; 117 } 118 119 PetscErrorCode PetscAddLogDoubleCnt(PetscLogDouble *cnt, PetscLogDouble *tot, PetscLogDouble *cnt_th, PetscLogDouble *tot_th, PetscLogDouble tmp) 120 { 121 *cnt_th = *cnt_th + 1; 122 *tot_th += tmp; 123 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 124 *tot += (PetscLogDouble)(tmp); 125 *cnt += *cnt + 1; 126 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 127 return PETSC_SUCCESS; 128 } 129 130 PetscInt PetscLogGetTid(void) 131 { 132 if (petsc_log_tid < 0) { 133 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 134 petsc_log_tid = ++petsc_log_gid; 135 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 136 } 137 return petsc_log_tid; 138 } 139 140 #endif 141 142 /* Logging functions */ 143 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 144 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 145 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 146 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 147 148 /* Tracing event logging variables */ 149 FILE *petsc_tracefile = NULL; 150 int petsc_tracelevel = 0; 151 const char *petsc_traceblanks = " "; 152 char petsc_tracespace[128] = " "; 153 PetscLogDouble petsc_tracetime = 0.0; 154 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 155 156 static PetscIntStack current_log_event_stack = NULL; 157 158 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) 159 { 160 int stage; 161 PetscBool opt; 162 163 PetscFunctionBegin; 164 if (PetscLogInitializeCalled) PetscFunctionReturn(PETSC_SUCCESS); 165 PetscLogInitializeCalled = PETSC_TRUE; 166 167 PetscCall(PetscIntStackCreate(¤t_log_event_stack)); 168 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt)); 169 if (opt) petsc_logActions = PETSC_FALSE; 170 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt)); 171 if (opt) petsc_logObjects = PETSC_FALSE; 172 if (petsc_logActions) PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 173 if (petsc_logObjects) PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 174 PetscLogPHC = PetscLogObjCreateDefault; 175 PetscLogPHD = PetscLogObjDestroyDefault; 176 /* Setup default logging structures */ 177 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 178 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 179 180 PetscCall(PetscSpinlockCreate(&PetscLogSpinLock)); 181 #if defined(PETSC_HAVE_THREADSAFETY) 182 petsc_log_tid = 0; 183 petsc_log_gid = 0; 184 PetscCall(PetscHMapEventCreate(&eventInfoMap_th)); 185 #endif 186 187 /* All processors sync here for more consistent logging */ 188 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 189 PetscCall(PetscTime(&petsc_BaseTime)); 190 PetscCall(PetscLogStagePush(stage)); 191 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 192 PetscStackCallExternalVoid("ps_initialize_", ps_initialize_()); 193 #endif 194 PetscFunctionReturn(PETSC_SUCCESS); 195 } 196 197 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) 198 { 199 PetscStageLog stageLog; 200 201 PetscFunctionBegin; 202 #if defined(PETSC_HAVE_THREADSAFETY) 203 if (eventInfoMap_th) { 204 PetscEventPerfInfo **array; 205 PetscInt n, off = 0; 206 207 PetscCall(PetscHMapEventGetSize(eventInfoMap_th, &n)); 208 PetscCall(PetscMalloc1(n, &array)); 209 PetscCall(PetscHMapEventGetVals(eventInfoMap_th, &off, array)); 210 for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i])); 211 PetscCall(PetscFree(array)); 212 PetscCall(PetscHMapEventDestroy(&eventInfoMap_th)); 213 } 214 #endif 215 PetscCall(PetscFree(petsc_actions)); 216 PetscCall(PetscFree(petsc_objects)); 217 PetscCall(PetscLogNestedEnd()); 218 PetscCall(PetscLogSet(NULL, NULL)); 219 220 /* Resetting phase */ 221 PetscCall(PetscLogGetStageLog(&stageLog)); 222 PetscCall(PetscStageLogDestroy(stageLog)); 223 PetscCall(PetscIntStackDestroy(current_log_event_stack)); 224 current_log_event_stack = NULL; 225 226 petsc_TotalFlops = 0.0; 227 petsc_numActions = 0; 228 petsc_numObjects = 0; 229 petsc_numObjectsDestroyed = 0; 230 petsc_maxActions = 100; 231 petsc_maxObjects = 100; 232 petsc_actions = NULL; 233 petsc_objects = NULL; 234 petsc_logActions = PETSC_FALSE; 235 petsc_logObjects = PETSC_FALSE; 236 petsc_BaseTime = 0.0; 237 petsc_TotalFlops = 0.0; 238 petsc_send_ct = 0.0; 239 petsc_recv_ct = 0.0; 240 petsc_send_len = 0.0; 241 petsc_recv_len = 0.0; 242 petsc_isend_ct = 0.0; 243 petsc_irecv_ct = 0.0; 244 petsc_isend_len = 0.0; 245 petsc_irecv_len = 0.0; 246 petsc_wait_ct = 0.0; 247 petsc_wait_any_ct = 0.0; 248 petsc_wait_all_ct = 0.0; 249 petsc_sum_of_waits_ct = 0.0; 250 petsc_allreduce_ct = 0.0; 251 petsc_gather_ct = 0.0; 252 petsc_scatter_ct = 0.0; 253 petsc_TotalFlops_th = 0.0; 254 petsc_send_ct_th = 0.0; 255 petsc_recv_ct_th = 0.0; 256 petsc_send_len_th = 0.0; 257 petsc_recv_len_th = 0.0; 258 petsc_isend_ct_th = 0.0; 259 petsc_irecv_ct_th = 0.0; 260 petsc_isend_len_th = 0.0; 261 petsc_irecv_len_th = 0.0; 262 petsc_wait_ct_th = 0.0; 263 petsc_wait_any_ct_th = 0.0; 264 petsc_wait_all_ct_th = 0.0; 265 petsc_sum_of_waits_ct_th = 0.0; 266 petsc_allreduce_ct_th = 0.0; 267 petsc_gather_ct_th = 0.0; 268 petsc_scatter_ct_th = 0.0; 269 270 #if defined(PETSC_HAVE_DEVICE) 271 petsc_ctog_ct = 0.0; 272 petsc_gtoc_ct = 0.0; 273 petsc_ctog_sz = 0.0; 274 petsc_gtoc_sz = 0.0; 275 petsc_gflops = 0.0; 276 petsc_gtime = 0.0; 277 petsc_ctog_ct_th = 0.0; 278 petsc_gtoc_ct_th = 0.0; 279 petsc_ctog_sz_th = 0.0; 280 petsc_gtoc_sz_th = 0.0; 281 petsc_gflops_th = 0.0; 282 petsc_gtime_th = 0.0; 283 #endif 284 285 PETSC_LARGEST_EVENT = PETSC_EVENT; 286 PetscLogPHC = NULL; 287 PetscLogPHD = NULL; 288 petsc_tracefile = NULL; 289 petsc_tracelevel = 0; 290 petsc_traceblanks = " "; 291 petsc_tracespace[0] = ' '; 292 petsc_tracespace[1] = 0; 293 petsc_tracetime = 0.0; 294 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 295 PETSC_OBJECT_CLASSID = 0; 296 petsc_stageLog = NULL; 297 PetscLogInitializeCalled = PETSC_FALSE; 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 /*@C 302 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 303 304 Not Collective 305 306 Input Parameters: 307 + b - The function called at beginning of event 308 - e - The function called at end of event 309 310 Level: developer 311 312 Developer Note: 313 The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. 314 315 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()` 316 @*/ 317 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) 318 { 319 PetscFunctionBegin; 320 PetscLogPLB = b; 321 PetscLogPLE = e; 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 /*@C 326 PetscLogIsActive - Check if logging is currently in progress. 327 328 Not Collective 329 330 Output Parameter: 331 . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise 332 333 Level: beginner 334 335 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 336 @*/ 337 PetscErrorCode PetscLogIsActive(PetscBool *isActive) 338 { 339 PetscFunctionBegin; 340 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 341 PetscFunctionReturn(PETSC_SUCCESS); 342 } 343 344 /*@C 345 PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop 346 rates and object creation and should not slow programs down too much. 347 This routine may be called more than once. 348 349 Logically Collective over `PETSC_COMM_WORLD` 350 351 Options Database Key: 352 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 353 screen (for code configured with --with-log=1 (which is the default)) 354 355 Usage: 356 .vb 357 PetscInitialize(...); 358 PetscLogDefaultBegin(); 359 ... code ... 360 PetscLogView(viewer); or PetscLogDump(); 361 PetscFinalize(); 362 .ve 363 364 Level: advanced 365 366 Note: 367 `PetscLogView()` or `PetscLogDump()` actually cause the printing of 368 the logging information. 369 370 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 371 @*/ 372 PetscErrorCode PetscLogDefaultBegin(void) 373 { 374 PetscFunctionBegin; 375 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 376 PetscFunctionReturn(PETSC_SUCCESS); 377 } 378 379 /*@C 380 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 381 all events. This creates large log files and slows the program down. 382 383 Logically Collective on `PETSC_COMM_WORLD` 384 385 Options Database Key: 386 . -log_all - Prints extensive log information 387 388 Usage: 389 .vb 390 PetscInitialize(...); 391 PetscLogAllBegin(); 392 ... code ... 393 PetscLogDump(filename); 394 PetscFinalize(); 395 .ve 396 397 Level: advanced 398 399 Note: 400 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 401 intended for production runs since it logs only flop rates and object 402 creation (and shouldn't significantly slow the programs). 403 404 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 405 @*/ 406 PetscErrorCode PetscLogAllBegin(void) 407 { 408 PetscFunctionBegin; 409 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 410 PetscFunctionReturn(PETSC_SUCCESS); 411 } 412 413 /*@C 414 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 415 begins or ends, the event name is printed. 416 417 Logically Collective on `PETSC_COMM_WORLD` 418 419 Input Parameter: 420 . file - The file to print trace in (e.g. stdout) 421 422 Options Database Key: 423 . -log_trace [filename] - Activates `PetscLogTraceBegin()` 424 425 Level: intermediate 426 427 Notes: 428 `PetscLogTraceBegin()` prints the processor number, the execution time (sec), 429 then "Event begin:" or "Event end:" followed by the event name. 430 431 `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful 432 to determine where a program is hanging without running in the 433 debugger. Can be used in conjunction with the -info option. 434 435 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 436 @*/ 437 PetscErrorCode PetscLogTraceBegin(FILE *file) 438 { 439 PetscFunctionBegin; 440 petsc_tracefile = file; 441 442 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 443 PetscFunctionReturn(PETSC_SUCCESS); 444 } 445 446 /*@ 447 PetscLogActions - Determines whether actions are logged for the graphical viewer. 448 449 Not Collective 450 451 Input Parameter: 452 . flag - `PETSC_TRUE` if actions are to be logged 453 454 Options Database Key: 455 . -log_exclude_actions - Turns off actions logging 456 457 Level: intermediate 458 459 Note: 460 Logging of actions continues to consume more memory as the program 461 runs. Long running programs should consider turning this feature off. 462 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 463 @*/ 464 PetscErrorCode PetscLogActions(PetscBool flag) 465 { 466 PetscFunctionBegin; 467 petsc_logActions = flag; 468 PetscFunctionReturn(PETSC_SUCCESS); 469 } 470 471 /*@ 472 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 473 474 Not Collective 475 476 Input Parameter: 477 . flag - `PETSC_TRUE` if objects are to be logged 478 479 Options Database Key: 480 . -log_exclude_objects - Turns off objects logging 481 482 Level: intermediate 483 484 Note: 485 Logging of objects continues to consume more memory as the program 486 runs. Long running programs should consider turning this feature off. 487 488 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 489 @*/ 490 PetscErrorCode PetscLogObjects(PetscBool flag) 491 { 492 PetscFunctionBegin; 493 petsc_logObjects = flag; 494 PetscFunctionReturn(PETSC_SUCCESS); 495 } 496 497 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 498 /*@C 499 PetscLogStageRegister - Attaches a character string name to a logging stage. 500 501 Not Collective 502 503 Input Parameter: 504 . sname - The name to associate with that stage 505 506 Output Parameter: 507 . stage - The stage number 508 509 Level: intermediate 510 511 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 512 @*/ 513 PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage) 514 { 515 PetscStageLog stageLog; 516 PetscLogEvent event; 517 518 PetscFunctionBegin; 519 PetscCall(PetscLogGetStageLog(&stageLog)); 520 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 521 /* Copy events already changed in the main stage, this sucks */ 522 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 523 for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 524 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 525 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 526 if (perfstubs_initialized == PERFSTUBS_SUCCESS) PetscStackCallExternalVoid("ps_timer_create_", stageLog->stageInfo[*stage].timer = ps_timer_create_(sname)); 527 #endif 528 PetscFunctionReturn(PETSC_SUCCESS); 529 } 530 531 /*@C 532 PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage 533 534 Not Collective 535 536 Input Parameter: 537 . stage - The stage on which to log 538 539 Usage: 540 If the option -log_view is used to run the program containing the 541 following code, then 2 sets of summary data will be printed during 542 PetscFinalize(). 543 .vb 544 PetscInitialize(int *argc,char ***args,0,0); 545 [stage 0 of code] 546 PetscLogStagePush(1); 547 [stage 1 of code] 548 PetscLogStagePop(); 549 PetscBarrier(...); 550 [more stage 0 of code] 551 PetscFinalize(); 552 .ve 553 554 Level: intermediate 555 556 Note: 557 Use `PetscLogStageRegister()` to register a stage. 558 559 .seealso: [](ch_profiling), `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 560 @*/ 561 PetscErrorCode PetscLogStagePush(PetscLogStage stage) 562 { 563 PetscStageLog stageLog; 564 565 PetscFunctionBegin; 566 PetscCall(PetscLogGetStageLog(&stageLog)); 567 PetscCall(PetscStageLogPush(stageLog, stage)); 568 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 569 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stage].timer != NULL) PetscStackCallExternalVoid("ps_timer_start_", ps_timer_start_(stageLog->stageInfo[stage].timer)); 570 #endif 571 PetscFunctionReturn(PETSC_SUCCESS); 572 } 573 574 /*@C 575 PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()` 576 577 Not Collective 578 579 Usage: 580 If the option -log_view is used to run the program containing the 581 following code, then 2 sets of summary data will be printed during 582 PetscFinalize(). 583 .vb 584 PetscInitialize(int *argc,char ***args,0,0); 585 [stage 0 of code] 586 PetscLogStagePush(1); 587 [stage 1 of code] 588 PetscLogStagePop(); 589 PetscBarrier(...); 590 [more stage 0 of code] 591 PetscFinalize(); 592 .ve 593 594 Level: intermediate 595 596 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 597 @*/ 598 PetscErrorCode PetscLogStagePop(void) 599 { 600 PetscStageLog stageLog; 601 602 PetscFunctionBegin; 603 PetscCall(PetscLogGetStageLog(&stageLog)); 604 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 605 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stageLog->curStage].timer != NULL) PetscStackCallExternalVoid("ps_timer_stop_", ps_timer_stop_(stageLog->stageInfo[stageLog->curStage].timer)); 606 #endif 607 PetscCall(PetscStageLogPop(stageLog)); 608 PetscFunctionReturn(PETSC_SUCCESS); 609 } 610 611 /*@ 612 PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 613 614 Not Collective 615 616 Input Parameters: 617 + stage - The stage 618 - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 619 620 Level: intermediate 621 622 Note: 623 If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist 624 625 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 626 @*/ 627 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) 628 { 629 PetscStageLog stageLog; 630 631 PetscFunctionBegin; 632 PetscCall(PetscLogGetStageLog(&stageLog)); 633 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 634 PetscFunctionReturn(PETSC_SUCCESS); 635 } 636 637 /*@ 638 PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 639 640 Not Collective 641 642 Input Parameter: 643 . stage - The stage 644 645 Output Parameter: 646 . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 647 648 Level: intermediate 649 650 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 651 @*/ 652 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) 653 { 654 PetscStageLog stageLog; 655 656 PetscFunctionBegin; 657 PetscCall(PetscLogGetStageLog(&stageLog)); 658 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 659 PetscFunctionReturn(PETSC_SUCCESS); 660 } 661 662 /*@ 663 PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()` 664 665 Not Collective 666 667 Input Parameters: 668 + stage - The stage 669 - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 670 671 Level: intermediate 672 673 Developer Note: 674 What does visible mean, needs to be documented. 675 676 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 677 @*/ 678 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) 679 { 680 PetscStageLog stageLog; 681 682 PetscFunctionBegin; 683 PetscCall(PetscLogGetStageLog(&stageLog)); 684 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 685 PetscFunctionReturn(PETSC_SUCCESS); 686 } 687 688 /*@ 689 PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()` 690 691 Not Collective 692 693 Input Parameter: 694 . stage - The stage 695 696 Output Parameter: 697 . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 698 699 Level: intermediate 700 701 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 702 @*/ 703 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) 704 { 705 PetscStageLog stageLog; 706 707 PetscFunctionBegin; 708 PetscCall(PetscLogGetStageLog(&stageLog)); 709 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 710 PetscFunctionReturn(PETSC_SUCCESS); 711 } 712 713 /*@C 714 PetscLogStageGetId - Returns the stage id when given the stage name. 715 716 Not Collective 717 718 Input Parameter: 719 . name - The stage name 720 721 Output Parameter: 722 . stage - The stage, , or -1 if no stage with that name exists 723 724 Level: intermediate 725 726 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 727 @*/ 728 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) 729 { 730 PetscStageLog stageLog; 731 732 PetscFunctionBegin; 733 PetscCall(PetscLogGetStageLog(&stageLog)); 734 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 735 PetscFunctionReturn(PETSC_SUCCESS); 736 } 737 738 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 739 740 /*@C 741 PetscLogEventRegister - Registers an event name for logging operations 742 743 Not Collective 744 745 Input Parameters: 746 + name - The name associated with the event 747 - classid - The classid associated to the class for this event, obtain either with 748 `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones 749 are only available in C code 750 751 Output Parameter: 752 . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`. 753 754 Example of Usage: 755 .vb 756 PetscLogEvent USER_EVENT; 757 PetscClassId classid; 758 PetscLogDouble user_event_flops; 759 PetscClassIdRegister("class name",&classid); 760 PetscLogEventRegister("User event name",classid,&USER_EVENT); 761 PetscLogEventBegin(USER_EVENT,0,0,0,0); 762 [code segment to monitor] 763 PetscLogFlops(user_event_flops); 764 PetscLogEventEnd(USER_EVENT,0,0,0,0); 765 .ve 766 767 Level: intermediate 768 769 Notes: 770 PETSc automatically logs library events if the code has been 771 configured with --with-log (which is the default) and 772 -log_view or -log_all is specified. `PetscLogEventRegister()` is 773 intended for logging user events to supplement this PETSc 774 information. 775 776 PETSc can gather data for use with the utilities Jumpshot 777 (part of the MPICH distribution). If PETSc has been compiled 778 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 779 MPICH), the user can employ another command line option, -log_mpe, 780 to create a logfile, "mpe.log", which can be visualized 781 Jumpshot. 782 783 The classid is associated with each event so that classes of events 784 can be disabled simultaneously, such as all matrix events. The user 785 can either use an existing classid, such as `MAT_CLASSID`, or create 786 their own as shown in the example. 787 788 If an existing event with the same name exists, its event handle is 789 returned instead of creating a new event. 790 791 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 792 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 793 @*/ 794 PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event) 795 { 796 PetscStageLog stageLog; 797 int stage; 798 799 PetscFunctionBegin; 800 *event = PETSC_DECIDE; 801 PetscCall(PetscLogGetStageLog(&stageLog)); 802 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 803 if (*event > 0) PetscFunctionReturn(PETSC_SUCCESS); 804 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 805 for (stage = 0; stage < stageLog->numStages; stage++) { 806 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 807 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 808 } 809 PetscFunctionReturn(PETSC_SUCCESS); 810 } 811 812 /*@ 813 PetscLogEventSetCollective - Indicates that a particular event is collective. 814 815 Not Collective 816 817 Input Parameters: 818 + event - The event id 819 - collective - Boolean flag indicating whether a particular event is collective 820 821 Level: developer 822 823 Notes: 824 New events returned from `PetscLogEventRegister()` are collective by default. 825 826 Collective events are handled specially if the -log_sync is used. In that case the logging saves information about 827 two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication 828 to be performed. This option is useful to debug imbalance within the computations or communications 829 830 .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()` 831 @*/ 832 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective) 833 { 834 PetscStageLog stageLog; 835 PetscEventRegLog eventRegLog; 836 837 PetscFunctionBegin; 838 PetscCall(PetscLogGetStageLog(&stageLog)); 839 PetscCall(PetscStageLogGetEventRegLog(stageLog, &eventRegLog)); 840 PetscCheck(event >= 0 && event <= eventRegLog->numEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid event id"); 841 eventRegLog->eventInfo[event].collective = collective; 842 PetscFunctionReturn(PETSC_SUCCESS); 843 } 844 845 /*@ 846 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 847 848 Not Collective 849 850 Input Parameter: 851 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 852 853 Level: developer 854 855 .seealso: [](ch_profiling), `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 856 @*/ 857 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) 858 { 859 PetscStageLog stageLog; 860 int stage; 861 862 PetscFunctionBegin; 863 PetscCall(PetscLogGetStageLog(&stageLog)); 864 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 865 PetscFunctionReturn(PETSC_SUCCESS); 866 } 867 868 /*@ 869 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 870 871 Not Collective 872 873 Input Parameter: 874 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 875 876 Level: developer 877 878 Note: 879 If a class is excluded then events associated with that class are not logged. 880 881 .seealso: [](ch_profiling), `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 882 @*/ 883 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) 884 { 885 PetscStageLog stageLog; 886 int stage; 887 888 PetscFunctionBegin; 889 PetscCall(PetscLogGetStageLog(&stageLog)); 890 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 891 PetscFunctionReturn(PETSC_SUCCESS); 892 } 893 894 /*@ 895 PetscLogEventActivate - Indicates that a particular event should be logged. 896 897 Not Collective 898 899 Input Parameter: 900 . event - The event id 901 902 Usage: 903 .vb 904 PetscLogEventDeactivate(VEC_SetValues); 905 [code where you do not want to log VecSetValues()] 906 PetscLogEventActivate(VEC_SetValues); 907 [code where you do want to log VecSetValues()] 908 .ve 909 910 Level: advanced 911 912 Note: 913 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 914 or an event number obtained with `PetscLogEventRegister()`. 915 916 .seealso: [](ch_profiling), `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 917 @*/ 918 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) 919 { 920 PetscStageLog stageLog; 921 int stage; 922 923 PetscFunctionBegin; 924 PetscCall(PetscLogGetStageLog(&stageLog)); 925 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 926 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 927 PetscFunctionReturn(PETSC_SUCCESS); 928 } 929 930 /*@ 931 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 932 933 Not Collective 934 935 Input Parameter: 936 . event - The event id 937 938 Usage: 939 .vb 940 PetscLogEventDeactivate(VEC_SetValues); 941 [code where you do not want to log VecSetValues()] 942 PetscLogEventActivate(VEC_SetValues); 943 [code where you do want to log VecSetValues()] 944 .ve 945 946 Level: advanced 947 948 Note: 949 The event may be either a pre-defined PETSc event (found in 950 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 951 952 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 953 @*/ 954 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) 955 { 956 PetscStageLog stageLog; 957 int stage; 958 959 PetscFunctionBegin; 960 PetscCall(PetscLogGetStageLog(&stageLog)); 961 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 962 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 963 PetscFunctionReturn(PETSC_SUCCESS); 964 } 965 966 /*@ 967 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called 968 969 Not Collective 970 971 Input Parameter: 972 . event - The event id 973 974 Usage: 975 .vb 976 PetscLogEventDeactivatePush(VEC_SetValues); 977 [code where you do not want to log VecSetValues()] 978 PetscLogEventDeactivatePop(VEC_SetValues); 979 [code where you do want to log VecSetValues()] 980 .ve 981 982 Level: advanced 983 984 Note: 985 The event may be either a pre-defined PETSc event (found in 986 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 987 988 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()` 989 @*/ 990 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) 991 { 992 PetscStageLog stageLog; 993 int stage; 994 995 PetscFunctionBegin; 996 PetscCall(PetscLogGetStageLog(&stageLog)); 997 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 998 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 999 PetscFunctionReturn(PETSC_SUCCESS); 1000 } 1001 1002 /*@ 1003 PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()` 1004 1005 Not Collective 1006 1007 Input Parameter: 1008 . event - The event id 1009 1010 Usage: 1011 .vb 1012 PetscLogEventDeactivatePush(VEC_SetValues); 1013 [code where you do not want to log VecSetValues()] 1014 PetscLogEventDeactivatePop(VEC_SetValues); 1015 [code where you do want to log VecSetValues()] 1016 .ve 1017 1018 Level: advanced 1019 1020 Note: 1021 The event may be either a pre-defined PETSc event (found in 1022 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 1023 1024 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 1025 @*/ 1026 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) 1027 { 1028 PetscStageLog stageLog; 1029 int stage; 1030 1031 PetscFunctionBegin; 1032 PetscCall(PetscLogGetStageLog(&stageLog)); 1033 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1034 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 1035 PetscFunctionReturn(PETSC_SUCCESS); 1036 } 1037 1038 /*@ 1039 PetscLogEventSetActiveAll - Turns on logging of all events 1040 1041 Not Collective 1042 1043 Input Parameters: 1044 + event - The event id 1045 - isActive - The activity flag determining whether the event is logged 1046 1047 Level: advanced 1048 1049 .seealso: [](ch_profiling), `PlogEventActivate()`, `PlogEventDeactivate()` 1050 @*/ 1051 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) 1052 { 1053 PetscStageLog stageLog; 1054 int stage; 1055 1056 PetscFunctionBegin; 1057 PetscCall(PetscLogGetStageLog(&stageLog)); 1058 for (stage = 0; stage < stageLog->numStages; stage++) { 1059 if (isActive) { 1060 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 1061 } else { 1062 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 1063 } 1064 } 1065 PetscFunctionReturn(PETSC_SUCCESS); 1066 } 1067 1068 /*@ 1069 PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage 1070 1071 Not Collective 1072 1073 Input Parameter: 1074 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 1075 1076 Level: developer 1077 1078 .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 1079 @*/ 1080 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) 1081 { 1082 PetscStageLog stageLog; 1083 int stage; 1084 1085 PetscFunctionBegin; 1086 PetscCall(PetscLogGetStageLog(&stageLog)); 1087 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1088 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1089 PetscFunctionReturn(PETSC_SUCCESS); 1090 } 1091 1092 /*@ 1093 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage 1094 1095 Not Collective 1096 1097 Input Parameter: 1098 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 1099 1100 Level: developer 1101 1102 .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 1103 @*/ 1104 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) 1105 { 1106 PetscStageLog stageLog; 1107 int stage; 1108 1109 PetscFunctionBegin; 1110 PetscCall(PetscLogGetStageLog(&stageLog)); 1111 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1112 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1113 PetscFunctionReturn(PETSC_SUCCESS); 1114 } 1115 1116 /*MC 1117 PetscLogEventSync - Synchronizes the beginning of a user event. 1118 1119 Synopsis: 1120 #include <petsclog.h> 1121 PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm) 1122 1123 Collective 1124 1125 Input Parameters: 1126 + e - integer associated with the event obtained from PetscLogEventRegister() 1127 - comm - an MPI communicator 1128 1129 Usage: 1130 .vb 1131 PetscLogEvent USER_EVENT; 1132 PetscLogEventRegister("User event",0,&USER_EVENT); 1133 PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD); 1134 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1135 [code segment to monitor] 1136 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1137 .ve 1138 1139 Level: developer 1140 1141 Note: 1142 This routine should be called only if there is not a 1143 `PetscObject` available to pass to `PetscLogEventBegin()`. 1144 1145 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1146 M*/ 1147 1148 /*MC 1149 PetscLogEventBegin - Logs the beginning of a user event. 1150 1151 Synopsis: 1152 #include <petsclog.h> 1153 PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1154 1155 Not Collective 1156 1157 Input Parameters: 1158 + e - integer associated with the event obtained from PetscLogEventRegister() 1159 - o1,o2,o3,o4 - objects associated with the event, or 0 1160 1161 Fortran Synopsis: 1162 void PetscLogEventBegin(int e,PetscErrorCode ierr) 1163 1164 Usage: 1165 .vb 1166 PetscLogEvent USER_EVENT; 1167 PetscLogDouble user_event_flops; 1168 PetscLogEventRegister("User event",0,&USER_EVENT); 1169 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1170 [code segment to monitor] 1171 PetscLogFlops(user_event_flops); 1172 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1173 .ve 1174 1175 Level: intermediate 1176 1177 Developer Note: 1178 `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the 1179 errors that occur in the macro directly because other packages that use this macros have used them in their 1180 own functions or methods that do not return error codes and it would be disruptive to change the current 1181 behavior. 1182 1183 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1184 M*/ 1185 1186 /*MC 1187 PetscLogEventEnd - Log the end of a user event. 1188 1189 Synopsis: 1190 #include <petsclog.h> 1191 PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1192 1193 Not Collective 1194 1195 Input Parameters: 1196 + e - integer associated with the event obtained with PetscLogEventRegister() 1197 - o1,o2,o3,o4 - objects associated with the event, or 0 1198 1199 Fortran Synopsis: 1200 void PetscLogEventEnd(int e,PetscErrorCode ierr) 1201 1202 Usage: 1203 .vb 1204 PetscLogEvent USER_EVENT; 1205 PetscLogDouble user_event_flops; 1206 PetscLogEventRegister("User event",0,&USER_EVENT,); 1207 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1208 [code segment to monitor] 1209 PetscLogFlops(user_event_flops); 1210 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1211 .ve 1212 1213 Level: intermediate 1214 1215 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1216 M*/ 1217 1218 /*@C 1219 PetscLogEventGetId - Returns the event id when given the event name. 1220 1221 Not Collective 1222 1223 Input Parameter: 1224 . name - The event name 1225 1226 Output Parameter: 1227 . event - The event, or -1 if no event with that name exists 1228 1229 Level: intermediate 1230 1231 .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1232 @*/ 1233 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) 1234 { 1235 PetscStageLog stageLog; 1236 1237 PetscFunctionBegin; 1238 PetscCall(PetscLogGetStageLog(&stageLog)); 1239 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1240 PetscFunctionReturn(PETSC_SUCCESS); 1241 } 1242 1243 PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event) 1244 { 1245 PetscFunctionBegin; 1246 if (!PetscDefined(HAVE_THREADSAFETY)) PetscCall(PetscIntStackPush(current_log_event_stack, event)); 1247 PetscFunctionReturn(PETSC_SUCCESS); 1248 } 1249 1250 PetscErrorCode PetscLogPopCurrentEvent_Internal(void) 1251 { 1252 PetscFunctionBegin; 1253 if (!PetscDefined(HAVE_THREADSAFETY)) PetscCall(PetscIntStackPop(current_log_event_stack, NULL)); 1254 PetscFunctionReturn(PETSC_SUCCESS); 1255 } 1256 1257 PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event) 1258 { 1259 PetscBool empty; 1260 1261 PetscFunctionBegin; 1262 PetscValidIntPointer(event, 1); 1263 *event = PETSC_DECIDE; 1264 PetscCall(PetscIntStackEmpty(current_log_event_stack, &empty)); 1265 if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack, event)); 1266 PetscFunctionReturn(PETSC_SUCCESS); 1267 } 1268 1269 PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event) 1270 { 1271 PetscFunctionBegin; 1272 if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event, NULL, NULL, NULL, NULL)); 1273 PetscFunctionReturn(PETSC_SUCCESS); 1274 } 1275 1276 PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event) 1277 { 1278 PetscStageLog stageLog; 1279 PetscEventPerfLog eventLog; 1280 int stage; 1281 1282 PetscFunctionBegin; 1283 if (event == PETSC_DECIDE) PetscFunctionReturn(PETSC_SUCCESS); 1284 PetscCall(PetscLogEventBegin(event, NULL, NULL, NULL, NULL)); 1285 PetscCall(PetscLogGetStageLog(&stageLog)); 1286 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1287 PetscCall(PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog)); 1288 eventLog->eventInfo[event].count--; 1289 PetscFunctionReturn(PETSC_SUCCESS); 1290 } 1291 1292 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1293 /*@C 1294 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1295 be read by bin/petscview. This program no longer exists. 1296 1297 Collective on `PETSC_COMM_WORLD` 1298 1299 Input Parameter: 1300 . name - an optional file name 1301 1302 Usage: 1303 .vb 1304 PetscInitialize(...); 1305 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1306 ... code ... 1307 PetscLogDump(filename); 1308 PetscFinalize(); 1309 .ve 1310 1311 Level: advanced 1312 1313 Note: 1314 The default file name is 1315 $ Log.<rank> 1316 where <rank> is the processor number. If no name is specified, 1317 this file will be used. 1318 1319 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1320 @*/ 1321 PetscErrorCode PetscLogDump(const char sname[]) 1322 { 1323 PetscStageLog stageLog; 1324 PetscEventPerfInfo *eventInfo; 1325 FILE *fd; 1326 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1327 PetscLogDouble flops, _TotalTime; 1328 PetscMPIInt rank; 1329 int action, object, curStage; 1330 PetscLogEvent event; 1331 1332 PetscFunctionBegin; 1333 /* Calculate the total elapsed time */ 1334 PetscCall(PetscTime(&_TotalTime)); 1335 _TotalTime -= petsc_BaseTime; 1336 /* Open log file */ 1337 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1338 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank)); 1339 PetscCall(PetscFixFilename(file, fname)); 1340 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1341 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1342 /* Output totals */ 1343 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1344 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1345 /* Output actions */ 1346 if (petsc_logActions) { 1347 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1348 for (action = 0; action < petsc_numActions; action++) { 1349 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1350 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1351 } 1352 } 1353 /* Output objects */ 1354 if (petsc_logObjects) { 1355 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1356 for (object = 0; object < petsc_numObjects; object++) { 1357 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem)); 1358 if (!petsc_objects[object].name[0]) { 1359 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n")); 1360 } else { 1361 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1362 } 1363 if (petsc_objects[object].info[0] != 0) { 1364 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1365 } else { 1366 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1367 } 1368 } 1369 } 1370 /* Output events */ 1371 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1372 PetscCall(PetscLogGetStageLog(&stageLog)); 1373 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1374 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1375 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1376 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time; 1377 else flops = 0.0; 1378 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops)); 1379 } 1380 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1381 PetscFunctionReturn(PETSC_SUCCESS); 1382 } 1383 1384 /* 1385 PetscLogView_Detailed - Each process prints the times for its own events 1386 1387 */ 1388 PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) 1389 { 1390 PetscStageLog stageLog; 1391 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1392 PetscLogDouble locTotalTime, numRed, maxMem; 1393 int numStages, numEvents, stage, event; 1394 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1395 PetscMPIInt rank, size; 1396 1397 PetscFunctionBegin; 1398 PetscCallMPI(MPI_Comm_size(comm, &size)); 1399 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1400 /* Must preserve reduction count before we go on */ 1401 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1402 /* Get the total elapsed time */ 1403 PetscCall(PetscTime(&locTotalTime)); 1404 locTotalTime -= petsc_BaseTime; 1405 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 1406 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 1407 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 1408 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 1409 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 1410 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 1411 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 1412 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 1413 PetscCall(PetscLogGetStageLog(&stageLog)); 1414 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1415 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1416 for (stage = 0; stage < numStages; stage++) { 1417 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name)); 1418 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name)); 1419 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1420 for (event = 0; event < numEvents; event++) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name)); 1421 } 1422 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1423 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1424 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1425 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1426 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1427 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1428 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1429 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects)); 1430 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1431 PetscCall(PetscViewerFlush(viewer)); 1432 for (stage = 0; stage < numStages; stage++) { 1433 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1434 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, 1435 stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1436 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1437 for (event = 0; event < numEvents; event++) { 1438 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1439 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1440 stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, 1441 eventInfo->flops)); 1442 if (eventInfo->dof[0] >= 0.) { 1443 PetscInt d, e; 1444 1445 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1446 for (d = 0; d < 8; ++d) { 1447 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1448 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1449 } 1450 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1451 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1452 for (e = 0; e < 8; ++e) { 1453 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1454 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1455 } 1456 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1457 } 1458 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1459 } 1460 } 1461 PetscCall(PetscViewerFlush(viewer)); 1462 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1463 PetscFunctionReturn(PETSC_SUCCESS); 1464 } 1465 1466 /* 1467 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1468 */ 1469 PetscErrorCode PetscLogView_CSV(PetscViewer viewer) 1470 { 1471 PetscStageLog stageLog; 1472 PetscEventPerfInfo *eventInfo = NULL; 1473 PetscLogDouble locTotalTime, maxMem; 1474 int numStages, numEvents, stage, event; 1475 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1476 PetscMPIInt rank, size; 1477 1478 PetscFunctionBegin; 1479 PetscCallMPI(MPI_Comm_size(comm, &size)); 1480 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1481 /* Must preserve reduction count before we go on */ 1482 /* Get the total elapsed time */ 1483 PetscCall(PetscTime(&locTotalTime)); 1484 locTotalTime -= petsc_BaseTime; 1485 PetscCall(PetscLogGetStageLog(&stageLog)); 1486 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1487 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1488 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1489 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1490 PetscCall(PetscViewerFlush(viewer)); 1491 for (stage = 0; stage < numStages; stage++) { 1492 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1493 1494 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1495 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1496 for (event = 0; event < numEvents; event++) { 1497 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1498 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, 1499 eventInfo->numReductions, eventInfo->flops)); 1500 if (eventInfo->dof[0] >= 0.) { 1501 PetscInt d, e; 1502 1503 for (d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1504 for (e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1505 } 1506 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1507 } 1508 } 1509 PetscCall(PetscViewerFlush(viewer)); 1510 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1511 PetscFunctionReturn(PETSC_SUCCESS); 1512 } 1513 1514 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) 1515 { 1516 PetscFunctionBegin; 1517 if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS); 1518 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1519 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1520 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1521 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1522 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1523 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1524 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1525 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1526 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1527 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1528 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1529 PetscFunctionReturn(PETSC_SUCCESS); 1530 } 1531 1532 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) 1533 { 1534 PetscFunctionBegin; 1535 if (PetscDefined(USE_DEBUG)) { 1536 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1537 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1538 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1539 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1540 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1541 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1542 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1543 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1544 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1545 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1546 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1547 } 1548 PetscFunctionReturn(PETSC_SUCCESS); 1549 } 1550 1551 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) 1552 { 1553 #if defined(PETSC_HAVE_DEVICE) 1554 PetscMPIInt size; 1555 PetscBool deviceInitialized = PETSC_FALSE; 1556 1557 PetscFunctionBegin; 1558 PetscCallMPI(MPI_Comm_size(comm, &size)); 1559 for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) { 1560 const PetscDeviceType dtype = PetscDeviceTypeCast(i); 1561 if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */ 1562 deviceInitialized = PETSC_TRUE; 1563 break; 1564 } 1565 } 1566 /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */ 1567 if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS); 1568 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1569 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1570 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1571 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1572 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1573 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1574 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1575 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1576 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1577 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1578 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1579 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1580 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1581 PetscFunctionReturn(PETSC_SUCCESS); 1582 #else 1583 return PETSC_SUCCESS; 1584 #endif 1585 } 1586 1587 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) 1588 { 1589 #if defined(PETSC_HAVE_DEVICE) 1590 1591 PetscFunctionBegin; 1592 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS); 1593 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1594 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1595 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1596 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1597 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1598 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1599 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1600 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1601 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1602 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1603 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1604 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1605 PetscFunctionReturn(PETSC_SUCCESS); 1606 #else 1607 return PETSC_SUCCESS; 1608 #endif 1609 } 1610 1611 PetscErrorCode PetscLogView_Default(PetscViewer viewer) 1612 { 1613 FILE *fd; 1614 PetscLogDouble zero = 0.0; 1615 PetscStageLog stageLog; 1616 PetscStageInfo *stageInfo = NULL; 1617 PetscEventPerfInfo *eventInfo = NULL; 1618 PetscClassPerfInfo *classInfo; 1619 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1620 const char *name; 1621 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1622 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1623 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1624 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1625 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1626 PetscLogDouble min, max, tot, ratio, avg, x, y; 1627 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1628 #if defined(PETSC_HAVE_DEVICE) 1629 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1630 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1631 #endif 1632 PetscMPIInt minC, maxC; 1633 PetscMPIInt size, rank; 1634 PetscBool *localStageUsed, *stageUsed; 1635 PetscBool *localStageVisible, *stageVisible; 1636 int numStages, localNumEvents, numEvents; 1637 int stage, oclass; 1638 PetscLogEvent event; 1639 char version[256]; 1640 MPI_Comm comm; 1641 #if defined(PETSC_HAVE_DEVICE) 1642 PetscLogEvent eventid; 1643 PetscInt64 nas = 0x7FF0000000000002; 1644 #endif 1645 1646 PetscFunctionBegin; 1647 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1648 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1649 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1650 PetscCallMPI(MPI_Comm_size(comm, &size)); 1651 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1652 /* Get the total elapsed time */ 1653 PetscCall(PetscTime(&locTotalTime)); 1654 locTotalTime -= petsc_BaseTime; 1655 1656 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1657 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1658 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1659 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1660 PetscCall(PetscLogViewWarnSync(comm, fd)); 1661 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1662 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1663 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1664 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1665 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1666 PetscCall(PetscGetUserName(username, sizeof(username))); 1667 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1668 PetscCall(PetscGetDate(date, sizeof(date))); 1669 PetscCall(PetscGetVersion(version, sizeof(version))); 1670 if (size == 1) { 1671 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1672 } else { 1673 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1674 } 1675 #if defined(PETSC_HAVE_OPENMP) 1676 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1677 #endif 1678 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1679 1680 /* Must preserve reduction count before we go on */ 1681 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1682 1683 /* Calculate summary information */ 1684 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1685 /* Time */ 1686 PetscCall(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1687 PetscCall(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1688 PetscCall(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1689 avg = tot / ((PetscLogDouble)size); 1690 if (min != 0.0) ratio = max / min; 1691 else ratio = 0.0; 1692 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1693 TotalTime = tot; 1694 /* Objects */ 1695 avg = (PetscLogDouble)petsc_numObjects; 1696 PetscCall(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1697 PetscCall(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1698 PetscCall(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1699 avg = tot / ((PetscLogDouble)size); 1700 if (min != 0.0) ratio = max / min; 1701 else ratio = 0.0; 1702 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1703 /* Flops */ 1704 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1705 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1706 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1707 avg = tot / ((PetscLogDouble)size); 1708 if (min != 0.0) ratio = max / min; 1709 else ratio = 0.0; 1710 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1711 TotalFlops = tot; 1712 /* Flops/sec -- Must talk to Barry here */ 1713 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1714 else flops = 0.0; 1715 PetscCall(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1716 PetscCall(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1717 PetscCall(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1718 avg = tot / ((PetscLogDouble)size); 1719 if (min != 0.0) ratio = max / min; 1720 else ratio = 0.0; 1721 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1722 /* Memory */ 1723 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1724 if (mem > 0.0) { 1725 PetscCall(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1726 PetscCall(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1727 PetscCall(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1728 avg = tot / ((PetscLogDouble)size); 1729 if (min != 0.0) ratio = max / min; 1730 else ratio = 0.0; 1731 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1732 } 1733 /* Messages */ 1734 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1735 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1736 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1737 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1738 avg = tot / ((PetscLogDouble)size); 1739 if (min != 0.0) ratio = max / min; 1740 else ratio = 0.0; 1741 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1742 numMessages = tot; 1743 /* Message Lengths */ 1744 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1745 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1746 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1747 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1748 if (numMessages != 0) avg = tot / numMessages; 1749 else avg = 0.0; 1750 if (min != 0.0) ratio = max / min; 1751 else ratio = 0.0; 1752 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1753 messageLength = tot; 1754 /* Reductions */ 1755 PetscCall(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1756 PetscCall(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1757 PetscCall(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1758 if (min != 0.0) ratio = max / min; 1759 else ratio = 0.0; 1760 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1761 numReductions = red; /* wrong because uses count from process zero */ 1762 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1763 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1764 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1765 1766 /* Get total number of stages -- 1767 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1768 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1769 This seems best accomplished by assoicating a communicator with each stage. 1770 */ 1771 PetscCall(PetscLogGetStageLog(&stageLog)); 1772 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1773 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1774 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1775 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1776 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1777 if (numStages > 0) { 1778 stageInfo = stageLog->stageInfo; 1779 for (stage = 0; stage < numStages; stage++) { 1780 if (stage < stageLog->numStages) { 1781 localStageUsed[stage] = stageInfo[stage].used; 1782 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1783 } else { 1784 localStageUsed[stage] = PETSC_FALSE; 1785 localStageVisible[stage] = PETSC_TRUE; 1786 } 1787 } 1788 PetscCall(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1789 PetscCall(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1790 for (stage = 0; stage < numStages; stage++) { 1791 if (stageUsed[stage]) { 1792 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1793 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1794 break; 1795 } 1796 } 1797 for (stage = 0; stage < numStages; stage++) { 1798 if (!stageUsed[stage]) continue; 1799 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1800 if (localStageUsed[stage]) { 1801 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1802 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1803 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1804 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1805 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1806 name = stageInfo[stage].name; 1807 } else { 1808 PetscCall(MPIU_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1809 PetscCall(MPIU_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1810 PetscCall(MPIU_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1811 PetscCall(MPIU_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1812 PetscCall(MPIU_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1813 name = ""; 1814 } 1815 mess *= 0.5; 1816 messLen *= 0.5; 1817 red /= size; 1818 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1819 else fracTime = 0.0; 1820 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1821 else fracFlops = 0.0; 1822 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1823 if (numMessages != 0.0) fracMessages = mess / numMessages; 1824 else fracMessages = 0.0; 1825 if (mess != 0.0) avgMessLen = messLen / mess; 1826 else avgMessLen = 0.0; 1827 if (messageLength != 0.0) fracLength = messLen / messageLength; 1828 else fracLength = 0.0; 1829 if (numReductions != 0.0) fracReductions = red / numReductions; 1830 else fracReductions = 0.0; 1831 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1832 } 1833 } 1834 1835 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1836 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1837 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1838 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1839 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1840 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1841 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1842 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1843 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1844 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1845 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1846 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1847 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1848 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1849 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1850 if (PetscLogMemory) { 1851 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1852 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1853 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1854 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1855 } 1856 #if defined(PETSC_HAVE_DEVICE) 1857 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1858 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1859 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1860 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1861 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1862 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1863 #endif 1864 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1865 1866 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1867 1868 /* Report events */ 1869 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1870 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1871 #if defined(PETSC_HAVE_DEVICE) 1872 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1873 #endif 1874 PetscCall(PetscFPrintf(comm, fd, "\n")); 1875 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1876 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1877 #if defined(PETSC_HAVE_DEVICE) 1878 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1879 #endif 1880 PetscCall(PetscFPrintf(comm, fd, "\n")); 1881 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1882 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1883 #if defined(PETSC_HAVE_DEVICE) 1884 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1885 #endif 1886 PetscCall(PetscFPrintf(comm, fd, "\n")); 1887 1888 #if defined(PETSC_HAVE_DEVICE) 1889 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1890 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1891 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1892 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1893 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1894 #endif 1895 1896 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1897 for (stage = 0; stage < numStages; stage++) { 1898 if (!stageVisible[stage]) continue; 1899 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1900 if (localStageUsed[stage]) { 1901 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1902 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1903 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1904 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1905 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1906 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1907 } else { 1908 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1909 PetscCall(MPIU_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1910 PetscCall(MPIU_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1911 PetscCall(MPIU_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1912 PetscCall(MPIU_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1913 PetscCall(MPIU_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1914 } 1915 mess *= 0.5; 1916 messLen *= 0.5; 1917 red /= size; 1918 1919 /* Get total number of events in this stage -- 1920 Currently, a single processor can register more events than another, but events must all be registered in order, 1921 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1922 on the event ID. This seems best accomplished by associating a communicator with each stage. 1923 1924 Problem: If the event did not happen on proc 1, its name will not be available. 1925 Problem: Event visibility is not implemented 1926 */ 1927 if (localStageUsed[stage]) { 1928 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1929 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1930 } else localNumEvents = 0; 1931 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1932 for (event = 0; event < numEvents; event++) { 1933 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1934 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1935 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; 1936 else flopr = 0.0; 1937 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1938 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1939 PetscCall(MPIU_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1940 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1941 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1942 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1943 PetscCall(MPIU_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1944 PetscCall(MPIU_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1945 PetscCall(MPIU_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1946 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1947 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1948 if (PetscLogMemory) { 1949 PetscCall(MPIU_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1950 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1951 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1952 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1953 } 1954 #if defined(PETSC_HAVE_DEVICE) 1955 PetscCall(MPIU_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1956 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1957 PetscCall(MPIU_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1958 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1959 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1960 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1961 #endif 1962 name = stageLog->eventLog->eventInfo[event].name; 1963 } else { 1964 int ierr = 0; 1965 1966 flopr = 0.0; 1967 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1968 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1969 PetscCall(MPIU_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1970 PetscCall(MPIU_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1971 PetscCall(MPIU_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1972 PetscCall(MPIU_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1973 PetscCall(MPIU_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1974 PetscCall(MPIU_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1975 PetscCall(MPIU_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1976 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1977 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1978 if (PetscLogMemory) { 1979 PetscCall(MPIU_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1980 PetscCall(MPIU_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1981 PetscCall(MPIU_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1982 PetscCall(MPIU_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1983 } 1984 #if defined(PETSC_HAVE_DEVICE) 1985 PetscCall(MPIU_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1986 PetscCall(MPIU_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1987 PetscCall(MPIU_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1988 PetscCall(MPIU_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1989 PetscCall(MPIU_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1990 PetscCall(MPIU_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1991 #endif 1992 name = ""; 1993 } 1994 if (mint < 0.0) { 1995 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name)); 1996 mint = 0; 1997 } 1998 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, name); 1999 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 2000 #if defined(PETSC_HAVE_DEVICE) 2001 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 2002 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 2003 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 2004 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 2005 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 2006 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 2007 } 2008 } 2009 #endif 2010 totm *= 0.5; 2011 totml *= 0.5; 2012 totr /= size; 2013 2014 if (maxC != 0) { 2015 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 2016 else ratC = 0.0; 2017 if (mint != 0.0) ratt = maxt / mint; 2018 else ratt = 0.0; 2019 if (minf != 0.0) ratf = maxf / minf; 2020 else ratf = 0.0; 2021 if (TotalTime != 0.0) fracTime = tott / TotalTime; 2022 else fracTime = 0.0; 2023 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 2024 else fracFlops = 0.0; 2025 if (stageTime != 0.0) fracStageTime = tott / stageTime; 2026 else fracStageTime = 0.0; 2027 if (flops != 0.0) fracStageFlops = totf / flops; 2028 else fracStageFlops = 0.0; 2029 if (numMessages != 0.0) fracMess = totm / numMessages; 2030 else fracMess = 0.0; 2031 if (messageLength != 0.0) fracMessLen = totml / messageLength; 2032 else fracMessLen = 0.0; 2033 if (numReductions != 0.0) fracRed = totr / numReductions; 2034 else fracRed = 0.0; 2035 if (mess != 0.0) fracStageMess = totm / mess; 2036 else fracStageMess = 0.0; 2037 if (messLen != 0.0) fracStageMessLen = totml / messLen; 2038 else fracStageMessLen = 0.0; 2039 if (red != 0.0) fracStageRed = totr / red; 2040 else fracStageRed = 0.0; 2041 if (totm != 0.0) totml /= totm; 2042 else totml = 0.0; 2043 if (maxt != 0.0) flopr = totf / maxt; 2044 else flopr = 0.0; 2045 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) 2046 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 2047 else 2048 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 2049 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 2050 #if defined(PETSC_HAVE_DEVICE) 2051 if (totf != 0.0) fracgflops = gflops / totf; 2052 else fracgflops = 0.0; 2053 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 2054 else gflopr = 0.0; 2055 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 2056 #endif 2057 PetscCall(PetscFPrintf(comm, fd, "\n")); 2058 } 2059 } 2060 } 2061 2062 /* Memory usage and object creation */ 2063 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 2064 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 2065 #if defined(PETSC_HAVE_DEVICE) 2066 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 2067 #endif 2068 PetscCall(PetscFPrintf(comm, fd, "\n")); 2069 PetscCall(PetscFPrintf(comm, fd, "\n")); 2070 2071 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 2072 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 2073 stats for stages local to processor sets. 2074 */ 2075 /* We should figure out the longest object name here (now 20 characters) */ 2076 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 2077 for (stage = 0; stage < numStages; stage++) { 2078 if (localStageUsed[stage]) { 2079 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 2080 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 2081 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 2082 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 2083 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions)); 2084 } 2085 } 2086 } else { 2087 if (!localStageVisible[stage]) continue; 2088 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 2089 } 2090 } 2091 2092 PetscCall(PetscFree(localStageUsed)); 2093 PetscCall(PetscFree(stageUsed)); 2094 PetscCall(PetscFree(localStageVisible)); 2095 PetscCall(PetscFree(stageVisible)); 2096 2097 /* Information unrelated to this particular run */ 2098 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 2099 PetscCall(PetscTime(&y)); 2100 PetscCall(PetscTime(&x)); 2101 PetscCall(PetscTime(&y)); 2102 PetscCall(PetscTime(&y)); 2103 PetscCall(PetscTime(&y)); 2104 PetscCall(PetscTime(&y)); 2105 PetscCall(PetscTime(&y)); 2106 PetscCall(PetscTime(&y)); 2107 PetscCall(PetscTime(&y)); 2108 PetscCall(PetscTime(&y)); 2109 PetscCall(PetscTime(&y)); 2110 PetscCall(PetscTime(&y)); 2111 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 2112 /* MPI information */ 2113 if (size > 1) { 2114 MPI_Status status; 2115 PetscMPIInt tag; 2116 MPI_Comm newcomm; 2117 2118 PetscCallMPI(MPI_Barrier(comm)); 2119 PetscCall(PetscTime(&x)); 2120 PetscCallMPI(MPI_Barrier(comm)); 2121 PetscCallMPI(MPI_Barrier(comm)); 2122 PetscCallMPI(MPI_Barrier(comm)); 2123 PetscCallMPI(MPI_Barrier(comm)); 2124 PetscCallMPI(MPI_Barrier(comm)); 2125 PetscCall(PetscTime(&y)); 2126 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 2127 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 2128 PetscCallMPI(MPI_Barrier(comm)); 2129 if (rank) { 2130 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 2131 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 2132 } else { 2133 PetscCall(PetscTime(&x)); 2134 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 2135 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 2136 PetscCall(PetscTime(&y)); 2137 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 2138 } 2139 PetscCall(PetscCommDestroy(&newcomm)); 2140 } 2141 PetscCall(PetscOptionsView(NULL, viewer)); 2142 2143 /* Machine and compile information */ 2144 #if defined(PETSC_USE_FORTRAN_KERNELS) 2145 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 2146 #else 2147 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 2148 #endif 2149 #if defined(PETSC_USE_64BIT_INDICES) 2150 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64-bit PetscInt\n")); 2151 #elif defined(PETSC_USE___FLOAT128) 2152 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32-bit PetscInt\n")); 2153 #endif 2154 #if defined(PETSC_USE_REAL_SINGLE) 2155 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 2156 #elif defined(PETSC_USE___FLOAT128) 2157 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 2158 #endif 2159 #if defined(PETSC_USE_REAL_MAT_SINGLE) 2160 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 2161 #else 2162 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 2163 #endif 2164 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 2165 2166 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 2167 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 2168 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 2169 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 2170 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 2171 2172 /* Cleanup */ 2173 PetscCall(PetscFPrintf(comm, fd, "\n")); 2174 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 2175 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 2176 PetscCall(PetscFPTrapPop()); 2177 PetscFunctionReturn(PETSC_SUCCESS); 2178 } 2179 2180 /*@C 2181 PetscLogView - Prints a summary of the logging. 2182 2183 Collective over MPI_Comm 2184 2185 Input Parameter: 2186 . viewer - an ASCII viewer 2187 2188 Options Database Keys: 2189 + -log_view [:filename] - Prints summary of log information 2190 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2191 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2192 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2193 . -log_view_memory - Also display memory usage in each event 2194 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2195 . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation 2196 - -log_trace [filename] - Displays a trace of what each process is doing 2197 2198 Level: beginner 2199 2200 Notes: 2201 It is possible to control the logging programmatically but we recommend using the options database approach whenever possible 2202 By default the summary is printed to stdout. 2203 2204 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2205 2206 If PETSc is configured with --with-logging=0 then this functionality is not available 2207 2208 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2209 directory then open filename.xml with your browser. Specific notes for certain browsers 2210 $ Firefox and Internet explorer - simply open the file 2211 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2212 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2213 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2214 your browser. 2215 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2216 window and render the XML log file contents. 2217 2218 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2219 2220 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2221 or using speedscope (https://www.speedscope.app). 2222 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2223 2224 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogDump()` 2225 @*/ 2226 PetscErrorCode PetscLogView(PetscViewer viewer) 2227 { 2228 PetscBool isascii; 2229 PetscViewerFormat format; 2230 int stage, lastStage; 2231 PetscStageLog stageLog; 2232 2233 PetscFunctionBegin; 2234 PetscCheck(PetscLogPLB, PETSC_COMM_SELF, PETSC_ERR_SUP, "Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2235 /* Pop off any stages the user forgot to remove */ 2236 lastStage = 0; 2237 PetscCall(PetscLogGetStageLog(&stageLog)); 2238 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2239 while (stage >= 0) { 2240 lastStage = stage; 2241 PetscCall(PetscStageLogPop(stageLog)); 2242 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2243 } 2244 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2245 PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Currently can only view logging to ASCII"); 2246 PetscCall(PetscViewerGetFormat(viewer, &format)); 2247 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2248 PetscCall(PetscLogView_Default(viewer)); 2249 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2250 PetscCall(PetscLogView_Detailed(viewer)); 2251 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2252 PetscCall(PetscLogView_CSV(viewer)); 2253 } else if (format == PETSC_VIEWER_ASCII_XML) { 2254 PetscCall(PetscLogView_Nested(viewer)); 2255 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2256 PetscCall(PetscLogView_Flamegraph(viewer)); 2257 } 2258 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2259 PetscFunctionReturn(PETSC_SUCCESS); 2260 } 2261 2262 /*@C 2263 PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed. 2264 2265 Collective on `PETSC_COMM_WORLD` 2266 2267 Level: developer 2268 2269 .seealso: [](ch_profiling), `PetscLogView()` 2270 @*/ 2271 PetscErrorCode PetscLogViewFromOptions(void) 2272 { 2273 PetscViewer viewer; 2274 PetscBool flg; 2275 PetscViewerFormat format; 2276 2277 PetscFunctionBegin; 2278 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg)); 2279 if (flg) { 2280 PetscCall(PetscViewerPushFormat(viewer, format)); 2281 PetscCall(PetscLogView(viewer)); 2282 PetscCall(PetscViewerPopFormat(viewer)); 2283 PetscCall(PetscViewerDestroy(&viewer)); 2284 } 2285 PetscFunctionReturn(PETSC_SUCCESS); 2286 } 2287 2288 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2289 /*@C 2290 PetscGetFlops - Returns the number of flops used on this processor 2291 since the program began. 2292 2293 Not Collective 2294 2295 Output Parameter: 2296 flops - number of floating point operations 2297 2298 Level: intermediate 2299 2300 Notes: 2301 A global counter logs all PETSc flop counts. The user can use 2302 `PetscLogFlops()` to increment this counter to include flops for the 2303 application code. 2304 2305 A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank 2306 2307 .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()` 2308 @*/ 2309 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) 2310 { 2311 PetscFunctionBegin; 2312 *flops = petsc_TotalFlops; 2313 PetscFunctionReturn(PETSC_SUCCESS); 2314 } 2315 2316 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2317 { 2318 size_t fullLength; 2319 va_list Argp; 2320 2321 PetscFunctionBegin; 2322 if (!petsc_logObjects) PetscFunctionReturn(PETSC_SUCCESS); 2323 va_start(Argp, format); 2324 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp)); 2325 va_end(Argp); 2326 PetscFunctionReturn(PETSC_SUCCESS); 2327 } 2328 2329 /*MC 2330 PetscLogFlops - Adds floating point operations to the global counter. 2331 2332 Synopsis: 2333 #include <petsclog.h> 2334 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2335 2336 Not Collective 2337 2338 Input Parameter: 2339 . f - flop counter 2340 2341 Usage: 2342 .vb 2343 PetscLogEvent USER_EVENT; 2344 PetscLogEventRegister("User event",0,&USER_EVENT); 2345 PetscLogEventBegin(USER_EVENT,0,0,0,0); 2346 [code segment to monitor] 2347 PetscLogFlops(user_flops) 2348 PetscLogEventEnd(USER_EVENT,0,0,0,0); 2349 .ve 2350 2351 Level: intermediate 2352 2353 Note: 2354 A global counter logs all PETSc flop counts. The user can use 2355 PetscLogFlops() to increment this counter to include flops for the 2356 application code. 2357 2358 .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2359 M*/ 2360 2361 /*MC 2362 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) 2363 to get accurate timings 2364 2365 Synopsis: 2366 #include <petsclog.h> 2367 void PetscPreLoadBegin(PetscBool flag,char *name); 2368 2369 Not Collective 2370 2371 Input Parameters: 2372 + flag - `PETSC_TRUE` to run twice, `PETSC_FALSE` to run once, may be overridden 2373 with command line option -preload true or -preload false 2374 - name - name of first stage (lines of code timed separately with `-log_view`) to 2375 be preloaded 2376 2377 Usage: 2378 .vb 2379 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2380 lines of code 2381 PetscPreLoadStage("second stage"); 2382 lines of code 2383 PetscPreLoadEnd(); 2384 .ve 2385 2386 Level: intermediate 2387 2388 Note: 2389 Only works in C/C++, not Fortran 2390 2391 Flags available within the macro. 2392 + PetscPreLoadingUsed - true if we are or have done preloading 2393 . PetscPreLoadingOn - true if it is CURRENTLY doing preload 2394 . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second 2395 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on 2396 The first two variables are available throughout the program, the second two only between the `PetscPreLoadBegin()` 2397 and `PetscPreLoadEnd()` 2398 2399 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2400 M*/ 2401 2402 /*MC 2403 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) 2404 to get accurate timings 2405 2406 Synopsis: 2407 #include <petsclog.h> 2408 void PetscPreLoadEnd(void); 2409 2410 Not Collective 2411 2412 Usage: 2413 .vb 2414 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2415 lines of code 2416 PetscPreLoadStage("second stage"); 2417 lines of code 2418 PetscPreLoadEnd(); 2419 .ve 2420 2421 Level: intermediate 2422 2423 Note: 2424 Only works in C/C++ not fortran 2425 2426 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2427 M*/ 2428 2429 /*MC 2430 PetscPreLoadStage - Start a new segment of code to be timed separately. 2431 to get accurate timings 2432 2433 Synopsis: 2434 #include <petsclog.h> 2435 void PetscPreLoadStage(char *name); 2436 2437 Not Collective 2438 2439 Usage: 2440 .vb 2441 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2442 lines of code 2443 PetscPreLoadStage("second stage"); 2444 lines of code 2445 PetscPreLoadEnd(); 2446 .ve 2447 2448 Level: intermediate 2449 2450 Note: 2451 Only works in C/C++ not fortran 2452 2453 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2454 M*/ 2455 2456 #if PetscDefined(HAVE_DEVICE) 2457 #include <petsc/private/deviceimpl.h> 2458 2459 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2460 2461 /* 2462 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2463 because it will result in timing results that cannot be interpreted. 2464 */ 2465 static PetscErrorCode PetscLogGpuTime_Off(void) 2466 { 2467 PetscLogGpuTimeFlag = PETSC_FALSE; 2468 return PETSC_SUCCESS; 2469 } 2470 2471 /*@C 2472 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2473 2474 Options Database Key: 2475 . -log_view_gpu_time - provide the GPU times in the -log_view output 2476 2477 Level: advanced 2478 2479 Notes: 2480 Turning on the timing of the 2481 GPU kernels can slow down the entire computation and should only be used when studying the performance 2482 of operations on GPU such as vector operations and matrix-vector operations. 2483 2484 This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off. 2485 2486 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2487 @*/ 2488 PetscErrorCode PetscLogGpuTime(void) 2489 { 2490 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2491 PetscLogGpuTimeFlag = PETSC_TRUE; 2492 return PETSC_SUCCESS; 2493 } 2494 2495 /*@C 2496 PetscLogGpuTimeBegin - Start timer for device 2497 2498 Level: intermediate 2499 2500 Notes: 2501 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times). 2502 2503 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times). 2504 2505 There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()` 2506 2507 This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space. 2508 2509 The regular logging captures the time for data transfers and any CPU activities during the event 2510 2511 It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel. 2512 2513 Developer Notes: 2514 The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`. 2515 2516 `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the 2517 event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution, 2518 but not continued GPU execution, until the timer event is recorded. 2519 2520 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2521 @*/ 2522 PetscErrorCode PetscLogGpuTimeBegin(void) 2523 { 2524 PetscFunctionBegin; 2525 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(PETSC_SUCCESS); 2526 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2527 PetscDeviceContext dctx; 2528 2529 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2530 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2531 } else { 2532 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2533 } 2534 PetscFunctionReturn(PETSC_SUCCESS); 2535 } 2536 2537 /*@C 2538 PetscLogGpuTimeEnd - Stop timer for device 2539 2540 Level: intermediate 2541 2542 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2543 @*/ 2544 PetscErrorCode PetscLogGpuTimeEnd(void) 2545 { 2546 PetscFunctionBegin; 2547 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(PETSC_SUCCESS); 2548 if (PetscDefined(HAVE_CUDA) || PetscDefined(HAVE_HIP)) { 2549 PetscDeviceContext dctx; 2550 PetscLogDouble elapsed; 2551 2552 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2553 PetscCall(PetscDeviceContextEndTimer_Internal(dctx, &elapsed)); 2554 petsc_gtime += (elapsed / 1000.0); 2555 } else { 2556 PetscCall(PetscTimeAdd(&petsc_gtime)); 2557 } 2558 PetscFunctionReturn(PETSC_SUCCESS); 2559 } 2560 2561 #endif /* end of PETSC_HAVE_DEVICE */ 2562 2563 #else /* end of -DPETSC_USE_LOG section */ 2564 2565 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2566 { 2567 PetscFunctionBegin; 2568 PetscFunctionReturn(PETSC_SUCCESS); 2569 } 2570 2571 #endif /* PETSC_USE_LOG*/ 2572 2573 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2574 PetscClassId PETSC_OBJECT_CLASSID = 0; 2575 2576 /*@C 2577 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2578 2579 Not Collective 2580 2581 Input Parameter: 2582 . name - The class name 2583 2584 Output Parameter: 2585 . oclass - The class id or classid 2586 2587 Level: developer 2588 2589 .seealso: [](ch_profiling), `PetscLogEventRegister()` 2590 @*/ 2591 PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass) 2592 { 2593 #if defined(PETSC_USE_LOG) 2594 PetscStageLog stageLog; 2595 PetscInt stage; 2596 #endif 2597 2598 PetscFunctionBegin; 2599 *oclass = ++PETSC_LARGEST_CLASSID; 2600 #if defined(PETSC_USE_LOG) 2601 PetscCall(PetscLogGetStageLog(&stageLog)); 2602 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2603 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2604 #endif 2605 PetscFunctionReturn(PETSC_SUCCESS); 2606 } 2607 2608 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2609 #include <mpe.h> 2610 2611 PetscBool PetscBeganMPE = PETSC_FALSE; 2612 2613 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2614 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2615 2616 /*@C 2617 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files 2618 and slows the program down. 2619 2620 Collective over `PETSC_COMM_WORLD` 2621 2622 Options Database Key: 2623 . -log_mpe - Prints extensive log information 2624 2625 Level: advanced 2626 2627 Note: 2628 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 2629 intended for production runs since it logs only flop rates and object 2630 creation (and should not significantly slow the programs). 2631 2632 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2633 `PetscLogEventDeactivate()` 2634 @*/ 2635 PetscErrorCode PetscLogMPEBegin(void) 2636 { 2637 PetscFunctionBegin; 2638 /* Do MPE initialization */ 2639 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2640 PetscCall(PetscInfo(0, "Initializing MPE.\n")); 2641 PetscCall(MPE_Init_log()); 2642 2643 PetscBeganMPE = PETSC_TRUE; 2644 } else { 2645 PetscCall(PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n")); 2646 } 2647 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2648 PetscFunctionReturn(PETSC_SUCCESS); 2649 } 2650 2651 /*@C 2652 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2653 2654 Collective over `PETSC_COMM_WORLD` 2655 2656 Level: advanced 2657 2658 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2659 @*/ 2660 PetscErrorCode PetscLogMPEDump(const char sname[]) 2661 { 2662 char name[PETSC_MAX_PATH_LEN]; 2663 2664 PetscFunctionBegin; 2665 if (PetscBeganMPE) { 2666 PetscCall(PetscInfo(0, "Finalizing MPE.\n")); 2667 if (sname) { 2668 PetscCall(PetscStrncpy(name, sname, sizeof(name))); 2669 } else { 2670 PetscCall(PetscGetProgramName(name, sizeof(name))); 2671 } 2672 PetscCall(MPE_Finish_log(name)); 2673 } else { 2674 PetscCall(PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n")); 2675 } 2676 PetscFunctionReturn(PETSC_SUCCESS); 2677 } 2678 2679 #define PETSC_RGB_COLORS_MAX 39 2680 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ", 2681 "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ", 2682 "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ", 2683 "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ", 2684 "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "}; 2685 2686 /*@C 2687 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()` 2688 2689 Not collective. Maybe it should be? 2690 2691 Output Parameter: 2692 . str - character string representing the color 2693 2694 Level: developer 2695 2696 .seealso: [](ch_profiling), `PetscLogEventRegister()` 2697 @*/ 2698 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) 2699 { 2700 static int idx = 0; 2701 2702 PetscFunctionBegin; 2703 *str = PetscLogMPERGBColors[idx]; 2704 idx = (idx + 1) % PETSC_RGB_COLORS_MAX; 2705 PetscFunctionReturn(PETSC_SUCCESS); 2706 } 2707 2708 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2709