1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 #include <petscdevice.h> 15 #include <petsc/private/deviceimpl.h> 16 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 17 #include <../src/sys/perfstubs/timer.h> 18 #endif 19 20 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 21 22 #if defined(PETSC_USE_LOG) 23 #include <petscmachineinfo.h> 24 #include <petscconfiginfo.h> 25 26 #if defined(PETSC_HAVE_THREADSAFETY) 27 28 PetscInt petsc_log_gid = -1; /* Global threadId counter */ 29 PETSC_TLS PetscInt petsc_log_tid = -1; /* Local threadId */ 30 31 /* shared variables */ 32 PetscSpinlock PetscLogSpinLock; 33 PetscHMapEvent eventInfoMap_th = NULL; 34 35 #endif 36 37 /* used in the MPI_XXX() count macros in petsclog.h */ 38 39 /* Action and object logging variables */ 40 Action *petsc_actions = NULL; 41 Object *petsc_objects = NULL; 42 PetscBool petsc_logActions = PETSC_FALSE; 43 PetscBool petsc_logObjects = PETSC_FALSE; 44 int petsc_numActions = 0, petsc_maxActions = 100; 45 int petsc_numObjects = 0, petsc_maxObjects = 100; 46 int petsc_numObjectsDestroyed = 0; 47 48 /* Global counters */ 49 PetscLogDouble petsc_BaseTime = 0.0; 50 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 51 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 52 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 53 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 54 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 55 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 56 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 57 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 58 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 59 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 60 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 61 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 62 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 63 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 64 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 65 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 66 67 /* Thread Local storage */ 68 PETSC_TLS PetscLogDouble petsc_TotalFlops_th = 0.0; 69 PETSC_TLS PetscLogDouble petsc_send_ct_th = 0.0; 70 PETSC_TLS PetscLogDouble petsc_recv_ct_th = 0.0; 71 PETSC_TLS PetscLogDouble petsc_send_len_th = 0.0; 72 PETSC_TLS PetscLogDouble petsc_recv_len_th = 0.0; 73 PETSC_TLS PetscLogDouble petsc_isend_ct_th = 0.0; 74 PETSC_TLS PetscLogDouble petsc_irecv_ct_th = 0.0; 75 PETSC_TLS PetscLogDouble petsc_isend_len_th = 0.0; 76 PETSC_TLS PetscLogDouble petsc_irecv_len_th = 0.0; 77 PETSC_TLS PetscLogDouble petsc_wait_ct_th = 0.0; 78 PETSC_TLS PetscLogDouble petsc_wait_any_ct_th = 0.0; 79 PETSC_TLS PetscLogDouble petsc_wait_all_ct_th = 0.0; 80 PETSC_TLS PetscLogDouble petsc_sum_of_waits_ct_th = 0.0; 81 PETSC_TLS PetscLogDouble petsc_allreduce_ct_th = 0.0; 82 PETSC_TLS PetscLogDouble petsc_gather_ct_th = 0.0; 83 PETSC_TLS PetscLogDouble petsc_scatter_ct_th = 0.0; 84 85 #if defined(PETSC_HAVE_DEVICE) 86 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 87 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 88 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 89 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 90 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 91 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 92 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 93 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 94 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 95 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 96 97 PETSC_TLS PetscLogDouble petsc_ctog_ct_th = 0.0; 98 PETSC_TLS PetscLogDouble petsc_gtoc_ct_th = 0.0; 99 PETSC_TLS PetscLogDouble petsc_ctog_sz_th = 0.0; 100 PETSC_TLS PetscLogDouble petsc_gtoc_sz_th = 0.0; 101 PETSC_TLS PetscLogDouble petsc_ctog_ct_scalar_th = 0.0; 102 PETSC_TLS PetscLogDouble petsc_gtoc_ct_scalar_th = 0.0; 103 PETSC_TLS PetscLogDouble petsc_ctog_sz_scalar_th = 0.0; 104 PETSC_TLS PetscLogDouble petsc_gtoc_sz_scalar_th = 0.0; 105 PETSC_TLS PetscLogDouble petsc_gflops_th = 0.0; 106 PETSC_TLS PetscLogDouble petsc_gtime_th = 0.0; 107 #endif 108 109 #if defined(PETSC_HAVE_THREADSAFETY) 110 PetscErrorCode PetscAddLogDouble(PetscLogDouble *tot, PetscLogDouble *tot_th, PetscLogDouble tmp) 111 { 112 *tot_th += tmp; 113 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 114 *tot += tmp; 115 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 116 return PETSC_SUCCESS; 117 } 118 119 PetscErrorCode PetscAddLogDoubleCnt(PetscLogDouble *cnt, PetscLogDouble *tot, PetscLogDouble *cnt_th, PetscLogDouble *tot_th, PetscLogDouble tmp) 120 { 121 *cnt_th = *cnt_th + 1; 122 *tot_th += tmp; 123 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 124 *tot += (PetscLogDouble)(tmp); 125 *cnt += *cnt + 1; 126 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 127 return PETSC_SUCCESS; 128 } 129 130 PetscInt PetscLogGetTid(void) 131 { 132 if (petsc_log_tid < 0) { 133 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 134 petsc_log_tid = ++petsc_log_gid; 135 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 136 } 137 return petsc_log_tid; 138 } 139 140 #endif 141 142 /* Logging functions */ 143 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 144 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 145 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 146 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 147 148 /* Tracing event logging variables */ 149 FILE *petsc_tracefile = NULL; 150 int petsc_tracelevel = 0; 151 const char *petsc_traceblanks = " "; 152 char petsc_tracespace[128] = " "; 153 PetscLogDouble petsc_tracetime = 0.0; 154 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 155 156 static PetscIntStack current_log_event_stack = NULL; 157 158 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) 159 { 160 int stage; 161 PetscBool opt; 162 163 PetscFunctionBegin; 164 if (PetscLogInitializeCalled) PetscFunctionReturn(PETSC_SUCCESS); 165 PetscLogInitializeCalled = PETSC_TRUE; 166 167 PetscCall(PetscIntStackCreate(¤t_log_event_stack)); 168 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt)); 169 if (opt) petsc_logActions = PETSC_FALSE; 170 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt)); 171 if (opt) petsc_logObjects = PETSC_FALSE; 172 if (petsc_logActions) PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 173 if (petsc_logObjects) PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 174 PetscLogPHC = PetscLogObjCreateDefault; 175 PetscLogPHD = PetscLogObjDestroyDefault; 176 /* Setup default logging structures */ 177 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 178 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 179 180 PetscCall(PetscSpinlockCreate(&PetscLogSpinLock)); 181 #if defined(PETSC_HAVE_THREADSAFETY) 182 petsc_log_tid = 0; 183 petsc_log_gid = 0; 184 PetscCall(PetscHMapEventCreate(&eventInfoMap_th)); 185 #endif 186 187 /* All processors sync here for more consistent logging */ 188 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 189 PetscCall(PetscTime(&petsc_BaseTime)); 190 PetscCall(PetscLogStagePush(stage)); 191 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 192 PetscStackCallExternalVoid("ps_initialize_", ps_initialize_()); 193 #endif 194 PetscFunctionReturn(PETSC_SUCCESS); 195 } 196 197 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) 198 { 199 PetscStageLog stageLog; 200 201 PetscFunctionBegin; 202 #if defined(PETSC_HAVE_THREADSAFETY) 203 if (eventInfoMap_th) { 204 PetscEventPerfInfo **array; 205 PetscInt n, off = 0; 206 207 PetscCall(PetscHMapEventGetSize(eventInfoMap_th, &n)); 208 PetscCall(PetscMalloc1(n, &array)); 209 PetscCall(PetscHMapEventGetVals(eventInfoMap_th, &off, array)); 210 for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i])); 211 PetscCall(PetscFree(array)); 212 PetscCall(PetscHMapEventDestroy(&eventInfoMap_th)); 213 } 214 #endif 215 PetscCall(PetscFree(petsc_actions)); 216 PetscCall(PetscFree(petsc_objects)); 217 PetscCall(PetscLogNestedEnd()); 218 PetscCall(PetscLogSet(NULL, NULL)); 219 220 /* Resetting phase */ 221 PetscCall(PetscLogGetStageLog(&stageLog)); 222 PetscCall(PetscStageLogDestroy(stageLog)); 223 PetscCall(PetscIntStackDestroy(current_log_event_stack)); 224 current_log_event_stack = NULL; 225 226 petsc_TotalFlops = 0.0; 227 petsc_numActions = 0; 228 petsc_numObjects = 0; 229 petsc_numObjectsDestroyed = 0; 230 petsc_maxActions = 100; 231 petsc_maxObjects = 100; 232 petsc_actions = NULL; 233 petsc_objects = NULL; 234 petsc_logActions = PETSC_FALSE; 235 petsc_logObjects = PETSC_FALSE; 236 petsc_BaseTime = 0.0; 237 petsc_TotalFlops = 0.0; 238 petsc_send_ct = 0.0; 239 petsc_recv_ct = 0.0; 240 petsc_send_len = 0.0; 241 petsc_recv_len = 0.0; 242 petsc_isend_ct = 0.0; 243 petsc_irecv_ct = 0.0; 244 petsc_isend_len = 0.0; 245 petsc_irecv_len = 0.0; 246 petsc_wait_ct = 0.0; 247 petsc_wait_any_ct = 0.0; 248 petsc_wait_all_ct = 0.0; 249 petsc_sum_of_waits_ct = 0.0; 250 petsc_allreduce_ct = 0.0; 251 petsc_gather_ct = 0.0; 252 petsc_scatter_ct = 0.0; 253 petsc_TotalFlops_th = 0.0; 254 petsc_send_ct_th = 0.0; 255 petsc_recv_ct_th = 0.0; 256 petsc_send_len_th = 0.0; 257 petsc_recv_len_th = 0.0; 258 petsc_isend_ct_th = 0.0; 259 petsc_irecv_ct_th = 0.0; 260 petsc_isend_len_th = 0.0; 261 petsc_irecv_len_th = 0.0; 262 petsc_wait_ct_th = 0.0; 263 petsc_wait_any_ct_th = 0.0; 264 petsc_wait_all_ct_th = 0.0; 265 petsc_sum_of_waits_ct_th = 0.0; 266 petsc_allreduce_ct_th = 0.0; 267 petsc_gather_ct_th = 0.0; 268 petsc_scatter_ct_th = 0.0; 269 270 #if defined(PETSC_HAVE_DEVICE) 271 petsc_ctog_ct = 0.0; 272 petsc_gtoc_ct = 0.0; 273 petsc_ctog_sz = 0.0; 274 petsc_gtoc_sz = 0.0; 275 petsc_gflops = 0.0; 276 petsc_gtime = 0.0; 277 petsc_ctog_ct_th = 0.0; 278 petsc_gtoc_ct_th = 0.0; 279 petsc_ctog_sz_th = 0.0; 280 petsc_gtoc_sz_th = 0.0; 281 petsc_gflops_th = 0.0; 282 petsc_gtime_th = 0.0; 283 #endif 284 285 PETSC_LARGEST_EVENT = PETSC_EVENT; 286 PetscLogPHC = NULL; 287 PetscLogPHD = NULL; 288 petsc_tracefile = NULL; 289 petsc_tracelevel = 0; 290 petsc_traceblanks = " "; 291 petsc_tracespace[0] = ' '; 292 petsc_tracespace[1] = 0; 293 petsc_tracetime = 0.0; 294 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 295 PETSC_OBJECT_CLASSID = 0; 296 petsc_stageLog = NULL; 297 PetscLogInitializeCalled = PETSC_FALSE; 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 /*@C 302 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 303 304 Not Collective 305 306 Input Parameters: 307 + b - The function called at beginning of event 308 - e - The function called at end of event 309 310 Level: developer 311 312 Developer Notes: 313 The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. 314 315 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()` 316 @*/ 317 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) 318 { 319 PetscFunctionBegin; 320 PetscLogPLB = b; 321 PetscLogPLE = e; 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 /*@C 326 PetscLogIsActive - Check if logging is currently in progress. 327 328 Not Collective 329 330 Output Parameter: 331 . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise 332 333 Level: beginner 334 335 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 336 @*/ 337 PetscErrorCode PetscLogIsActive(PetscBool *isActive) 338 { 339 PetscFunctionBegin; 340 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 341 PetscFunctionReturn(PETSC_SUCCESS); 342 } 343 344 /*@C 345 PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop 346 rates and object creation and should not slow programs down too much. 347 This routine may be called more than once. 348 349 Logically Collective over `PETSC_COMM_WORLD` 350 351 Options Database Key: 352 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 353 screen (for code configured with --with-log=1 (which is the default)) 354 355 Usage: 356 .vb 357 PetscInitialize(...); 358 PetscLogDefaultBegin(); 359 ... code ... 360 PetscLogView(viewer); or PetscLogDump(); 361 PetscFinalize(); 362 .ve 363 364 Level: advanced 365 366 Note: 367 `PetscLogView()` or `PetscLogDump()` actually cause the printing of 368 the logging information. 369 370 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 371 @*/ 372 PetscErrorCode PetscLogDefaultBegin(void) 373 { 374 PetscFunctionBegin; 375 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 376 PetscFunctionReturn(PETSC_SUCCESS); 377 } 378 379 /*@C 380 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 381 all events. This creates large log files and slows the program down. 382 383 Logically Collective on `PETSC_COMM_WORLD` 384 385 Options Database Key: 386 . -log_all - Prints extensive log information 387 388 Usage: 389 .vb 390 PetscInitialize(...); 391 PetscLogAllBegin(); 392 ... code ... 393 PetscLogDump(filename); 394 PetscFinalize(); 395 .ve 396 397 Level: advanced 398 399 Note: 400 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 401 intended for production runs since it logs only flop rates and object 402 creation (and shouldn't significantly slow the programs). 403 404 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 405 @*/ 406 PetscErrorCode PetscLogAllBegin(void) 407 { 408 PetscFunctionBegin; 409 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 410 PetscFunctionReturn(PETSC_SUCCESS); 411 } 412 413 /*@C 414 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 415 begins or ends, the event name is printed. 416 417 Logically Collective on `PETSC_COMM_WORLD` 418 419 Input Parameter: 420 . file - The file to print trace in (e.g. stdout) 421 422 Options Database Key: 423 . -log_trace [filename] - Activates `PetscLogTraceBegin()` 424 425 Level: intermediate 426 427 Notes: 428 `PetscLogTraceBegin()` prints the processor number, the execution time (sec), 429 then "Event begin:" or "Event end:" followed by the event name. 430 431 `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful 432 to determine where a program is hanging without running in the 433 debugger. Can be used in conjunction with the -info option. 434 435 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 436 @*/ 437 PetscErrorCode PetscLogTraceBegin(FILE *file) 438 { 439 PetscFunctionBegin; 440 petsc_tracefile = file; 441 442 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 443 PetscFunctionReturn(PETSC_SUCCESS); 444 } 445 446 /*@ 447 PetscLogActions - Determines whether actions are logged for the graphical viewer. 448 449 Not Collective 450 451 Input Parameter: 452 . flag - `PETSC_TRUE` if actions are to be logged 453 454 Options Database Key: 455 . -log_exclude_actions - Turns off actions logging 456 457 Level: intermediate 458 459 Note: 460 Logging of actions continues to consume more memory as the program 461 runs. Long running programs should consider turning this feature off. 462 463 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 464 @*/ 465 PetscErrorCode PetscLogActions(PetscBool flag) 466 { 467 PetscFunctionBegin; 468 petsc_logActions = flag; 469 PetscFunctionReturn(PETSC_SUCCESS); 470 } 471 472 /*@ 473 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 474 475 Not Collective 476 477 Input Parameter: 478 . flag - `PETSC_TRUE` if objects are to be logged 479 480 Options Database Key: 481 . -log_exclude_objects - Turns off objects logging 482 483 Level: intermediate 484 485 Note: 486 Logging of objects continues to consume more memory as the program 487 runs. Long running programs should consider turning this feature off. 488 489 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 490 @*/ 491 PetscErrorCode PetscLogObjects(PetscBool flag) 492 { 493 PetscFunctionBegin; 494 petsc_logObjects = flag; 495 PetscFunctionReturn(PETSC_SUCCESS); 496 } 497 498 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 499 /*@C 500 PetscLogStageRegister - Attaches a character string name to a logging stage. 501 502 Not Collective 503 504 Input Parameter: 505 . sname - The name to associate with that stage 506 507 Output Parameter: 508 . stage - The stage number 509 510 Level: intermediate 511 512 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 513 @*/ 514 PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage) 515 { 516 PetscStageLog stageLog; 517 PetscLogEvent event; 518 519 PetscFunctionBegin; 520 PetscCall(PetscLogGetStageLog(&stageLog)); 521 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 522 /* Copy events already changed in the main stage, this sucks */ 523 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 524 for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 525 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 526 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 527 if (perfstubs_initialized == PERFSTUBS_SUCCESS) PetscStackCallExternalVoid("ps_timer_create_", stageLog->stageInfo[*stage].timer = ps_timer_create_(sname)); 528 #endif 529 PetscFunctionReturn(PETSC_SUCCESS); 530 } 531 532 /*@C 533 PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage 534 535 Not Collective 536 537 Input Parameter: 538 . stage - The stage on which to log 539 540 Usage: 541 If the option -log_view is used to run the program containing the 542 following code, then 2 sets of summary data will be printed during 543 PetscFinalize(). 544 .vb 545 PetscInitialize(int *argc,char ***args,0,0); 546 [stage 0 of code] 547 PetscLogStagePush(1); 548 [stage 1 of code] 549 PetscLogStagePop(); 550 PetscBarrier(...); 551 [more stage 0 of code] 552 PetscFinalize(); 553 .ve 554 555 Level: intermediate 556 557 Note: 558 Use `PetscLogStageRegister()` to register a stage. 559 560 .seealso: [](ch_profiling), `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 561 @*/ 562 PetscErrorCode PetscLogStagePush(PetscLogStage stage) 563 { 564 PetscStageLog stageLog; 565 566 PetscFunctionBegin; 567 PetscCall(PetscLogGetStageLog(&stageLog)); 568 PetscCall(PetscStageLogPush(stageLog, stage)); 569 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 570 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stage].timer != NULL) PetscStackCallExternalVoid("ps_timer_start_", ps_timer_start_(stageLog->stageInfo[stage].timer)); 571 #endif 572 PetscFunctionReturn(PETSC_SUCCESS); 573 } 574 575 /*@C 576 PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()` 577 578 Not Collective 579 580 Usage: 581 If the option -log_view is used to run the program containing the 582 following code, then 2 sets of summary data will be printed during 583 PetscFinalize(). 584 .vb 585 PetscInitialize(int *argc,char ***args,0,0); 586 [stage 0 of code] 587 PetscLogStagePush(1); 588 [stage 1 of code] 589 PetscLogStagePop(); 590 PetscBarrier(...); 591 [more stage 0 of code] 592 PetscFinalize(); 593 .ve 594 595 Level: intermediate 596 597 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 598 @*/ 599 PetscErrorCode PetscLogStagePop(void) 600 { 601 PetscStageLog stageLog; 602 603 PetscFunctionBegin; 604 PetscCall(PetscLogGetStageLog(&stageLog)); 605 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 606 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stageLog->curStage].timer != NULL) PetscStackCallExternalVoid("ps_timer_stop_", ps_timer_stop_(stageLog->stageInfo[stageLog->curStage].timer)); 607 #endif 608 PetscCall(PetscStageLogPop(stageLog)); 609 PetscFunctionReturn(PETSC_SUCCESS); 610 } 611 612 /*@ 613 PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 614 615 Not Collective 616 617 Input Parameters: 618 + stage - The stage 619 - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 620 621 Level: intermediate 622 623 Note: 624 If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist 625 626 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 627 @*/ 628 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) 629 { 630 PetscStageLog stageLog; 631 632 PetscFunctionBegin; 633 PetscCall(PetscLogGetStageLog(&stageLog)); 634 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 635 PetscFunctionReturn(PETSC_SUCCESS); 636 } 637 638 /*@ 639 PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 640 641 Not Collective 642 643 Input Parameter: 644 . stage - The stage 645 646 Output Parameter: 647 . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 648 649 Level: intermediate 650 651 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 652 @*/ 653 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) 654 { 655 PetscStageLog stageLog; 656 657 PetscFunctionBegin; 658 PetscCall(PetscLogGetStageLog(&stageLog)); 659 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 660 PetscFunctionReturn(PETSC_SUCCESS); 661 } 662 663 /*@ 664 PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()` 665 666 Not Collective 667 668 Input Parameters: 669 + stage - The stage 670 - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 671 672 Level: intermediate 673 674 Developer Notes: 675 What does visible mean, needs to be documented. 676 677 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 678 @*/ 679 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) 680 { 681 PetscStageLog stageLog; 682 683 PetscFunctionBegin; 684 PetscCall(PetscLogGetStageLog(&stageLog)); 685 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 686 PetscFunctionReturn(PETSC_SUCCESS); 687 } 688 689 /*@ 690 PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()` 691 692 Not Collective 693 694 Input Parameter: 695 . stage - The stage 696 697 Output Parameter: 698 . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 699 700 Level: intermediate 701 702 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 703 @*/ 704 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) 705 { 706 PetscStageLog stageLog; 707 708 PetscFunctionBegin; 709 PetscCall(PetscLogGetStageLog(&stageLog)); 710 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 711 PetscFunctionReturn(PETSC_SUCCESS); 712 } 713 714 /*@C 715 PetscLogStageGetId - Returns the stage id when given the stage name. 716 717 Not Collective 718 719 Input Parameter: 720 . name - The stage name 721 722 Output Parameter: 723 . stage - The stage, , or -1 if no stage with that name exists 724 725 Level: intermediate 726 727 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 728 @*/ 729 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) 730 { 731 PetscStageLog stageLog; 732 733 PetscFunctionBegin; 734 PetscCall(PetscLogGetStageLog(&stageLog)); 735 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 736 PetscFunctionReturn(PETSC_SUCCESS); 737 } 738 739 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 740 741 /*@C 742 PetscLogEventRegister - Registers an event name for logging operations 743 744 Not Collective 745 746 Input Parameters: 747 + name - The name associated with the event 748 - classid - The classid associated to the class for this event, obtain either with 749 `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones 750 are only available in C code 751 752 Output Parameter: 753 . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`. 754 755 Example of Usage: 756 .vb 757 PetscLogEvent USER_EVENT; 758 PetscClassId classid; 759 PetscLogDouble user_event_flops; 760 PetscClassIdRegister("class name",&classid); 761 PetscLogEventRegister("User event name",classid,&USER_EVENT); 762 PetscLogEventBegin(USER_EVENT,0,0,0,0); 763 [code segment to monitor] 764 PetscLogFlops(user_event_flops); 765 PetscLogEventEnd(USER_EVENT,0,0,0,0); 766 .ve 767 768 Level: intermediate 769 770 Notes: 771 PETSc automatically logs library events if the code has been 772 configured with --with-log (which is the default) and 773 -log_view or -log_all is specified. `PetscLogEventRegister()` is 774 intended for logging user events to supplement this PETSc 775 information. 776 777 PETSc can gather data for use with the utilities Jumpshot 778 (part of the MPICH distribution). If PETSc has been compiled 779 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 780 MPICH), the user can employ another command line option, -log_mpe, 781 to create a logfile, "mpe.log", which can be visualized 782 Jumpshot. 783 784 The classid is associated with each event so that classes of events 785 can be disabled simultaneously, such as all matrix events. The user 786 can either use an existing classid, such as `MAT_CLASSID`, or create 787 their own as shown in the example. 788 789 If an existing event with the same name exists, its event handle is 790 returned instead of creating a new event. 791 792 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 793 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 794 @*/ 795 PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event) 796 { 797 PetscStageLog stageLog; 798 int stage; 799 800 PetscFunctionBegin; 801 *event = PETSC_DECIDE; 802 PetscCall(PetscLogGetStageLog(&stageLog)); 803 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 804 if (*event > 0) PetscFunctionReturn(PETSC_SUCCESS); 805 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 806 for (stage = 0; stage < stageLog->numStages; stage++) { 807 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 808 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 809 } 810 PetscFunctionReturn(PETSC_SUCCESS); 811 } 812 813 /*@ 814 PetscLogEventSetCollective - Indicates that a particular event is collective. 815 816 Not Collective 817 818 Input Parameters: 819 + event - The event id 820 - collective - Boolean flag indicating whether a particular event is collective 821 822 Level: developer 823 824 Notes: 825 New events returned from `PetscLogEventRegister()` are collective by default. 826 827 Collective events are handled specially if the -log_sync is used. In that case the logging saves information about 828 two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication 829 to be performed. This option is useful to debug imbalance within the computations or communications 830 831 .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()` 832 @*/ 833 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective) 834 { 835 PetscStageLog stageLog; 836 PetscEventRegLog eventRegLog; 837 838 PetscFunctionBegin; 839 PetscCall(PetscLogGetStageLog(&stageLog)); 840 PetscCall(PetscStageLogGetEventRegLog(stageLog, &eventRegLog)); 841 PetscCheck(event >= 0 && event <= eventRegLog->numEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid event id"); 842 eventRegLog->eventInfo[event].collective = collective; 843 PetscFunctionReturn(PETSC_SUCCESS); 844 } 845 846 /*@ 847 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 848 849 Not Collective 850 851 Input Parameter: 852 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 853 854 Level: developer 855 856 .seealso: [](ch_profiling), `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 857 @*/ 858 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) 859 { 860 PetscStageLog stageLog; 861 int stage; 862 863 PetscFunctionBegin; 864 PetscCall(PetscLogGetStageLog(&stageLog)); 865 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 866 PetscFunctionReturn(PETSC_SUCCESS); 867 } 868 869 /*@ 870 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 871 872 Not Collective 873 874 Input Parameter: 875 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 876 877 Level: developer 878 879 Note: 880 If a class is excluded then events associated with that class are not logged. 881 882 .seealso: [](ch_profiling), `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 883 @*/ 884 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) 885 { 886 PetscStageLog stageLog; 887 int stage; 888 889 PetscFunctionBegin; 890 PetscCall(PetscLogGetStageLog(&stageLog)); 891 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 892 PetscFunctionReturn(PETSC_SUCCESS); 893 } 894 895 /*@ 896 PetscLogEventActivate - Indicates that a particular event should be logged. 897 898 Not Collective 899 900 Input Parameter: 901 . event - The event id 902 903 Usage: 904 .vb 905 PetscLogEventDeactivate(VEC_SetValues); 906 [code where you do not want to log VecSetValues()] 907 PetscLogEventActivate(VEC_SetValues); 908 [code where you do want to log VecSetValues()] 909 .ve 910 911 Level: advanced 912 913 Note: 914 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 915 or an event number obtained with `PetscLogEventRegister()`. 916 917 .seealso: [](ch_profiling), `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 918 @*/ 919 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) 920 { 921 PetscStageLog stageLog; 922 int stage; 923 924 PetscFunctionBegin; 925 PetscCall(PetscLogGetStageLog(&stageLog)); 926 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 927 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 928 PetscFunctionReturn(PETSC_SUCCESS); 929 } 930 931 /*@ 932 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 933 934 Not Collective 935 936 Input Parameter: 937 . event - The event id 938 939 Usage: 940 .vb 941 PetscLogEventDeactivate(VEC_SetValues); 942 [code where you do not want to log VecSetValues()] 943 PetscLogEventActivate(VEC_SetValues); 944 [code where you do want to log VecSetValues()] 945 .ve 946 947 Level: advanced 948 949 Note: 950 The event may be either a pre-defined PETSc event (found in 951 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 952 953 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 954 @*/ 955 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) 956 { 957 PetscStageLog stageLog; 958 int stage; 959 960 PetscFunctionBegin; 961 PetscCall(PetscLogGetStageLog(&stageLog)); 962 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 963 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 964 PetscFunctionReturn(PETSC_SUCCESS); 965 } 966 967 /*@ 968 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called 969 970 Not Collective 971 972 Input Parameter: 973 . event - The event id 974 975 Usage: 976 .vb 977 PetscLogEventDeactivatePush(VEC_SetValues); 978 [code where you do not want to log VecSetValues()] 979 PetscLogEventDeactivatePop(VEC_SetValues); 980 [code where you do want to log VecSetValues()] 981 .ve 982 983 Level: advanced 984 985 Note: 986 The event may be either a pre-defined PETSc event (found in 987 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 988 989 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()` 990 @*/ 991 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) 992 { 993 PetscStageLog stageLog; 994 int stage; 995 996 PetscFunctionBegin; 997 PetscCall(PetscLogGetStageLog(&stageLog)); 998 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 999 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 1000 PetscFunctionReturn(PETSC_SUCCESS); 1001 } 1002 1003 /*@ 1004 PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()` 1005 1006 Not Collective 1007 1008 Input Parameter: 1009 . event - The event id 1010 1011 Usage: 1012 .vb 1013 PetscLogEventDeactivatePush(VEC_SetValues); 1014 [code where you do not want to log VecSetValues()] 1015 PetscLogEventDeactivatePop(VEC_SetValues); 1016 [code where you do want to log VecSetValues()] 1017 .ve 1018 1019 Level: advanced 1020 1021 Note: 1022 The event may be either a pre-defined PETSc event (found in 1023 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 1024 1025 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 1026 @*/ 1027 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) 1028 { 1029 PetscStageLog stageLog; 1030 int stage; 1031 1032 PetscFunctionBegin; 1033 PetscCall(PetscLogGetStageLog(&stageLog)); 1034 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1035 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 1036 PetscFunctionReturn(PETSC_SUCCESS); 1037 } 1038 1039 /*@ 1040 PetscLogEventSetActiveAll - Turns on logging of all events 1041 1042 Not Collective 1043 1044 Input Parameters: 1045 + event - The event id 1046 - isActive - The activity flag determining whether the event is logged 1047 1048 Level: advanced 1049 1050 .seealso: [](ch_profiling), `PlogEventActivate()`, `PlogEventDeactivate()` 1051 @*/ 1052 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) 1053 { 1054 PetscStageLog stageLog; 1055 int stage; 1056 1057 PetscFunctionBegin; 1058 PetscCall(PetscLogGetStageLog(&stageLog)); 1059 for (stage = 0; stage < stageLog->numStages; stage++) { 1060 if (isActive) { 1061 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 1062 } else { 1063 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 1064 } 1065 } 1066 PetscFunctionReturn(PETSC_SUCCESS); 1067 } 1068 1069 /*@ 1070 PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage 1071 1072 Not Collective 1073 1074 Input Parameter: 1075 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 1076 1077 Level: developer 1078 1079 .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 1080 @*/ 1081 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) 1082 { 1083 PetscStageLog stageLog; 1084 int stage; 1085 1086 PetscFunctionBegin; 1087 PetscCall(PetscLogGetStageLog(&stageLog)); 1088 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1089 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1090 PetscFunctionReturn(PETSC_SUCCESS); 1091 } 1092 1093 /*@ 1094 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage 1095 1096 Not Collective 1097 1098 Input Parameter: 1099 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 1100 1101 Level: developer 1102 1103 .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 1104 @*/ 1105 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) 1106 { 1107 PetscStageLog stageLog; 1108 int stage; 1109 1110 PetscFunctionBegin; 1111 PetscCall(PetscLogGetStageLog(&stageLog)); 1112 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1113 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1114 PetscFunctionReturn(PETSC_SUCCESS); 1115 } 1116 1117 /*MC 1118 PetscLogEventSync - Synchronizes the beginning of a user event. 1119 1120 Synopsis: 1121 #include <petsclog.h> 1122 PetscErrorCode PetscLogEventSync(int e,MPI_Comm comm) 1123 1124 Collective 1125 1126 Input Parameters: 1127 + e - integer associated with the event obtained from PetscLogEventRegister() 1128 - comm - an MPI communicator 1129 1130 Usage: 1131 .vb 1132 PetscLogEvent USER_EVENT; 1133 PetscLogEventRegister("User event",0,&USER_EVENT); 1134 PetscLogEventSync(USER_EVENT,PETSC_COMM_WORLD); 1135 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1136 [code segment to monitor] 1137 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1138 .ve 1139 1140 Level: developer 1141 1142 Note: 1143 This routine should be called only if there is not a 1144 `PetscObject` available to pass to `PetscLogEventBegin()`. 1145 1146 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1147 M*/ 1148 1149 /*MC 1150 PetscLogEventBegin - Logs the beginning of a user event. 1151 1152 Synopsis: 1153 #include <petsclog.h> 1154 PetscErrorCode PetscLogEventBegin(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1155 1156 Not Collective 1157 1158 Input Parameters: 1159 + e - integer associated with the event obtained from PetscLogEventRegister() 1160 - o1,o2,o3,o4 - objects associated with the event, or 0 1161 1162 Fortran Synopsis: 1163 void PetscLogEventBegin(int e,PetscErrorCode ierr) 1164 1165 Usage: 1166 .vb 1167 PetscLogEvent USER_EVENT; 1168 PetscLogDouble user_event_flops; 1169 PetscLogEventRegister("User event",0,&USER_EVENT); 1170 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1171 [code segment to monitor] 1172 PetscLogFlops(user_event_flops); 1173 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1174 .ve 1175 1176 Level: intermediate 1177 1178 Developer Note: 1179 `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly handling the 1180 errors that occur in the macro directly because other packages that use this macros have used them in their 1181 own functions or methods that do not return error codes and it would be disruptive to change the current 1182 behavior. 1183 1184 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1185 M*/ 1186 1187 /*MC 1188 PetscLogEventEnd - Log the end of a user event. 1189 1190 Synopsis: 1191 #include <petsclog.h> 1192 PetscErrorCode PetscLogEventEnd(int e,PetscObject o1,PetscObject o2,PetscObject o3,PetscObject o4) 1193 1194 Not Collective 1195 1196 Input Parameters: 1197 + e - integer associated with the event obtained with PetscLogEventRegister() 1198 - o1,o2,o3,o4 - objects associated with the event, or 0 1199 1200 Fortran Synopsis: 1201 void PetscLogEventEnd(int e,PetscErrorCode ierr) 1202 1203 Usage: 1204 .vb 1205 PetscLogEvent USER_EVENT; 1206 PetscLogDouble user_event_flops; 1207 PetscLogEventRegister("User event",0,&USER_EVENT,); 1208 PetscLogEventBegin(USER_EVENT,0,0,0,0); 1209 [code segment to monitor] 1210 PetscLogFlops(user_event_flops); 1211 PetscLogEventEnd(USER_EVENT,0,0,0,0); 1212 .ve 1213 1214 Level: intermediate 1215 1216 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1217 M*/ 1218 1219 /*@C 1220 PetscLogEventGetId - Returns the event id when given the event name. 1221 1222 Not Collective 1223 1224 Input Parameter: 1225 . name - The event name 1226 1227 Output Parameter: 1228 . event - The event, or -1 if no event with that name exists 1229 1230 Level: intermediate 1231 1232 .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1233 @*/ 1234 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) 1235 { 1236 PetscStageLog stageLog; 1237 1238 PetscFunctionBegin; 1239 PetscCall(PetscLogGetStageLog(&stageLog)); 1240 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event) 1245 { 1246 PetscFunctionBegin; 1247 if (!PetscDefined(HAVE_THREADSAFETY)) PetscCall(PetscIntStackPush(current_log_event_stack, event)); 1248 PetscFunctionReturn(PETSC_SUCCESS); 1249 } 1250 1251 PetscErrorCode PetscLogPopCurrentEvent_Internal(void) 1252 { 1253 PetscFunctionBegin; 1254 if (!PetscDefined(HAVE_THREADSAFETY)) PetscCall(PetscIntStackPop(current_log_event_stack, NULL)); 1255 PetscFunctionReturn(PETSC_SUCCESS); 1256 } 1257 1258 PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event) 1259 { 1260 PetscBool empty; 1261 1262 PetscFunctionBegin; 1263 PetscValidIntPointer(event, 1); 1264 *event = PETSC_DECIDE; 1265 PetscCall(PetscIntStackEmpty(current_log_event_stack, &empty)); 1266 if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack, event)); 1267 PetscFunctionReturn(PETSC_SUCCESS); 1268 } 1269 1270 PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event) 1271 { 1272 PetscFunctionBegin; 1273 if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event, NULL, NULL, NULL, NULL)); 1274 PetscFunctionReturn(PETSC_SUCCESS); 1275 } 1276 1277 PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event) 1278 { 1279 PetscStageLog stageLog; 1280 PetscEventPerfLog eventLog; 1281 int stage; 1282 1283 PetscFunctionBegin; 1284 if (event == PETSC_DECIDE) PetscFunctionReturn(PETSC_SUCCESS); 1285 PetscCall(PetscLogEventBegin(event, NULL, NULL, NULL, NULL)); 1286 PetscCall(PetscLogGetStageLog(&stageLog)); 1287 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1288 PetscCall(PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog)); 1289 eventLog->eventInfo[event].count--; 1290 PetscFunctionReturn(PETSC_SUCCESS); 1291 } 1292 1293 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1294 /*@C 1295 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1296 be read by bin/petscview. This program no longer exists. 1297 1298 Collective on `PETSC_COMM_WORLD` 1299 1300 Input Parameter: 1301 . sname - an optional file name 1302 1303 Usage: 1304 .vb 1305 PetscInitialize(...); 1306 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1307 ... code ... 1308 PetscLogDump(filename); 1309 PetscFinalize(); 1310 .ve 1311 1312 Level: advanced 1313 1314 Note: 1315 The default file name is Log.<rank> where <rank> is the MPI process rank. If no name is specified, 1316 this file will be used. 1317 1318 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1319 @*/ 1320 PetscErrorCode PetscLogDump(const char sname[]) 1321 { 1322 PetscStageLog stageLog; 1323 PetscEventPerfInfo *eventInfo; 1324 FILE *fd; 1325 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1326 PetscLogDouble flops, _TotalTime; 1327 PetscMPIInt rank; 1328 int action, object, curStage; 1329 PetscLogEvent event; 1330 1331 PetscFunctionBegin; 1332 /* Calculate the total elapsed time */ 1333 PetscCall(PetscTime(&_TotalTime)); 1334 _TotalTime -= petsc_BaseTime; 1335 /* Open log file */ 1336 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1337 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank)); 1338 PetscCall(PetscFixFilename(file, fname)); 1339 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1340 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1341 /* Output totals */ 1342 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1343 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1344 /* Output actions */ 1345 if (petsc_logActions) { 1346 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1347 for (action = 0; action < petsc_numActions; action++) { 1348 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1349 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1350 } 1351 } 1352 /* Output objects */ 1353 if (petsc_logObjects) { 1354 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1355 for (object = 0; object < petsc_numObjects; object++) { 1356 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem)); 1357 if (!petsc_objects[object].name[0]) { 1358 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n")); 1359 } else { 1360 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1361 } 1362 if (petsc_objects[object].info[0] != 0) { 1363 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1364 } else { 1365 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1366 } 1367 } 1368 } 1369 /* Output events */ 1370 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1371 PetscCall(PetscLogGetStageLog(&stageLog)); 1372 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1373 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1374 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1375 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time; 1376 else flops = 0.0; 1377 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops)); 1378 } 1379 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 1383 /* 1384 PetscLogView_Detailed - Each process prints the times for its own events 1385 1386 */ 1387 PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) 1388 { 1389 PetscStageLog stageLog; 1390 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1391 PetscLogDouble locTotalTime, numRed, maxMem; 1392 int numStages, numEvents, stage, event; 1393 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1394 PetscMPIInt rank, size; 1395 1396 PetscFunctionBegin; 1397 PetscCallMPI(MPI_Comm_size(comm, &size)); 1398 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1399 /* Must preserve reduction count before we go on */ 1400 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1401 /* Get the total elapsed time */ 1402 PetscCall(PetscTime(&locTotalTime)); 1403 locTotalTime -= petsc_BaseTime; 1404 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 1405 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 1406 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 1407 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 1408 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 1409 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 1410 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 1411 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 1412 PetscCall(PetscLogGetStageLog(&stageLog)); 1413 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1414 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1415 for (stage = 0; stage < numStages; stage++) { 1416 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name)); 1417 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name)); 1418 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1419 for (event = 0; event < numEvents; event++) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name)); 1420 } 1421 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1422 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1423 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1424 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1425 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1426 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1427 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1428 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects)); 1429 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1430 PetscCall(PetscViewerFlush(viewer)); 1431 for (stage = 0; stage < numStages; stage++) { 1432 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1433 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, 1434 stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1435 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1436 for (event = 0; event < numEvents; event++) { 1437 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1438 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1439 stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, 1440 eventInfo->flops)); 1441 if (eventInfo->dof[0] >= 0.) { 1442 PetscInt d, e; 1443 1444 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1445 for (d = 0; d < 8; ++d) { 1446 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1447 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1448 } 1449 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1450 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1451 for (e = 0; e < 8; ++e) { 1452 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1453 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1454 } 1455 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1456 } 1457 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1458 } 1459 } 1460 PetscCall(PetscViewerFlush(viewer)); 1461 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1462 PetscFunctionReturn(PETSC_SUCCESS); 1463 } 1464 1465 /* 1466 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1467 */ 1468 PetscErrorCode PetscLogView_CSV(PetscViewer viewer) 1469 { 1470 PetscStageLog stageLog; 1471 PetscEventPerfInfo *eventInfo = NULL; 1472 PetscLogDouble locTotalTime, maxMem; 1473 int numStages, numEvents, stage, event; 1474 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1475 PetscMPIInt rank, size; 1476 1477 PetscFunctionBegin; 1478 PetscCallMPI(MPI_Comm_size(comm, &size)); 1479 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1480 /* Must preserve reduction count before we go on */ 1481 /* Get the total elapsed time */ 1482 PetscCall(PetscTime(&locTotalTime)); 1483 locTotalTime -= petsc_BaseTime; 1484 PetscCall(PetscLogGetStageLog(&stageLog)); 1485 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1486 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1487 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1488 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1489 PetscCall(PetscViewerFlush(viewer)); 1490 for (stage = 0; stage < numStages; stage++) { 1491 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1492 1493 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1494 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1495 for (event = 0; event < numEvents; event++) { 1496 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1497 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, 1498 eventInfo->numReductions, eventInfo->flops)); 1499 if (eventInfo->dof[0] >= 0.) { 1500 PetscInt d, e; 1501 1502 for (d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1503 for (e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1504 } 1505 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1506 } 1507 } 1508 PetscCall(PetscViewerFlush(viewer)); 1509 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1510 PetscFunctionReturn(PETSC_SUCCESS); 1511 } 1512 1513 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) 1514 { 1515 PetscFunctionBegin; 1516 if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS); 1517 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1518 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1519 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1520 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1521 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1522 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1523 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1524 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1525 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1526 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1527 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1528 PetscFunctionReturn(PETSC_SUCCESS); 1529 } 1530 1531 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) 1532 { 1533 PetscFunctionBegin; 1534 if (PetscDefined(USE_DEBUG)) { 1535 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1536 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1537 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1538 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1539 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1540 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1541 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1542 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1543 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1544 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1545 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1546 } 1547 PetscFunctionReturn(PETSC_SUCCESS); 1548 } 1549 1550 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) 1551 { 1552 #if defined(PETSC_HAVE_DEVICE) 1553 PetscMPIInt size; 1554 PetscBool deviceInitialized = PETSC_FALSE; 1555 1556 PetscFunctionBegin; 1557 PetscCallMPI(MPI_Comm_size(comm, &size)); 1558 for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) { 1559 const PetscDeviceType dtype = PetscDeviceTypeCast(i); 1560 if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */ 1561 deviceInitialized = PETSC_TRUE; 1562 break; 1563 } 1564 } 1565 /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */ 1566 if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS); 1567 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1568 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1569 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1570 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1571 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1572 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1573 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1574 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1575 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1576 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1577 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1578 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1579 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1580 PetscFunctionReturn(PETSC_SUCCESS); 1581 #else 1582 return PETSC_SUCCESS; 1583 #endif 1584 } 1585 1586 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) 1587 { 1588 #if defined(PETSC_HAVE_DEVICE) 1589 1590 PetscFunctionBegin; 1591 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS); 1592 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1593 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1594 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1595 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1596 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1597 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1598 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1599 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1600 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1601 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1602 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1603 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1604 PetscFunctionReturn(PETSC_SUCCESS); 1605 #else 1606 return PETSC_SUCCESS; 1607 #endif 1608 } 1609 1610 PetscErrorCode PetscLogView_Default(PetscViewer viewer) 1611 { 1612 FILE *fd; 1613 PetscLogDouble zero = 0.0; 1614 PetscStageLog stageLog; 1615 PetscStageInfo *stageInfo = NULL; 1616 PetscEventPerfInfo *eventInfo = NULL; 1617 PetscClassPerfInfo *classInfo; 1618 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1619 const char *name; 1620 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1621 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1622 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1623 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1624 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1625 PetscLogDouble min, max, tot, ratio, avg, x, y; 1626 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1627 #if defined(PETSC_HAVE_DEVICE) 1628 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1629 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1630 #endif 1631 PetscMPIInt minC, maxC; 1632 PetscMPIInt size, rank; 1633 PetscBool *localStageUsed, *stageUsed; 1634 PetscBool *localStageVisible, *stageVisible; 1635 int numStages, localNumEvents, numEvents; 1636 int stage, oclass; 1637 PetscLogEvent event; 1638 char version[256]; 1639 MPI_Comm comm; 1640 #if defined(PETSC_HAVE_DEVICE) 1641 PetscLogEvent eventid; 1642 PetscInt64 nas = 0x7FF0000000000002; 1643 #endif 1644 1645 PetscFunctionBegin; 1646 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1647 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1648 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1649 PetscCallMPI(MPI_Comm_size(comm, &size)); 1650 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1651 /* Get the total elapsed time */ 1652 PetscCall(PetscTime(&locTotalTime)); 1653 locTotalTime -= petsc_BaseTime; 1654 1655 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1656 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1657 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1658 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1659 PetscCall(PetscLogViewWarnSync(comm, fd)); 1660 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1661 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1662 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1663 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1664 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1665 PetscCall(PetscGetUserName(username, sizeof(username))); 1666 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1667 PetscCall(PetscGetDate(date, sizeof(date))); 1668 PetscCall(PetscGetVersion(version, sizeof(version))); 1669 if (size == 1) { 1670 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1671 } else { 1672 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1673 } 1674 #if defined(PETSC_HAVE_OPENMP) 1675 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1676 #endif 1677 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1678 1679 /* Must preserve reduction count before we go on */ 1680 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1681 1682 /* Calculate summary information */ 1683 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1684 /* Time */ 1685 PetscCall(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1686 PetscCall(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1687 PetscCall(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1688 avg = tot / ((PetscLogDouble)size); 1689 if (min != 0.0) ratio = max / min; 1690 else ratio = 0.0; 1691 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1692 TotalTime = tot; 1693 /* Objects */ 1694 avg = (PetscLogDouble)petsc_numObjects; 1695 PetscCall(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1696 PetscCall(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1697 PetscCall(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1698 avg = tot / ((PetscLogDouble)size); 1699 if (min != 0.0) ratio = max / min; 1700 else ratio = 0.0; 1701 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1702 /* Flops */ 1703 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1704 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1705 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1706 avg = tot / ((PetscLogDouble)size); 1707 if (min != 0.0) ratio = max / min; 1708 else ratio = 0.0; 1709 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1710 TotalFlops = tot; 1711 /* Flops/sec -- Must talk to Barry here */ 1712 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1713 else flops = 0.0; 1714 PetscCall(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1715 PetscCall(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1716 PetscCall(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1717 avg = tot / ((PetscLogDouble)size); 1718 if (min != 0.0) ratio = max / min; 1719 else ratio = 0.0; 1720 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1721 /* Memory */ 1722 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1723 if (mem > 0.0) { 1724 PetscCall(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1725 PetscCall(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1726 PetscCall(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1727 avg = tot / ((PetscLogDouble)size); 1728 if (min != 0.0) ratio = max / min; 1729 else ratio = 0.0; 1730 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1731 } 1732 /* Messages */ 1733 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1734 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1735 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1736 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1737 avg = tot / ((PetscLogDouble)size); 1738 if (min != 0.0) ratio = max / min; 1739 else ratio = 0.0; 1740 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1741 numMessages = tot; 1742 /* Message Lengths */ 1743 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1744 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1745 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1746 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1747 if (numMessages != 0) avg = tot / numMessages; 1748 else avg = 0.0; 1749 if (min != 0.0) ratio = max / min; 1750 else ratio = 0.0; 1751 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1752 messageLength = tot; 1753 /* Reductions */ 1754 PetscCall(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1755 PetscCall(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1756 PetscCall(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1757 if (min != 0.0) ratio = max / min; 1758 else ratio = 0.0; 1759 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1760 numReductions = red; /* wrong because uses count from process zero */ 1761 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1762 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1763 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1764 1765 /* Get total number of stages -- 1766 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1767 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1768 This seems best accomplished by assoicating a communicator with each stage. 1769 */ 1770 PetscCall(PetscLogGetStageLog(&stageLog)); 1771 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1772 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1773 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1774 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1775 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1776 if (numStages > 0) { 1777 stageInfo = stageLog->stageInfo; 1778 for (stage = 0; stage < numStages; stage++) { 1779 if (stage < stageLog->numStages) { 1780 localStageUsed[stage] = stageInfo[stage].used; 1781 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1782 } else { 1783 localStageUsed[stage] = PETSC_FALSE; 1784 localStageVisible[stage] = PETSC_TRUE; 1785 } 1786 } 1787 PetscCall(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1788 PetscCall(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1789 for (stage = 0; stage < numStages; stage++) { 1790 if (stageUsed[stage]) { 1791 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1792 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1793 break; 1794 } 1795 } 1796 for (stage = 0; stage < numStages; stage++) { 1797 if (!stageUsed[stage]) continue; 1798 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1799 if (localStageUsed[stage]) { 1800 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1801 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1802 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1803 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1804 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1805 name = stageInfo[stage].name; 1806 } else { 1807 PetscCall(MPIU_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1808 PetscCall(MPIU_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1809 PetscCall(MPIU_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1810 PetscCall(MPIU_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1811 PetscCall(MPIU_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1812 name = ""; 1813 } 1814 mess *= 0.5; 1815 messLen *= 0.5; 1816 red /= size; 1817 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1818 else fracTime = 0.0; 1819 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1820 else fracFlops = 0.0; 1821 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1822 if (numMessages != 0.0) fracMessages = mess / numMessages; 1823 else fracMessages = 0.0; 1824 if (mess != 0.0) avgMessLen = messLen / mess; 1825 else avgMessLen = 0.0; 1826 if (messageLength != 0.0) fracLength = messLen / messageLength; 1827 else fracLength = 0.0; 1828 if (numReductions != 0.0) fracReductions = red / numReductions; 1829 else fracReductions = 0.0; 1830 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1831 } 1832 } 1833 1834 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1835 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1836 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1837 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1838 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1839 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1840 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1841 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1842 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1843 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1844 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1845 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1846 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1847 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1848 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1849 if (PetscLogMemory) { 1850 PetscCall(PetscFPrintf(comm, fd, " Memory usage is summed over all MPI processes, it is given in mega-bytes\n")); 1851 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1852 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1853 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1854 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1855 } 1856 #if defined(PETSC_HAVE_DEVICE) 1857 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1858 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1859 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1860 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1861 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1862 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1863 #endif 1864 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1865 1866 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1867 1868 /* Report events */ 1869 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1870 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1871 #if defined(PETSC_HAVE_DEVICE) 1872 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1873 #endif 1874 PetscCall(PetscFPrintf(comm, fd, "\n")); 1875 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1876 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1877 #if defined(PETSC_HAVE_DEVICE) 1878 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1879 #endif 1880 PetscCall(PetscFPrintf(comm, fd, "\n")); 1881 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1882 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1883 #if defined(PETSC_HAVE_DEVICE) 1884 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1885 #endif 1886 PetscCall(PetscFPrintf(comm, fd, "\n")); 1887 1888 #if defined(PETSC_HAVE_DEVICE) 1889 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1890 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1891 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1892 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1893 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1894 #endif 1895 1896 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1897 for (stage = 0; stage < numStages; stage++) { 1898 if (!stageVisible[stage]) continue; 1899 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1900 if (localStageUsed[stage]) { 1901 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1902 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1903 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1904 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1905 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1906 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1907 } else { 1908 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1909 PetscCall(MPIU_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1910 PetscCall(MPIU_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1911 PetscCall(MPIU_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1912 PetscCall(MPIU_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1913 PetscCall(MPIU_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1914 } 1915 mess *= 0.5; 1916 messLen *= 0.5; 1917 red /= size; 1918 1919 /* Get total number of events in this stage -- 1920 Currently, a single processor can register more events than another, but events must all be registered in order, 1921 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1922 on the event ID. This seems best accomplished by associating a communicator with each stage. 1923 1924 Problem: If the event did not happen on proc 1, its name will not be available. 1925 Problem: Event visibility is not implemented 1926 */ 1927 if (localStageUsed[stage]) { 1928 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1929 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1930 } else localNumEvents = 0; 1931 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1932 for (event = 0; event < numEvents; event++) { 1933 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1934 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1935 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; 1936 else flopr = 0.0; 1937 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1938 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1939 PetscCall(MPIU_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1940 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1941 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1942 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1943 PetscCall(MPIU_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1944 PetscCall(MPIU_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1945 PetscCall(MPIU_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1946 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1947 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1948 if (PetscLogMemory) { 1949 PetscCall(MPIU_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1950 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1951 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1952 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1953 } 1954 #if defined(PETSC_HAVE_DEVICE) 1955 PetscCall(MPIU_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1956 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1957 PetscCall(MPIU_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1958 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1959 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1960 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1961 #endif 1962 name = stageLog->eventLog->eventInfo[event].name; 1963 } else { 1964 int ierr = 0; 1965 1966 flopr = 0.0; 1967 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1968 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1969 PetscCall(MPIU_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1970 PetscCall(MPIU_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1971 PetscCall(MPIU_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1972 PetscCall(MPIU_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1973 PetscCall(MPIU_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1974 PetscCall(MPIU_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1975 PetscCall(MPIU_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1976 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 1977 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1978 if (PetscLogMemory) { 1979 PetscCall(MPIU_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1980 PetscCall(MPIU_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1981 PetscCall(MPIU_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1982 PetscCall(MPIU_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1983 } 1984 #if defined(PETSC_HAVE_DEVICE) 1985 PetscCall(MPIU_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1986 PetscCall(MPIU_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1987 PetscCall(MPIU_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1988 PetscCall(MPIU_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1989 PetscCall(MPIU_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1990 PetscCall(MPIU_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1991 #endif 1992 name = ""; 1993 } 1994 if (mint < 0.0) { 1995 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name)); 1996 mint = 0; 1997 } 1998 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, name); 1999 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 2000 #if defined(PETSC_HAVE_DEVICE) 2001 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 2002 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 2003 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 2004 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 2005 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 2006 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 2007 } 2008 } 2009 #endif 2010 totm *= 0.5; 2011 totml *= 0.5; 2012 totr /= size; 2013 2014 if (maxC != 0) { 2015 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 2016 else ratC = 0.0; 2017 if (mint != 0.0) ratt = maxt / mint; 2018 else ratt = 0.0; 2019 if (minf != 0.0) ratf = maxf / minf; 2020 else ratf = 0.0; 2021 if (TotalTime != 0.0) fracTime = tott / TotalTime; 2022 else fracTime = 0.0; 2023 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 2024 else fracFlops = 0.0; 2025 if (stageTime != 0.0) fracStageTime = tott / stageTime; 2026 else fracStageTime = 0.0; 2027 if (flops != 0.0) fracStageFlops = totf / flops; 2028 else fracStageFlops = 0.0; 2029 if (numMessages != 0.0) fracMess = totm / numMessages; 2030 else fracMess = 0.0; 2031 if (messageLength != 0.0) fracMessLen = totml / messageLength; 2032 else fracMessLen = 0.0; 2033 if (numReductions != 0.0) fracRed = totr / numReductions; 2034 else fracRed = 0.0; 2035 if (mess != 0.0) fracStageMess = totm / mess; 2036 else fracStageMess = 0.0; 2037 if (messLen != 0.0) fracStageMessLen = totml / messLen; 2038 else fracStageMessLen = 0.0; 2039 if (red != 0.0) fracStageRed = totr / red; 2040 else fracStageRed = 0.0; 2041 if (totm != 0.0) totml /= totm; 2042 else totml = 0.0; 2043 if (maxt != 0.0) flopr = totf / maxt; 2044 else flopr = 0.0; 2045 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) 2046 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 2047 else 2048 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 2049 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 2050 #if defined(PETSC_HAVE_DEVICE) 2051 if (totf != 0.0) fracgflops = gflops / totf; 2052 else fracgflops = 0.0; 2053 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 2054 else gflopr = 0.0; 2055 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 2056 #endif 2057 PetscCall(PetscFPrintf(comm, fd, "\n")); 2058 } 2059 } 2060 } 2061 2062 /* Memory usage and object creation */ 2063 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 2064 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 2065 #if defined(PETSC_HAVE_DEVICE) 2066 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 2067 #endif 2068 PetscCall(PetscFPrintf(comm, fd, "\n")); 2069 PetscCall(PetscFPrintf(comm, fd, "\n")); 2070 2071 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 2072 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 2073 stats for stages local to processor sets. 2074 */ 2075 /* We should figure out the longest object name here (now 20 characters) */ 2076 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 2077 for (stage = 0; stage < numStages; stage++) { 2078 if (localStageUsed[stage]) { 2079 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 2080 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 2081 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 2082 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 2083 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions)); 2084 } 2085 } 2086 } else { 2087 if (!localStageVisible[stage]) continue; 2088 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 2089 } 2090 } 2091 2092 PetscCall(PetscFree(localStageUsed)); 2093 PetscCall(PetscFree(stageUsed)); 2094 PetscCall(PetscFree(localStageVisible)); 2095 PetscCall(PetscFree(stageVisible)); 2096 2097 /* Information unrelated to this particular run */ 2098 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 2099 PetscCall(PetscTime(&y)); 2100 PetscCall(PetscTime(&x)); 2101 PetscCall(PetscTime(&y)); 2102 PetscCall(PetscTime(&y)); 2103 PetscCall(PetscTime(&y)); 2104 PetscCall(PetscTime(&y)); 2105 PetscCall(PetscTime(&y)); 2106 PetscCall(PetscTime(&y)); 2107 PetscCall(PetscTime(&y)); 2108 PetscCall(PetscTime(&y)); 2109 PetscCall(PetscTime(&y)); 2110 PetscCall(PetscTime(&y)); 2111 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 2112 /* MPI information */ 2113 if (size > 1) { 2114 MPI_Status status; 2115 PetscMPIInt tag; 2116 MPI_Comm newcomm; 2117 2118 PetscCallMPI(MPI_Barrier(comm)); 2119 PetscCall(PetscTime(&x)); 2120 PetscCallMPI(MPI_Barrier(comm)); 2121 PetscCallMPI(MPI_Barrier(comm)); 2122 PetscCallMPI(MPI_Barrier(comm)); 2123 PetscCallMPI(MPI_Barrier(comm)); 2124 PetscCallMPI(MPI_Barrier(comm)); 2125 PetscCall(PetscTime(&y)); 2126 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 2127 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 2128 PetscCallMPI(MPI_Barrier(comm)); 2129 if (rank) { 2130 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 2131 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 2132 } else { 2133 PetscCall(PetscTime(&x)); 2134 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 2135 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 2136 PetscCall(PetscTime(&y)); 2137 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 2138 } 2139 PetscCall(PetscCommDestroy(&newcomm)); 2140 } 2141 PetscCall(PetscOptionsView(NULL, viewer)); 2142 2143 /* Machine and compile information */ 2144 #if defined(PETSC_USE_FORTRAN_KERNELS) 2145 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 2146 #else 2147 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 2148 #endif 2149 #if defined(PETSC_USE_64BIT_INDICES) 2150 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64-bit PetscInt\n")); 2151 #elif defined(PETSC_USE___FLOAT128) 2152 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32-bit PetscInt\n")); 2153 #endif 2154 #if defined(PETSC_USE_REAL_SINGLE) 2155 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 2156 #elif defined(PETSC_USE___FLOAT128) 2157 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 2158 #endif 2159 #if defined(PETSC_USE_REAL_MAT_SINGLE) 2160 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 2161 #else 2162 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 2163 #endif 2164 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 2165 2166 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 2167 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 2168 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 2169 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 2170 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 2171 2172 /* Cleanup */ 2173 PetscCall(PetscFPrintf(comm, fd, "\n")); 2174 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 2175 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 2176 PetscCall(PetscFPTrapPop()); 2177 PetscFunctionReturn(PETSC_SUCCESS); 2178 } 2179 2180 /*@C 2181 PetscLogView - Prints a summary of the logging. 2182 2183 Collective over MPI_Comm 2184 2185 Input Parameter: 2186 . viewer - an ASCII viewer 2187 2188 Options Database Keys: 2189 + -log_view [:filename] - Prints summary of log information 2190 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2191 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2192 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2193 . -log_view_memory - Also display memory usage in each event 2194 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2195 . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation 2196 - -log_trace [filename] - Displays a trace of what each process is doing 2197 2198 Level: beginner 2199 2200 Notes: 2201 It is possible to control the logging programmatically but we recommend using the options database approach whenever possible 2202 By default the summary is printed to stdout. 2203 2204 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2205 2206 If PETSc is configured with --with-logging=0 then this functionality is not available 2207 2208 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2209 directory then open filename.xml with your browser. Specific notes for certain browsers 2210 $ Firefox and Internet explorer - simply open the file 2211 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2212 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2213 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2214 your browser. 2215 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2216 window and render the XML log file contents. 2217 2218 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2219 2220 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2221 or using speedscope (https://www.speedscope.app). 2222 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2223 2224 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogDump()` 2225 @*/ 2226 PetscErrorCode PetscLogView(PetscViewer viewer) 2227 { 2228 PetscBool isascii; 2229 PetscViewerFormat format; 2230 int stage, lastStage; 2231 PetscStageLog stageLog; 2232 2233 PetscFunctionBegin; 2234 PetscCheck(PetscLogPLB, PETSC_COMM_SELF, PETSC_ERR_SUP, "Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2235 /* Pop off any stages the user forgot to remove */ 2236 lastStage = 0; 2237 PetscCall(PetscLogGetStageLog(&stageLog)); 2238 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2239 while (stage >= 0) { 2240 lastStage = stage; 2241 PetscCall(PetscStageLogPop(stageLog)); 2242 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2243 } 2244 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2245 PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Currently can only view logging to ASCII"); 2246 PetscCall(PetscViewerGetFormat(viewer, &format)); 2247 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2248 PetscCall(PetscLogView_Default(viewer)); 2249 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2250 PetscCall(PetscLogView_Detailed(viewer)); 2251 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2252 PetscCall(PetscLogView_CSV(viewer)); 2253 } else if (format == PETSC_VIEWER_ASCII_XML) { 2254 PetscCall(PetscLogView_Nested(viewer)); 2255 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2256 PetscCall(PetscLogView_Flamegraph(viewer)); 2257 } 2258 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2259 PetscFunctionReturn(PETSC_SUCCESS); 2260 } 2261 2262 /*@C 2263 PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed. 2264 2265 Collective on `PETSC_COMM_WORLD` 2266 2267 Level: developer 2268 2269 .seealso: [](ch_profiling), `PetscLogView()` 2270 @*/ 2271 PetscErrorCode PetscLogViewFromOptions(void) 2272 { 2273 PetscViewer viewer; 2274 PetscBool flg; 2275 PetscViewerFormat format; 2276 2277 PetscFunctionBegin; 2278 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg)); 2279 if (flg) { 2280 PetscCall(PetscViewerPushFormat(viewer, format)); 2281 PetscCall(PetscLogView(viewer)); 2282 PetscCall(PetscViewerPopFormat(viewer)); 2283 PetscCall(PetscViewerDestroy(&viewer)); 2284 } 2285 PetscFunctionReturn(PETSC_SUCCESS); 2286 } 2287 2288 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2289 /*@C 2290 PetscGetFlops - Returns the number of flops used on this processor 2291 since the program began. 2292 2293 Not Collective 2294 2295 Output Parameter: 2296 flops - number of floating point operations 2297 2298 Level: intermediate 2299 2300 Notes: 2301 A global counter logs all PETSc flop counts. The user can use 2302 `PetscLogFlops()` to increment this counter to include flops for the 2303 application code. 2304 2305 A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank 2306 2307 .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()` 2308 @*/ 2309 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) 2310 { 2311 PetscFunctionBegin; 2312 *flops = petsc_TotalFlops; 2313 PetscFunctionReturn(PETSC_SUCCESS); 2314 } 2315 2316 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2317 { 2318 size_t fullLength; 2319 va_list Argp; 2320 2321 PetscFunctionBegin; 2322 if (!petsc_logObjects) PetscFunctionReturn(PETSC_SUCCESS); 2323 va_start(Argp, format); 2324 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp)); 2325 va_end(Argp); 2326 PetscFunctionReturn(PETSC_SUCCESS); 2327 } 2328 2329 /*MC 2330 PetscLogFlops - Adds floating point operations to the global counter. 2331 2332 Synopsis: 2333 #include <petsclog.h> 2334 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2335 2336 Not Collective 2337 2338 Input Parameter: 2339 . f - flop counter 2340 2341 Usage: 2342 .vb 2343 PetscLogEvent USER_EVENT; 2344 PetscLogEventRegister("User event",0,&USER_EVENT); 2345 PetscLogEventBegin(USER_EVENT,0,0,0,0); 2346 [code segment to monitor] 2347 PetscLogFlops(user_flops) 2348 PetscLogEventEnd(USER_EVENT,0,0,0,0); 2349 .ve 2350 2351 Level: intermediate 2352 2353 Note: 2354 A global counter logs all PETSc flop counts. The user can use 2355 PetscLogFlops() to increment this counter to include flops for the 2356 application code. 2357 2358 .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2359 M*/ 2360 2361 /*MC 2362 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) 2363 to get accurate timings 2364 2365 Synopsis: 2366 #include <petsclog.h> 2367 void PetscPreLoadBegin(PetscBool flag,char *name); 2368 2369 Not Collective 2370 2371 Input Parameters: 2372 + flag - `PETSC_TRUE` to run twice, `PETSC_FALSE` to run once, may be overridden 2373 with command line option -preload true or -preload false 2374 - name - name of first stage (lines of code timed separately with `-log_view`) to 2375 be preloaded 2376 2377 Usage: 2378 .vb 2379 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2380 lines of code 2381 PetscPreLoadStage("second stage"); 2382 lines of code 2383 PetscPreLoadEnd(); 2384 .ve 2385 2386 Level: intermediate 2387 2388 Note: 2389 Only works in C/C++, not Fortran 2390 2391 Flags available within the macro. 2392 + PetscPreLoadingUsed - true if we are or have done preloading 2393 . PetscPreLoadingOn - true if it is CURRENTLY doing preload 2394 . PetscPreLoadIt - 0 for the first computation (with preloading turned off it is only 0) 1 for the second 2395 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is turned on 2396 The first two variables are available throughout the program, the second two only between the `PetscPreLoadBegin()` 2397 and `PetscPreLoadEnd()` 2398 2399 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2400 M*/ 2401 2402 /*MC 2403 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) 2404 to get accurate timings 2405 2406 Synopsis: 2407 #include <petsclog.h> 2408 void PetscPreLoadEnd(void); 2409 2410 Not Collective 2411 2412 Usage: 2413 .vb 2414 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2415 lines of code 2416 PetscPreLoadStage("second stage"); 2417 lines of code 2418 PetscPreLoadEnd(); 2419 .ve 2420 2421 Level: intermediate 2422 2423 Note: 2424 Only works in C/C++ not fortran 2425 2426 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2427 M*/ 2428 2429 /*MC 2430 PetscPreLoadStage - Start a new segment of code to be timed separately. 2431 to get accurate timings 2432 2433 Synopsis: 2434 #include <petsclog.h> 2435 void PetscPreLoadStage(char *name); 2436 2437 Not Collective 2438 2439 Usage: 2440 .vb 2441 PetscPreLoadBegin(PETSC_TRUE,"first stage); 2442 lines of code 2443 PetscPreLoadStage("second stage"); 2444 lines of code 2445 PetscPreLoadEnd(); 2446 .ve 2447 2448 Level: intermediate 2449 2450 Note: 2451 Only works in C/C++ not fortran 2452 2453 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2454 M*/ 2455 2456 #if PetscDefined(HAVE_DEVICE) 2457 #include <petsc/private/deviceimpl.h> 2458 2459 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2460 2461 /* 2462 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2463 because it will result in timing results that cannot be interpreted. 2464 */ 2465 static PetscErrorCode PetscLogGpuTime_Off(void) 2466 { 2467 PetscLogGpuTimeFlag = PETSC_FALSE; 2468 return PETSC_SUCCESS; 2469 } 2470 2471 /*@C 2472 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2473 2474 Options Database Key: 2475 . -log_view_gpu_time - provide the GPU times in the -log_view output 2476 2477 Level: advanced 2478 2479 Notes: 2480 Turning on the timing of the 2481 GPU kernels can slow down the entire computation and should only be used when studying the performance 2482 of operations on GPU such as vector operations and matrix-vector operations. 2483 2484 This routine should only be called once near the beginning of the program. Once it is started it cannot be turned off. 2485 2486 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2487 @*/ 2488 PetscErrorCode PetscLogGpuTime(void) 2489 { 2490 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2491 PetscLogGpuTimeFlag = PETSC_TRUE; 2492 return PETSC_SUCCESS; 2493 } 2494 2495 /*@C 2496 PetscLogGpuTimeBegin - Start timer for device 2497 2498 Level: intermediate 2499 2500 Notes: 2501 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time devoted to GPU computations (excluding kernel launch times). 2502 2503 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of time devoted to GPU computations (including kernel launch times). 2504 2505 There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and `PetscLogGpuTimeEnd()` 2506 2507 This timer should NOT include times for data transfers between the GPU and CPU, nor setup actions such as allocating space. 2508 2509 The regular logging captures the time for data transfers and any CPU activities during the event 2510 2511 It is used to compute the flop rate on the GPU as it is actively engaged in running a kernel. 2512 2513 Developer Notes: 2514 The GPU event timer captures the execution time of all the kernels launched in the default stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`. 2515 2516 `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the default stream (stream 0). The device will record a time stamp for the 2517 event when it reaches that event in the stream. The function xxxEventSynchronize() is called in `PetsLogGpuTimeEnd()` to block CPU execution, 2518 but not continued GPU execution, until the timer event is recorded. 2519 2520 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2521 @*/ 2522 PetscErrorCode PetscLogGpuTimeBegin(void) 2523 { 2524 PetscFunctionBegin; 2525 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(PETSC_SUCCESS); 2526 if (PetscDefined(HAVE_DEVICE)) { 2527 PetscDeviceContext dctx; 2528 2529 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2530 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2531 } else { 2532 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2533 } 2534 PetscFunctionReturn(PETSC_SUCCESS); 2535 } 2536 2537 /*@C 2538 PetscLogGpuTimeEnd - Stop timer for device 2539 2540 Level: intermediate 2541 2542 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2543 @*/ 2544 PetscErrorCode PetscLogGpuTimeEnd(void) 2545 { 2546 PetscFunctionBegin; 2547 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(PETSC_SUCCESS); 2548 if (PetscDefined(HAVE_DEVICE)) { 2549 PetscDeviceContext dctx; 2550 PetscLogDouble elapsed; 2551 2552 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2553 PetscCall(PetscDeviceContextEndTimer_Internal(dctx, &elapsed)); 2554 petsc_gtime += (elapsed / 1000.0); 2555 } else { 2556 PetscCall(PetscTimeAdd(&petsc_gtime)); 2557 } 2558 PetscFunctionReturn(PETSC_SUCCESS); 2559 } 2560 2561 #endif /* end of PETSC_HAVE_DEVICE */ 2562 2563 #else /* end of -DPETSC_USE_LOG section */ 2564 2565 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2566 { 2567 PetscFunctionBegin; 2568 PetscFunctionReturn(PETSC_SUCCESS); 2569 } 2570 2571 #endif /* PETSC_USE_LOG*/ 2572 2573 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2574 PetscClassId PETSC_OBJECT_CLASSID = 0; 2575 2576 /*@C 2577 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2578 2579 Not Collective 2580 2581 Input Parameter: 2582 . name - The class name 2583 2584 Output Parameter: 2585 . oclass - The class id or classid 2586 2587 Level: developer 2588 2589 .seealso: [](ch_profiling), `PetscLogEventRegister()` 2590 @*/ 2591 PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass) 2592 { 2593 #if defined(PETSC_USE_LOG) 2594 PetscStageLog stageLog; 2595 PetscInt stage; 2596 #endif 2597 2598 PetscFunctionBegin; 2599 *oclass = ++PETSC_LARGEST_CLASSID; 2600 #if defined(PETSC_USE_LOG) 2601 PetscCall(PetscLogGetStageLog(&stageLog)); 2602 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2603 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2604 #endif 2605 PetscFunctionReturn(PETSC_SUCCESS); 2606 } 2607 2608 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2609 #include <mpe.h> 2610 2611 PetscBool PetscBeganMPE = PETSC_FALSE; 2612 2613 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2614 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2615 2616 /*@C 2617 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files 2618 and slows the program down. 2619 2620 Collective over `PETSC_COMM_WORLD` 2621 2622 Options Database Key: 2623 . -log_mpe - Prints extensive log information 2624 2625 Level: advanced 2626 2627 Note: 2628 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 2629 intended for production runs since it logs only flop rates and object 2630 creation (and should not significantly slow the programs). 2631 2632 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2633 `PetscLogEventDeactivate()` 2634 @*/ 2635 PetscErrorCode PetscLogMPEBegin(void) 2636 { 2637 PetscFunctionBegin; 2638 /* Do MPE initialization */ 2639 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2640 PetscCall(PetscInfo(0, "Initializing MPE.\n")); 2641 PetscCall(MPE_Init_log()); 2642 2643 PetscBeganMPE = PETSC_TRUE; 2644 } else { 2645 PetscCall(PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n")); 2646 } 2647 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2648 PetscFunctionReturn(PETSC_SUCCESS); 2649 } 2650 2651 /*@C 2652 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2653 2654 Collective over `PETSC_COMM_WORLD` 2655 2656 Level: advanced 2657 2658 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2659 @*/ 2660 PetscErrorCode PetscLogMPEDump(const char sname[]) 2661 { 2662 char name[PETSC_MAX_PATH_LEN]; 2663 2664 PetscFunctionBegin; 2665 if (PetscBeganMPE) { 2666 PetscCall(PetscInfo(0, "Finalizing MPE.\n")); 2667 if (sname) { 2668 PetscCall(PetscStrncpy(name, sname, sizeof(name))); 2669 } else { 2670 PetscCall(PetscGetProgramName(name, sizeof(name))); 2671 } 2672 PetscCall(MPE_Finish_log(name)); 2673 } else { 2674 PetscCall(PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n")); 2675 } 2676 PetscFunctionReturn(PETSC_SUCCESS); 2677 } 2678 2679 #define PETSC_RGB_COLORS_MAX 39 2680 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ", 2681 "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ", 2682 "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ", 2683 "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ", 2684 "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "}; 2685 2686 /*@C 2687 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()` 2688 2689 Not collective. Maybe it should be? 2690 2691 Output Parameter: 2692 . str - character string representing the color 2693 2694 Level: developer 2695 2696 .seealso: [](ch_profiling), `PetscLogEventRegister()` 2697 @*/ 2698 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) 2699 { 2700 static int idx = 0; 2701 2702 PetscFunctionBegin; 2703 *str = PetscLogMPERGBColors[idx]; 2704 idx = (idx + 1) % PETSC_RGB_COLORS_MAX; 2705 PetscFunctionReturn(PETSC_SUCCESS); 2706 } 2707 2708 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2709