1 2 /* 3 PETSc code to log object creation and destruction and PETSc events. 4 5 This provides the public API used by the rest of PETSc and by users. 6 7 These routines use a private API that is not used elsewhere in PETSc and is not 8 accessible to users. The private API is defined in logimpl.h and the utils directory. 9 10 */ 11 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 12 #include <petsctime.h> 13 #include <petscviewer.h> 14 #include <petscdevice.h> 15 #include <petsc/private/deviceimpl.h> 16 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 17 #include <../src/sys/perfstubs/timer.h> 18 #endif 19 20 PetscLogEvent PETSC_LARGEST_EVENT = PETSC_EVENT; 21 22 #if defined(PETSC_USE_LOG) 23 #include <petscmachineinfo.h> 24 #include <petscconfiginfo.h> 25 26 #if defined(PETSC_HAVE_THREADSAFETY) 27 28 PetscInt petsc_log_gid = -1; /* Global threadId counter */ 29 PETSC_TLS PetscInt petsc_log_tid = -1; /* Local threadId */ 30 31 /* shared variables */ 32 PetscSpinlock PetscLogSpinLock; 33 PetscHMapEvent eventInfoMap_th = NULL; 34 35 #endif 36 37 /* used in the MPI_XXX() count macros in petsclog.h */ 38 39 /* Action and object logging variables */ 40 Action *petsc_actions = NULL; 41 Object *petsc_objects = NULL; 42 PetscBool petsc_logActions = PETSC_FALSE; 43 PetscBool petsc_logObjects = PETSC_FALSE; 44 int petsc_numActions = 0, petsc_maxActions = 100; 45 int petsc_numObjects = 0, petsc_maxObjects = 100; 46 int petsc_numObjectsDestroyed = 0; 47 48 /* Global counters */ 49 PetscLogDouble petsc_BaseTime = 0.0; 50 PetscLogDouble petsc_TotalFlops = 0.0; /* The number of flops */ 51 PetscLogDouble petsc_send_ct = 0.0; /* The number of sends */ 52 PetscLogDouble petsc_recv_ct = 0.0; /* The number of receives */ 53 PetscLogDouble petsc_send_len = 0.0; /* The total length of all sent messages */ 54 PetscLogDouble petsc_recv_len = 0.0; /* The total length of all received messages */ 55 PetscLogDouble petsc_isend_ct = 0.0; /* The number of immediate sends */ 56 PetscLogDouble petsc_irecv_ct = 0.0; /* The number of immediate receives */ 57 PetscLogDouble petsc_isend_len = 0.0; /* The total length of all immediate send messages */ 58 PetscLogDouble petsc_irecv_len = 0.0; /* The total length of all immediate receive messages */ 59 PetscLogDouble petsc_wait_ct = 0.0; /* The number of waits */ 60 PetscLogDouble petsc_wait_any_ct = 0.0; /* The number of anywaits */ 61 PetscLogDouble petsc_wait_all_ct = 0.0; /* The number of waitalls */ 62 PetscLogDouble petsc_sum_of_waits_ct = 0.0; /* The total number of waits */ 63 PetscLogDouble petsc_allreduce_ct = 0.0; /* The number of reductions */ 64 PetscLogDouble petsc_gather_ct = 0.0; /* The number of gathers and gathervs */ 65 PetscLogDouble petsc_scatter_ct = 0.0; /* The number of scatters and scattervs */ 66 67 /* Thread Local storage */ 68 PETSC_TLS PetscLogDouble petsc_TotalFlops_th = 0.0; 69 PETSC_TLS PetscLogDouble petsc_send_ct_th = 0.0; 70 PETSC_TLS PetscLogDouble petsc_recv_ct_th = 0.0; 71 PETSC_TLS PetscLogDouble petsc_send_len_th = 0.0; 72 PETSC_TLS PetscLogDouble petsc_recv_len_th = 0.0; 73 PETSC_TLS PetscLogDouble petsc_isend_ct_th = 0.0; 74 PETSC_TLS PetscLogDouble petsc_irecv_ct_th = 0.0; 75 PETSC_TLS PetscLogDouble petsc_isend_len_th = 0.0; 76 PETSC_TLS PetscLogDouble petsc_irecv_len_th = 0.0; 77 PETSC_TLS PetscLogDouble petsc_wait_ct_th = 0.0; 78 PETSC_TLS PetscLogDouble petsc_wait_any_ct_th = 0.0; 79 PETSC_TLS PetscLogDouble petsc_wait_all_ct_th = 0.0; 80 PETSC_TLS PetscLogDouble petsc_sum_of_waits_ct_th = 0.0; 81 PETSC_TLS PetscLogDouble petsc_allreduce_ct_th = 0.0; 82 PETSC_TLS PetscLogDouble petsc_gather_ct_th = 0.0; 83 PETSC_TLS PetscLogDouble petsc_scatter_ct_th = 0.0; 84 85 #if defined(PETSC_HAVE_DEVICE) 86 PetscLogDouble petsc_ctog_ct = 0.0; /* The total number of CPU to GPU copies */ 87 PetscLogDouble petsc_gtoc_ct = 0.0; /* The total number of GPU to CPU copies */ 88 PetscLogDouble petsc_ctog_sz = 0.0; /* The total size of CPU to GPU copies */ 89 PetscLogDouble petsc_gtoc_sz = 0.0; /* The total size of GPU to CPU copies */ 90 PetscLogDouble petsc_ctog_ct_scalar = 0.0; /* The total number of CPU to GPU copies */ 91 PetscLogDouble petsc_gtoc_ct_scalar = 0.0; /* The total number of GPU to CPU copies */ 92 PetscLogDouble petsc_ctog_sz_scalar = 0.0; /* The total size of CPU to GPU copies */ 93 PetscLogDouble petsc_gtoc_sz_scalar = 0.0; /* The total size of GPU to CPU copies */ 94 PetscLogDouble petsc_gflops = 0.0; /* The flops done on a GPU */ 95 PetscLogDouble petsc_gtime = 0.0; /* The time spent on a GPU */ 96 97 PETSC_TLS PetscLogDouble petsc_ctog_ct_th = 0.0; 98 PETSC_TLS PetscLogDouble petsc_gtoc_ct_th = 0.0; 99 PETSC_TLS PetscLogDouble petsc_ctog_sz_th = 0.0; 100 PETSC_TLS PetscLogDouble petsc_gtoc_sz_th = 0.0; 101 PETSC_TLS PetscLogDouble petsc_ctog_ct_scalar_th = 0.0; 102 PETSC_TLS PetscLogDouble petsc_gtoc_ct_scalar_th = 0.0; 103 PETSC_TLS PetscLogDouble petsc_ctog_sz_scalar_th = 0.0; 104 PETSC_TLS PetscLogDouble petsc_gtoc_sz_scalar_th = 0.0; 105 PETSC_TLS PetscLogDouble petsc_gflops_th = 0.0; 106 PETSC_TLS PetscLogDouble petsc_gtime_th = 0.0; 107 #endif 108 109 #if defined(PETSC_HAVE_THREADSAFETY) 110 PetscErrorCode PetscAddLogDouble(PetscLogDouble *tot, PetscLogDouble *tot_th, PetscLogDouble tmp) 111 { 112 *tot_th += tmp; 113 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 114 *tot += tmp; 115 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 116 return PETSC_SUCCESS; 117 } 118 119 PetscErrorCode PetscAddLogDoubleCnt(PetscLogDouble *cnt, PetscLogDouble *tot, PetscLogDouble *cnt_th, PetscLogDouble *tot_th, PetscLogDouble tmp) 120 { 121 *cnt_th = *cnt_th + 1; 122 *tot_th += tmp; 123 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 124 *tot += (PetscLogDouble)(tmp); 125 *cnt += *cnt + 1; 126 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 127 return PETSC_SUCCESS; 128 } 129 130 PetscInt PetscLogGetTid(void) 131 { 132 if (petsc_log_tid < 0) { 133 PetscCall(PetscSpinlockLock(&PetscLogSpinLock)); 134 petsc_log_tid = ++petsc_log_gid; 135 PetscCall(PetscSpinlockUnlock(&PetscLogSpinLock)); 136 } 137 return petsc_log_tid; 138 } 139 140 #endif 141 142 /* Logging functions */ 143 PetscErrorCode (*PetscLogPHC)(PetscObject) = NULL; 144 PetscErrorCode (*PetscLogPHD)(PetscObject) = NULL; 145 PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 146 PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject) = NULL; 147 148 /* Tracing event logging variables */ 149 FILE *petsc_tracefile = NULL; 150 int petsc_tracelevel = 0; 151 const char *petsc_traceblanks = " "; 152 char petsc_tracespace[128] = " "; 153 PetscLogDouble petsc_tracetime = 0.0; 154 static PetscBool PetscLogInitializeCalled = PETSC_FALSE; 155 156 static PetscIntStack current_log_event_stack = NULL; 157 158 PETSC_INTERN PetscErrorCode PetscLogInitialize(void) 159 { 160 int stage; 161 PetscBool opt; 162 163 PetscFunctionBegin; 164 if (PetscLogInitializeCalled) PetscFunctionReturn(PETSC_SUCCESS); 165 PetscLogInitializeCalled = PETSC_TRUE; 166 167 PetscCall(PetscIntStackCreate(¤t_log_event_stack)); 168 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_actions", &opt)); 169 if (opt) petsc_logActions = PETSC_FALSE; 170 PetscCall(PetscOptionsHasName(NULL, NULL, "-log_exclude_objects", &opt)); 171 if (opt) petsc_logObjects = PETSC_FALSE; 172 if (petsc_logActions) PetscCall(PetscMalloc1(petsc_maxActions, &petsc_actions)); 173 if (petsc_logObjects) PetscCall(PetscMalloc1(petsc_maxObjects, &petsc_objects)); 174 PetscLogPHC = PetscLogObjCreateDefault; 175 PetscLogPHD = PetscLogObjDestroyDefault; 176 /* Setup default logging structures */ 177 PetscCall(PetscStageLogCreate(&petsc_stageLog)); 178 PetscCall(PetscStageLogRegister(petsc_stageLog, "Main Stage", &stage)); 179 180 PetscCall(PetscSpinlockCreate(&PetscLogSpinLock)); 181 #if defined(PETSC_HAVE_THREADSAFETY) 182 petsc_log_tid = 0; 183 petsc_log_gid = 0; 184 PetscCall(PetscHMapEventCreate(&eventInfoMap_th)); 185 #endif 186 187 /* All processors sync here for more consistent logging */ 188 PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD)); 189 PetscCall(PetscTime(&petsc_BaseTime)); 190 PetscCall(PetscLogStagePush(stage)); 191 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 192 PetscStackCallExternalVoid("ps_initialize_", ps_initialize_()); 193 #endif 194 PetscFunctionReturn(PETSC_SUCCESS); 195 } 196 197 PETSC_INTERN PetscErrorCode PetscLogFinalize(void) 198 { 199 PetscStageLog stageLog; 200 201 PetscFunctionBegin; 202 #if defined(PETSC_HAVE_THREADSAFETY) 203 if (eventInfoMap_th) { 204 PetscEventPerfInfo **array; 205 PetscInt n, off = 0; 206 207 PetscCall(PetscHMapEventGetSize(eventInfoMap_th, &n)); 208 PetscCall(PetscMalloc1(n, &array)); 209 PetscCall(PetscHMapEventGetVals(eventInfoMap_th, &off, array)); 210 for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i])); 211 PetscCall(PetscFree(array)); 212 PetscCall(PetscHMapEventDestroy(&eventInfoMap_th)); 213 } 214 #endif 215 PetscCall(PetscFree(petsc_actions)); 216 PetscCall(PetscFree(petsc_objects)); 217 PetscCall(PetscLogNestedEnd()); 218 PetscCall(PetscLogSet(NULL, NULL)); 219 220 /* Resetting phase */ 221 PetscCall(PetscLogGetStageLog(&stageLog)); 222 PetscCall(PetscStageLogDestroy(stageLog)); 223 PetscCall(PetscIntStackDestroy(current_log_event_stack)); 224 current_log_event_stack = NULL; 225 226 petsc_TotalFlops = 0.0; 227 petsc_numActions = 0; 228 petsc_numObjects = 0; 229 petsc_numObjectsDestroyed = 0; 230 petsc_maxActions = 100; 231 petsc_maxObjects = 100; 232 petsc_actions = NULL; 233 petsc_objects = NULL; 234 petsc_logActions = PETSC_FALSE; 235 petsc_logObjects = PETSC_FALSE; 236 petsc_BaseTime = 0.0; 237 petsc_TotalFlops = 0.0; 238 petsc_send_ct = 0.0; 239 petsc_recv_ct = 0.0; 240 petsc_send_len = 0.0; 241 petsc_recv_len = 0.0; 242 petsc_isend_ct = 0.0; 243 petsc_irecv_ct = 0.0; 244 petsc_isend_len = 0.0; 245 petsc_irecv_len = 0.0; 246 petsc_wait_ct = 0.0; 247 petsc_wait_any_ct = 0.0; 248 petsc_wait_all_ct = 0.0; 249 petsc_sum_of_waits_ct = 0.0; 250 petsc_allreduce_ct = 0.0; 251 petsc_gather_ct = 0.0; 252 petsc_scatter_ct = 0.0; 253 petsc_TotalFlops_th = 0.0; 254 petsc_send_ct_th = 0.0; 255 petsc_recv_ct_th = 0.0; 256 petsc_send_len_th = 0.0; 257 petsc_recv_len_th = 0.0; 258 petsc_isend_ct_th = 0.0; 259 petsc_irecv_ct_th = 0.0; 260 petsc_isend_len_th = 0.0; 261 petsc_irecv_len_th = 0.0; 262 petsc_wait_ct_th = 0.0; 263 petsc_wait_any_ct_th = 0.0; 264 petsc_wait_all_ct_th = 0.0; 265 petsc_sum_of_waits_ct_th = 0.0; 266 petsc_allreduce_ct_th = 0.0; 267 petsc_gather_ct_th = 0.0; 268 petsc_scatter_ct_th = 0.0; 269 270 #if defined(PETSC_HAVE_DEVICE) 271 petsc_ctog_ct = 0.0; 272 petsc_gtoc_ct = 0.0; 273 petsc_ctog_sz = 0.0; 274 petsc_gtoc_sz = 0.0; 275 petsc_gflops = 0.0; 276 petsc_gtime = 0.0; 277 petsc_ctog_ct_th = 0.0; 278 petsc_gtoc_ct_th = 0.0; 279 petsc_ctog_sz_th = 0.0; 280 petsc_gtoc_sz_th = 0.0; 281 petsc_gflops_th = 0.0; 282 petsc_gtime_th = 0.0; 283 #endif 284 285 PETSC_LARGEST_EVENT = PETSC_EVENT; 286 PetscLogPHC = NULL; 287 PetscLogPHD = NULL; 288 petsc_tracefile = NULL; 289 petsc_tracelevel = 0; 290 petsc_traceblanks = " "; 291 petsc_tracespace[0] = ' '; 292 petsc_tracespace[1] = 0; 293 petsc_tracetime = 0.0; 294 PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 295 PETSC_OBJECT_CLASSID = 0; 296 petsc_stageLog = NULL; 297 PetscLogInitializeCalled = PETSC_FALSE; 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 /*@C 302 PetscLogSet - Sets the logging functions called at the beginning and ending of every event. 303 304 Not Collective 305 306 Input Parameters: 307 + b - The function called at beginning of event 308 - e - The function called at end of event 309 310 Level: developer 311 312 Developer Notes: 313 The default loggers are `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. 314 315 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogTraceBegin()`, `PetscLogEventBeginDefault()`, `PetscLogEventEndDefault()` 316 @*/ 317 PetscErrorCode PetscLogSet(PetscErrorCode (*b)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*e)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject)) 318 { 319 PetscFunctionBegin; 320 PetscLogPLB = b; 321 PetscLogPLE = e; 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 /*@C 326 PetscLogIsActive - Check if logging is currently in progress. 327 328 Not Collective 329 330 Output Parameter: 331 . isActive - `PETSC_TRUE` if logging is in progress, `PETSC_FALSE` otherwise 332 333 Level: beginner 334 335 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogSet()` 336 @*/ 337 PetscErrorCode PetscLogIsActive(PetscBool *isActive) 338 { 339 PetscFunctionBegin; 340 *isActive = (PetscLogPLB && PetscLogPLE) ? PETSC_TRUE : PETSC_FALSE; 341 PetscFunctionReturn(PETSC_SUCCESS); 342 } 343 344 /*@C 345 PetscLogDefaultBegin - Turns on logging of objects and events using the default logging functions `PetscLogEventBeginDefault()` and `PetscLogEventEndDefault()`. This logs flop 346 rates and object creation and should not slow programs down too much. 347 This routine may be called more than once. 348 349 Logically Collective over `PETSC_COMM_WORLD` 350 351 Options Database Key: 352 . -log_view [viewertype:filename:viewerformat] - Prints summary of flop and timing information to the 353 screen (for code configured with --with-log=1 (which is the default)) 354 355 Example Usage: 356 .vb 357 PetscInitialize(...); 358 PetscLogDefaultBegin(); 359 ... code ... 360 PetscLogView(viewer); or PetscLogDump(); 361 PetscFinalize(); 362 .ve 363 364 Level: advanced 365 366 Note: 367 `PetscLogView()` or `PetscLogDump()` actually cause the printing of 368 the logging information. 369 370 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogTraceBegin()` 371 @*/ 372 PetscErrorCode PetscLogDefaultBegin(void) 373 { 374 PetscFunctionBegin; 375 PetscCall(PetscLogSet(PetscLogEventBeginDefault, PetscLogEventEndDefault)); 376 PetscFunctionReturn(PETSC_SUCCESS); 377 } 378 379 /*@C 380 PetscLogAllBegin - Turns on extensive logging of objects and events. Logs 381 all events. This creates large log files and slows the program down. 382 383 Logically Collective on `PETSC_COMM_WORLD` 384 385 Options Database Key: 386 . -log_all - Prints extensive log information 387 388 Example Usage: 389 .vb 390 PetscInitialize(...); 391 PetscLogAllBegin(); 392 ... code ... 393 PetscLogDump(filename); 394 PetscFinalize(); 395 .ve 396 397 Level: advanced 398 399 Note: 400 A related routine is `PetscLogDefaultBegin()` (with the options key -log_view), which is 401 intended for production runs since it logs only flop rates and object 402 creation (and shouldn't significantly slow the programs). 403 404 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogTraceBegin()` 405 @*/ 406 PetscErrorCode PetscLogAllBegin(void) 407 { 408 PetscFunctionBegin; 409 PetscCall(PetscLogSet(PetscLogEventBeginComplete, PetscLogEventEndComplete)); 410 PetscFunctionReturn(PETSC_SUCCESS); 411 } 412 413 /*@C 414 PetscLogTraceBegin - Activates trace logging. Every time a PETSc event 415 begins or ends, the event name is printed. 416 417 Logically Collective on `PETSC_COMM_WORLD` 418 419 Input Parameter: 420 . file - The file to print trace in (e.g. stdout) 421 422 Options Database Key: 423 . -log_trace [filename] - Activates `PetscLogTraceBegin()` 424 425 Level: intermediate 426 427 Notes: 428 `PetscLogTraceBegin()` prints the processor number, the execution time (sec), 429 then "Event begin:" or "Event end:" followed by the event name. 430 431 `PetscLogTraceBegin()` allows tracing of all PETSc calls, which is useful 432 to determine where a program is hanging without running in the 433 debugger. Can be used in conjunction with the -info option. 434 435 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogView()`, `PetscLogDefaultBegin()` 436 @*/ 437 PetscErrorCode PetscLogTraceBegin(FILE *file) 438 { 439 PetscFunctionBegin; 440 petsc_tracefile = file; 441 442 PetscCall(PetscLogSet(PetscLogEventBeginTrace, PetscLogEventEndTrace)); 443 PetscFunctionReturn(PETSC_SUCCESS); 444 } 445 446 /*@ 447 PetscLogActions - Determines whether actions are logged for the graphical viewer. 448 449 Not Collective 450 451 Input Parameter: 452 . flag - `PETSC_TRUE` if actions are to be logged 453 454 Options Database Key: 455 . -log_exclude_actions - Turns off actions logging 456 457 Level: intermediate 458 459 Note: 460 Logging of actions continues to consume more memory as the program 461 runs. Long running programs should consider turning this feature off. 462 463 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 464 @*/ 465 PetscErrorCode PetscLogActions(PetscBool flag) 466 { 467 PetscFunctionBegin; 468 petsc_logActions = flag; 469 PetscFunctionReturn(PETSC_SUCCESS); 470 } 471 472 /*@ 473 PetscLogObjects - Determines whether objects are logged for the graphical viewer. 474 475 Not Collective 476 477 Input Parameter: 478 . flag - `PETSC_TRUE` if objects are to be logged 479 480 Options Database Key: 481 . -log_exclude_objects - Turns off objects logging 482 483 Level: intermediate 484 485 Note: 486 Logging of objects continues to consume more memory as the program 487 runs. Long running programs should consider turning this feature off. 488 489 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 490 @*/ 491 PetscErrorCode PetscLogObjects(PetscBool flag) 492 { 493 PetscFunctionBegin; 494 petsc_logObjects = flag; 495 PetscFunctionReturn(PETSC_SUCCESS); 496 } 497 498 /*------------------------------------------------ Stage Functions --------------------------------------------------*/ 499 /*@C 500 PetscLogStageRegister - Attaches a character string name to a logging stage. 501 502 Not Collective 503 504 Input Parameter: 505 . sname - The name to associate with that stage 506 507 Output Parameter: 508 . stage - The stage number 509 510 Level: intermediate 511 512 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStagePop()` 513 @*/ 514 PetscErrorCode PetscLogStageRegister(const char sname[], PetscLogStage *stage) 515 { 516 PetscStageLog stageLog; 517 PetscLogEvent event; 518 519 PetscFunctionBegin; 520 PetscCall(PetscLogGetStageLog(&stageLog)); 521 PetscCall(PetscStageLogRegister(stageLog, sname, stage)); 522 /* Copy events already changed in the main stage, this sucks */ 523 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[*stage].eventLog, stageLog->eventLog->numEvents)); 524 for (event = 0; event < stageLog->eventLog->numEvents; event++) PetscCall(PetscEventPerfInfoCopy(&stageLog->stageInfo[0].eventLog->eventInfo[event], &stageLog->stageInfo[*stage].eventLog->eventInfo[event])); 525 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[*stage].classLog, stageLog->classLog->numClasses)); 526 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 527 if (perfstubs_initialized == PERFSTUBS_SUCCESS) PetscStackCallExternalVoid("ps_timer_create_", stageLog->stageInfo[*stage].timer = ps_timer_create_(sname)); 528 #endif 529 PetscFunctionReturn(PETSC_SUCCESS); 530 } 531 532 /*@C 533 PetscLogStagePush - This function pushes a stage on the logging stack. Events started and stopped until `PetscLogStagePop()` will be associated with the stage 534 535 Not Collective 536 537 Input Parameter: 538 . stage - The stage on which to log 539 540 Example Usage: 541 If the option -log_view is used to run the program containing the 542 following code, then 2 sets of summary data will be printed during 543 PetscFinalize(). 544 .vb 545 PetscInitialize(int *argc,char ***args,0,0); 546 [stage 0 of code] 547 PetscLogStagePush(1); 548 [stage 1 of code] 549 PetscLogStagePop(); 550 PetscBarrier(...); 551 [more stage 0 of code] 552 PetscFinalize(); 553 .ve 554 555 Level: intermediate 556 557 Note: 558 Use `PetscLogStageRegister()` to register a stage. 559 560 .seealso: [](ch_profiling), `PetscLogStagePop()`, `PetscLogStageRegister()`, `PetscBarrier()` 561 @*/ 562 PetscErrorCode PetscLogStagePush(PetscLogStage stage) 563 { 564 PetscStageLog stageLog; 565 566 PetscFunctionBegin; 567 PetscCall(PetscLogGetStageLog(&stageLog)); 568 PetscCall(PetscStageLogPush(stageLog, stage)); 569 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 570 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stage].timer != NULL) PetscStackCallExternalVoid("ps_timer_start_", ps_timer_start_(stageLog->stageInfo[stage].timer)); 571 #endif 572 PetscFunctionReturn(PETSC_SUCCESS); 573 } 574 575 /*@C 576 PetscLogStagePop - This function pops a stage from the logging stack that was pushed with `PetscLogStagePush()` 577 578 Not Collective 579 580 Example Usage: 581 If the option -log_view is used to run the program containing the 582 following code, then 2 sets of summary data will be printed during 583 PetscFinalize(). 584 .vb 585 PetscInitialize(int *argc,char ***args,0,0); 586 [stage 0 of code] 587 PetscLogStagePush(1); 588 [stage 1 of code] 589 PetscLogStagePop(); 590 PetscBarrier(...); 591 [more stage 0 of code] 592 PetscFinalize(); 593 .ve 594 595 Level: intermediate 596 597 .seealso: [](ch_profiling), `PetscLogStagePush()`, `PetscLogStageRegister()`, `PetscBarrier()` 598 @*/ 599 PetscErrorCode PetscLogStagePop(void) 600 { 601 PetscStageLog stageLog; 602 603 PetscFunctionBegin; 604 PetscCall(PetscLogGetStageLog(&stageLog)); 605 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 606 if (perfstubs_initialized == PERFSTUBS_SUCCESS && stageLog->stageInfo[stageLog->curStage].timer != NULL) PetscStackCallExternalVoid("ps_timer_stop_", ps_timer_stop_(stageLog->stageInfo[stageLog->curStage].timer)); 607 #endif 608 PetscCall(PetscStageLogPop(stageLog)); 609 PetscFunctionReturn(PETSC_SUCCESS); 610 } 611 612 /*@ 613 PetscLogStageSetActive - Sets if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 614 615 Not Collective 616 617 Input Parameters: 618 + stage - The stage 619 - isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 620 621 Level: intermediate 622 623 Note: 624 If this is set to `PETSC_FALSE` the logging acts as if the stage did not exist 625 626 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 627 @*/ 628 PetscErrorCode PetscLogStageSetActive(PetscLogStage stage, PetscBool isActive) 629 { 630 PetscStageLog stageLog; 631 632 PetscFunctionBegin; 633 PetscCall(PetscLogGetStageLog(&stageLog)); 634 PetscCall(PetscStageLogSetActive(stageLog, stage, isActive)); 635 PetscFunctionReturn(PETSC_SUCCESS); 636 } 637 638 /*@ 639 PetscLogStageGetActive - Checks if a stage is used for `PetscLogEventBegin()` and `PetscLogEventEnd()`. 640 641 Not Collective 642 643 Input Parameter: 644 . stage - The stage 645 646 Output Parameter: 647 . isActive - The activity flag, `PETSC_TRUE` for logging, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 648 649 Level: intermediate 650 651 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 652 @*/ 653 PetscErrorCode PetscLogStageGetActive(PetscLogStage stage, PetscBool *isActive) 654 { 655 PetscStageLog stageLog; 656 657 PetscFunctionBegin; 658 PetscCall(PetscLogGetStageLog(&stageLog)); 659 PetscCall(PetscStageLogGetActive(stageLog, stage, isActive)); 660 PetscFunctionReturn(PETSC_SUCCESS); 661 } 662 663 /*@ 664 PetscLogStageSetVisible - Determines stage visibility in `PetscLogView()` 665 666 Not Collective 667 668 Input Parameters: 669 + stage - The stage 670 - isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 671 672 Level: intermediate 673 674 Developer Notes: 675 What does visible mean, needs to be documented. 676 677 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 678 @*/ 679 PetscErrorCode PetscLogStageSetVisible(PetscLogStage stage, PetscBool isVisible) 680 { 681 PetscStageLog stageLog; 682 683 PetscFunctionBegin; 684 PetscCall(PetscLogGetStageLog(&stageLog)); 685 PetscCall(PetscStageLogSetVisible(stageLog, stage, isVisible)); 686 PetscFunctionReturn(PETSC_SUCCESS); 687 } 688 689 /*@ 690 PetscLogStageGetVisible - Returns stage visibility in `PetscLogView()` 691 692 Not Collective 693 694 Input Parameter: 695 . stage - The stage 696 697 Output Parameter: 698 . isVisible - The visibility flag, `PETSC_TRUE` to print, else `PETSC_FALSE` (defaults to `PETSC_TRUE`) 699 700 Level: intermediate 701 702 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogView()` 703 @*/ 704 PetscErrorCode PetscLogStageGetVisible(PetscLogStage stage, PetscBool *isVisible) 705 { 706 PetscStageLog stageLog; 707 708 PetscFunctionBegin; 709 PetscCall(PetscLogGetStageLog(&stageLog)); 710 PetscCall(PetscStageLogGetVisible(stageLog, stage, isVisible)); 711 PetscFunctionReturn(PETSC_SUCCESS); 712 } 713 714 /*@C 715 PetscLogStageGetId - Returns the stage id when given the stage name. 716 717 Not Collective 718 719 Input Parameter: 720 . name - The stage name 721 722 Output Parameter: 723 . stage - The stage, , or -1 if no stage with that name exists 724 725 Level: intermediate 726 727 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 728 @*/ 729 PetscErrorCode PetscLogStageGetId(const char name[], PetscLogStage *stage) 730 { 731 PetscStageLog stageLog; 732 733 PetscFunctionBegin; 734 PetscCall(PetscLogGetStageLog(&stageLog)); 735 PetscCall(PetscStageLogGetStage(stageLog, name, stage)); 736 PetscFunctionReturn(PETSC_SUCCESS); 737 } 738 739 /*------------------------------------------------ Event Functions --------------------------------------------------*/ 740 741 /*@C 742 PetscLogEventRegister - Registers an event name for logging operations 743 744 Not Collective 745 746 Input Parameters: 747 + name - The name associated with the event 748 - classid - The classid associated to the class for this event, obtain either with 749 `PetscClassIdRegister()` or use a predefined one such as `KSP_CLASSID`, `SNES_CLASSID`, the predefined ones 750 are only available in C code 751 752 Output Parameter: 753 . event - The event id for use with `PetscLogEventBegin()` and `PetscLogEventEnd()`. 754 755 Example Usage: 756 .vb 757 PetscLogEvent USER_EVENT; 758 PetscClassId classid; 759 PetscLogDouble user_event_flops; 760 PetscClassIdRegister("class name",&classid); 761 PetscLogEventRegister("User event name",classid,&USER_EVENT); 762 PetscLogEventBegin(USER_EVENT,0,0,0,0); 763 [code segment to monitor] 764 PetscLogFlops(user_event_flops); 765 PetscLogEventEnd(USER_EVENT,0,0,0,0); 766 .ve 767 768 Level: intermediate 769 770 Notes: 771 PETSc automatically logs library events if the code has been 772 configured with --with-log (which is the default) and 773 -log_view or -log_all is specified. `PetscLogEventRegister()` is 774 intended for logging user events to supplement this PETSc 775 information. 776 777 PETSc can gather data for use with the utilities Jumpshot 778 (part of the MPICH distribution). If PETSc has been compiled 779 with flag -DPETSC_HAVE_MPE (MPE is an additional utility within 780 MPICH), the user can employ another command line option, -log_mpe, 781 to create a logfile, "mpe.log", which can be visualized 782 Jumpshot. 783 784 The classid is associated with each event so that classes of events 785 can be disabled simultaneously, such as all matrix events. The user 786 can either use an existing classid, such as `MAT_CLASSID`, or create 787 their own as shown in the example. 788 789 If an existing event with the same name exists, its event handle is 790 returned instead of creating a new event. 791 792 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogFlops()`, 793 `PetscLogEventActivate()`, `PetscLogEventDeactivate()`, `PetscClassIdRegister()` 794 @*/ 795 PetscErrorCode PetscLogEventRegister(const char name[], PetscClassId classid, PetscLogEvent *event) 796 { 797 PetscStageLog stageLog; 798 int stage; 799 800 PetscFunctionBegin; 801 *event = PETSC_DECIDE; 802 PetscCall(PetscLogGetStageLog(&stageLog)); 803 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 804 if (*event > 0) PetscFunctionReturn(PETSC_SUCCESS); 805 PetscCall(PetscEventRegLogRegister(stageLog->eventLog, name, classid, event)); 806 for (stage = 0; stage < stageLog->numStages; stage++) { 807 PetscCall(PetscEventPerfLogEnsureSize(stageLog->stageInfo[stage].eventLog, stageLog->eventLog->numEvents)); 808 PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 809 } 810 PetscFunctionReturn(PETSC_SUCCESS); 811 } 812 813 /*@ 814 PetscLogEventSetCollective - Indicates that a particular event is collective. 815 816 Not Collective 817 818 Input Parameters: 819 + event - The event id 820 - collective - Boolean flag indicating whether a particular event is collective 821 822 Level: developer 823 824 Notes: 825 New events returned from `PetscLogEventRegister()` are collective by default. 826 827 Collective events are handled specially if the -log_sync is used. In that case the logging saves information about 828 two parts of the event; the time for all the MPI ranks to synchronize and then the time for the actual computation/communication 829 to be performed. This option is useful to debug imbalance within the computations or communications 830 831 .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogEventRegister()` 832 @*/ 833 PetscErrorCode PetscLogEventSetCollective(PetscLogEvent event, PetscBool collective) 834 { 835 PetscStageLog stageLog; 836 PetscEventRegLog eventRegLog; 837 838 PetscFunctionBegin; 839 PetscCall(PetscLogGetStageLog(&stageLog)); 840 PetscCall(PetscStageLogGetEventRegLog(stageLog, &eventRegLog)); 841 PetscCheck(event >= 0 && event <= eventRegLog->numEvents, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid event id"); 842 eventRegLog->eventInfo[event].collective = collective; 843 PetscFunctionReturn(PETSC_SUCCESS); 844 } 845 846 /*@ 847 PetscLogEventIncludeClass - Activates event logging for a PETSc object class in every stage. 848 849 Not Collective 850 851 Input Parameter: 852 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 853 854 Level: developer 855 856 .seealso: [](ch_profiling), `PetscLogEventActivateClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 857 @*/ 858 PetscErrorCode PetscLogEventIncludeClass(PetscClassId classid) 859 { 860 PetscStageLog stageLog; 861 int stage; 862 863 PetscFunctionBegin; 864 PetscCall(PetscLogGetStageLog(&stageLog)); 865 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 866 PetscFunctionReturn(PETSC_SUCCESS); 867 } 868 869 /*@ 870 PetscLogEventExcludeClass - Deactivates event logging for a PETSc object class in every stage. 871 872 Not Collective 873 874 Input Parameter: 875 . classid - The object class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 876 877 Level: developer 878 879 Note: 880 If a class is excluded then events associated with that class are not logged. 881 882 .seealso: [](ch_profiling), `PetscLogEventDeactivateClass()`, `PetscLogEventActivateClass()`, `PetscLogEventDeactivate()`, `PetscLogEventActivate()` 883 @*/ 884 PetscErrorCode PetscLogEventExcludeClass(PetscClassId classid) 885 { 886 PetscStageLog stageLog; 887 int stage; 888 889 PetscFunctionBegin; 890 PetscCall(PetscLogGetStageLog(&stageLog)); 891 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 892 PetscFunctionReturn(PETSC_SUCCESS); 893 } 894 895 /*@ 896 PetscLogEventActivate - Indicates that a particular event should be logged. 897 898 Not Collective 899 900 Input Parameter: 901 . event - The event id 902 903 Example Usage: 904 .vb 905 PetscLogEventDeactivate(VEC_SetValues); 906 [code where you do not want to log VecSetValues()] 907 PetscLogEventActivate(VEC_SetValues); 908 [code where you do want to log VecSetValues()] 909 .ve 910 911 Level: advanced 912 913 Note: 914 The event may be either a pre-defined PETSc event (found in include/petsclog.h) 915 or an event number obtained with `PetscLogEventRegister()`. 916 917 .seealso: [](ch_profiling), `PlogEventDeactivate()`, `PlogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 918 @*/ 919 PetscErrorCode PetscLogEventActivate(PetscLogEvent event) 920 { 921 PetscStageLog stageLog; 922 int stage; 923 924 PetscFunctionBegin; 925 PetscCall(PetscLogGetStageLog(&stageLog)); 926 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 927 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 928 PetscFunctionReturn(PETSC_SUCCESS); 929 } 930 931 /*@ 932 PetscLogEventDeactivate - Indicates that a particular event should not be logged. 933 934 Not Collective 935 936 Input Parameter: 937 . event - The event id 938 939 Example Usage: 940 .vb 941 PetscLogEventDeactivate(VEC_SetValues); 942 [code where you do not want to log VecSetValues()] 943 PetscLogEventActivate(VEC_SetValues); 944 [code where you do want to log VecSetValues()] 945 .ve 946 947 Level: advanced 948 949 Note: 950 The event may be either a pre-defined PETSc event (found in 951 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 952 953 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()`, `PetscLogEventDeactivatePop()` 954 @*/ 955 PetscErrorCode PetscLogEventDeactivate(PetscLogEvent event) 956 { 957 PetscStageLog stageLog; 958 int stage; 959 960 PetscFunctionBegin; 961 PetscCall(PetscLogGetStageLog(&stageLog)); 962 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 963 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 964 PetscFunctionReturn(PETSC_SUCCESS); 965 } 966 967 /*@ 968 PetscLogEventDeactivatePush - Indicates that a particular event should not be logged until `PetscLogEventDeactivatePop()` is called 969 970 Not Collective 971 972 Input Parameter: 973 . event - The event id 974 975 Example Usage: 976 .vb 977 PetscLogEventDeactivatePush(VEC_SetValues); 978 [code where you do not want to log VecSetValues()] 979 PetscLogEventDeactivatePop(VEC_SetValues); 980 [code where you do want to log VecSetValues()] 981 .ve 982 983 Level: advanced 984 985 Note: 986 The event may be either a pre-defined PETSc event (found in 987 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 988 989 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePop()`, `PetscLogEventDeactivate()` 990 @*/ 991 PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent event) 992 { 993 PetscStageLog stageLog; 994 int stage; 995 996 PetscFunctionBegin; 997 PetscCall(PetscLogGetStageLog(&stageLog)); 998 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 999 PetscCall(PetscEventPerfLogDeactivatePush(stageLog->stageInfo[stage].eventLog, event)); 1000 PetscFunctionReturn(PETSC_SUCCESS); 1001 } 1002 1003 /*@ 1004 PetscLogEventDeactivatePop - Indicates that a particular event should again be logged after the logging was turned off with `PetscLogEventDeactivatePush()` 1005 1006 Not Collective 1007 1008 Input Parameter: 1009 . event - The event id 1010 1011 Example Usage: 1012 .vb 1013 PetscLogEventDeactivatePush(VEC_SetValues); 1014 [code where you do not want to log VecSetValues()] 1015 PetscLogEventDeactivatePop(VEC_SetValues); 1016 [code where you do want to log VecSetValues()] 1017 .ve 1018 1019 Level: advanced 1020 1021 Note: 1022 The event may be either a pre-defined PETSc event (found in 1023 include/petsclog.h) or an event number obtained with `PetscLogEventRegister()`). 1024 1025 .seealso: [](ch_profiling), `PetscLogEventActivate()`, `PetscLogEventDeactivatePush()` 1026 @*/ 1027 PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent event) 1028 { 1029 PetscStageLog stageLog; 1030 int stage; 1031 1032 PetscFunctionBegin; 1033 PetscCall(PetscLogGetStageLog(&stageLog)); 1034 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1035 PetscCall(PetscEventPerfLogDeactivatePop(stageLog->stageInfo[stage].eventLog, event)); 1036 PetscFunctionReturn(PETSC_SUCCESS); 1037 } 1038 1039 /*@ 1040 PetscLogEventSetActiveAll - Turns on logging of all events 1041 1042 Not Collective 1043 1044 Input Parameters: 1045 + event - The event id 1046 - isActive - The activity flag determining whether the event is logged 1047 1048 Level: advanced 1049 1050 .seealso: [](ch_profiling), `PlogEventActivate()`, `PlogEventDeactivate()` 1051 @*/ 1052 PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent event, PetscBool isActive) 1053 { 1054 PetscStageLog stageLog; 1055 int stage; 1056 1057 PetscFunctionBegin; 1058 PetscCall(PetscLogGetStageLog(&stageLog)); 1059 for (stage = 0; stage < stageLog->numStages; stage++) { 1060 if (isActive) { 1061 PetscCall(PetscEventPerfLogActivate(stageLog->stageInfo[stage].eventLog, event)); 1062 } else { 1063 PetscCall(PetscEventPerfLogDeactivate(stageLog->stageInfo[stage].eventLog, event)); 1064 } 1065 } 1066 PetscFunctionReturn(PETSC_SUCCESS); 1067 } 1068 1069 /*@ 1070 PetscLogEventActivateClass - Activates event logging for a PETSc object class for the current stage 1071 1072 Not Collective 1073 1074 Input Parameter: 1075 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 1076 1077 Level: developer 1078 1079 .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventDeactivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 1080 @*/ 1081 PetscErrorCode PetscLogEventActivateClass(PetscClassId classid) 1082 { 1083 PetscStageLog stageLog; 1084 int stage; 1085 1086 PetscFunctionBegin; 1087 PetscCall(PetscLogGetStageLog(&stageLog)); 1088 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1089 PetscCall(PetscEventPerfLogActivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1090 PetscFunctionReturn(PETSC_SUCCESS); 1091 } 1092 1093 /*@ 1094 PetscLogEventDeactivateClass - Deactivates event logging for a PETSc object class for the current stage 1095 1096 Not Collective 1097 1098 Input Parameter: 1099 . classid - The event class, for example `MAT_CLASSID`, `SNES_CLASSID`, etc. 1100 1101 Level: developer 1102 1103 .seealso: [](ch_profiling), `PetscLogEventIncludeClass()`, `PetscLogEventExcludeClass()`, `PetscLogEventActivateClass()`, `PetscLogEventActivate()`, `PetscLogEventDeactivate()` 1104 @*/ 1105 PetscErrorCode PetscLogEventDeactivateClass(PetscClassId classid) 1106 { 1107 PetscStageLog stageLog; 1108 int stage; 1109 1110 PetscFunctionBegin; 1111 PetscCall(PetscLogGetStageLog(&stageLog)); 1112 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1113 PetscCall(PetscEventPerfLogDeactivateClass(stageLog->stageInfo[stage].eventLog, stageLog->eventLog, classid)); 1114 PetscFunctionReturn(PETSC_SUCCESS); 1115 } 1116 1117 /*MC 1118 PetscLogEventSync - Synchronizes the beginning of a user event. 1119 1120 Synopsis: 1121 #include <petsclog.h> 1122 PetscErrorCode PetscLogEventSync(int e, MPI_Comm comm) 1123 1124 Collective 1125 1126 Input Parameters: 1127 + e - integer associated with the event obtained from PetscLogEventRegister() 1128 - comm - an MPI communicator 1129 1130 Example Usage: 1131 .vb 1132 PetscLogEvent USER_EVENT; 1133 1134 PetscLogEventRegister("User event", 0, &USER_EVENT); 1135 PetscLogEventSync(USER_EVENT, PETSC_COMM_WORLD); 1136 PetscLogEventBegin(USER_EVENT, 0, 0, 0, 0); 1137 [code segment to monitor] 1138 PetscLogEventEnd(USER_EVENT, 0, 0, 0 , 0); 1139 .ve 1140 1141 Level: developer 1142 1143 Note: 1144 This routine should be called only if there is not a `PetscObject` available to pass to 1145 `PetscLogEventBegin()`. 1146 1147 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()` 1148 M*/ 1149 1150 /*MC 1151 PetscLogEventBegin - Logs the beginning of a user event. 1152 1153 Synopsis: 1154 #include <petsclog.h> 1155 PetscErrorCode PetscLogEventBegin(int e, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 1156 1157 Not Collective 1158 1159 Input Parameters: 1160 + e - integer associated with the event obtained from PetscLogEventRegister() 1161 - o1,o2,o3,o4 - objects associated with the event, or 0 1162 1163 Fortran Synopsis: 1164 void PetscLogEventBegin(int e, PetscErrorCode ierr) 1165 1166 Example Usage: 1167 .vb 1168 PetscLogEvent USER_EVENT; 1169 1170 PetscLogDouble user_event_flops; 1171 PetscLogEventRegister("User event",0, &USER_EVENT); 1172 PetscLogEventBegin(USER_EVENT, 0, 0, 0, 0); 1173 [code segment to monitor] 1174 PetscLogFlops(user_event_flops); 1175 PetscLogEventEnd(USER_EVENT, 0, 0, 0, 0); 1176 .ve 1177 1178 Level: intermediate 1179 1180 Developer Note: 1181 `PetscLogEventBegin()` and `PetscLogEventBegin()` return error codes instead of explicitly 1182 handling the errors that occur in the macro directly because other packages that use this 1183 macros have used them in their own functions or methods that do not return error codes and it 1184 would be disruptive to change the current behavior. 1185 1186 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventEnd()`, `PetscLogFlops()` 1187 M*/ 1188 1189 /*MC 1190 PetscLogEventEnd - Log the end of a user event. 1191 1192 Synopsis: 1193 #include <petsclog.h> 1194 PetscErrorCode PetscLogEventEnd(int e, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 1195 1196 Not Collective 1197 1198 Input Parameters: 1199 + e - integer associated with the event obtained with PetscLogEventRegister() 1200 - o1,o2,o3,o4 - objects associated with the event, or 0 1201 1202 Fortran Synopsis: 1203 void PetscLogEventEnd(int e, PetscErrorCode ierr) 1204 1205 Example Usage: 1206 .vb 1207 PetscLogEvent USER_EVENT; 1208 1209 PetscLogDouble user_event_flops; 1210 PetscLogEventRegister("User event", 0, &USER_EVENT); 1211 PetscLogEventBegin(USER_EVENT, 0, 0, 0, 0); 1212 [code segment to monitor] 1213 PetscLogFlops(user_event_flops); 1214 PetscLogEventEnd(USER_EVENT, 0, 0, 0, 0); 1215 .ve 1216 1217 Level: intermediate 1218 1219 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogFlops()` 1220 M*/ 1221 1222 /*@C 1223 PetscLogEventGetId - Returns the event id when given the event name. 1224 1225 Not Collective 1226 1227 Input Parameter: 1228 . name - The event name 1229 1230 Output Parameter: 1231 . event - The event, or -1 if no event with that name exists 1232 1233 Level: intermediate 1234 1235 .seealso: [](ch_profiling), `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStageGetId()` 1236 @*/ 1237 PetscErrorCode PetscLogEventGetId(const char name[], PetscLogEvent *event) 1238 { 1239 PetscStageLog stageLog; 1240 1241 PetscFunctionBegin; 1242 PetscCall(PetscLogGetStageLog(&stageLog)); 1243 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, event)); 1244 PetscFunctionReturn(PETSC_SUCCESS); 1245 } 1246 1247 PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent event) 1248 { 1249 PetscFunctionBegin; 1250 if (!PetscDefined(HAVE_THREADSAFETY)) PetscCall(PetscIntStackPush(current_log_event_stack, event)); 1251 PetscFunctionReturn(PETSC_SUCCESS); 1252 } 1253 1254 PetscErrorCode PetscLogPopCurrentEvent_Internal(void) 1255 { 1256 PetscFunctionBegin; 1257 if (!PetscDefined(HAVE_THREADSAFETY)) PetscCall(PetscIntStackPop(current_log_event_stack, NULL)); 1258 PetscFunctionReturn(PETSC_SUCCESS); 1259 } 1260 1261 PetscErrorCode PetscLogGetCurrentEvent_Internal(PetscLogEvent *event) 1262 { 1263 PetscBool empty; 1264 1265 PetscFunctionBegin; 1266 PetscAssertPointer(event, 1); 1267 *event = PETSC_DECIDE; 1268 PetscCall(PetscIntStackEmpty(current_log_event_stack, &empty)); 1269 if (!empty) PetscCall(PetscIntStackTop(current_log_event_stack, event)); 1270 PetscFunctionReturn(PETSC_SUCCESS); 1271 } 1272 1273 PetscErrorCode PetscLogEventPause_Internal(PetscLogEvent event) 1274 { 1275 PetscFunctionBegin; 1276 if (event != PETSC_DECIDE) PetscCall(PetscLogEventEnd(event, NULL, NULL, NULL, NULL)); 1277 PetscFunctionReturn(PETSC_SUCCESS); 1278 } 1279 1280 PetscErrorCode PetscLogEventResume_Internal(PetscLogEvent event) 1281 { 1282 PetscStageLog stageLog; 1283 PetscEventPerfLog eventLog; 1284 int stage; 1285 1286 PetscFunctionBegin; 1287 if (event == PETSC_DECIDE) PetscFunctionReturn(PETSC_SUCCESS); 1288 PetscCall(PetscLogEventBegin(event, NULL, NULL, NULL, NULL)); 1289 PetscCall(PetscLogGetStageLog(&stageLog)); 1290 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 1291 PetscCall(PetscStageLogGetEventPerfLog(stageLog, stage, &eventLog)); 1292 eventLog->eventInfo[event].count--; 1293 PetscFunctionReturn(PETSC_SUCCESS); 1294 } 1295 1296 /*------------------------------------------------ Class Functions --------------------------------------------------*/ 1297 1298 /*MC 1299 PetscLogObjectCreate - Log the creation of a `PetscObject` 1300 1301 Synopsis: 1302 #include <petsclog.h> 1303 PetscErrorCode PetscLogObjectCreate(PetscObject h) 1304 1305 Not Collective 1306 1307 Input Parameters: 1308 . h - A `PetscObject` 1309 1310 Level: developer 1311 1312 Developer Note: 1313 Called internally by PETSc when creating objects: users do not need to call this directly. 1314 1315 .seealso: [](ch_profiling), `PetscLogObjectDestroy()` 1316 M*/ 1317 1318 /*MC 1319 PetscLogObjectDestroy - Logs the destruction of a `PetscObject` 1320 1321 Synopsis: 1322 #include <petsclog.h> 1323 PetscErrorCode PetscLogObjectDestroy(PetscObject h) 1324 1325 Not Collective 1326 1327 Input Parameters: 1328 . h - A `PetscObject` 1329 1330 Level: developer 1331 1332 Developer Note: 1333 Called internally by PETSc when destroying objects: users do not need to call this directly. 1334 1335 .seealso: [](ch_profiling), `PetscLogObjectCreate()` 1336 M*/ 1337 1338 /*------------------------------------------------ Output Functions -------------------------------------------------*/ 1339 /*@C 1340 PetscLogDump - Dumps logs of objects to a file. This file is intended to 1341 be read by bin/petscview. This program no longer exists. 1342 1343 Collective on `PETSC_COMM_WORLD` 1344 1345 Input Parameter: 1346 . sname - an optional file name 1347 1348 Example Usage: 1349 .vb 1350 PetscInitialize(...); 1351 PetscLogDefaultBegin(); or PetscLogAllBegin(); 1352 ... code ... 1353 PetscLogDump(filename); 1354 PetscFinalize(); 1355 .ve 1356 1357 Level: advanced 1358 1359 Note: 1360 The default file name is Log.<rank> where <rank> is the MPI process rank. If no name is specified, 1361 this file will be used. 1362 1363 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogView()` 1364 @*/ 1365 PetscErrorCode PetscLogDump(const char sname[]) 1366 { 1367 PetscStageLog stageLog; 1368 PetscEventPerfInfo *eventInfo; 1369 FILE *fd; 1370 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 1371 PetscLogDouble flops, _TotalTime; 1372 PetscMPIInt rank; 1373 int action, object, curStage; 1374 PetscLogEvent event; 1375 1376 PetscFunctionBegin; 1377 /* Calculate the total elapsed time */ 1378 PetscCall(PetscTime(&_TotalTime)); 1379 _TotalTime -= petsc_BaseTime; 1380 /* Open log file */ 1381 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1382 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank)); 1383 PetscCall(PetscFixFilename(file, fname)); 1384 PetscCall(PetscFOpen(PETSC_COMM_WORLD, fname, "w", &fd)); 1385 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 1386 /* Output totals */ 1387 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 1388 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Clock Resolution %g\n", 0.0)); 1389 /* Output actions */ 1390 if (petsc_logActions) { 1391 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Actions accomplished %d\n", petsc_numActions)); 1392 for (action = 0; action < petsc_numActions; action++) { 1393 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%g %d %d %d %d %d %d %g %g %g\n", petsc_actions[action].time, petsc_actions[action].action, (int)petsc_actions[action].event, (int)petsc_actions[action].classid, petsc_actions[action].id1, 1394 petsc_actions[action].id2, petsc_actions[action].id3, petsc_actions[action].flops, petsc_actions[action].mem, petsc_actions[action].maxmem)); 1395 } 1396 } 1397 /* Output objects */ 1398 if (petsc_logObjects) { 1399 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Objects created %d destroyed %d\n", petsc_numObjects, petsc_numObjectsDestroyed)); 1400 for (object = 0; object < petsc_numObjects; object++) { 1401 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Parent ID: %d Memory: %d\n", petsc_objects[object].parent, (int)petsc_objects[object].mem)); 1402 if (!petsc_objects[object].name[0]) { 1403 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Name\n")); 1404 } else { 1405 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Name: %s\n", petsc_objects[object].name)); 1406 } 1407 if (petsc_objects[object].info[0] != 0) { 1408 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "No Info\n")); 1409 } else { 1410 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Info: %s\n", petsc_objects[object].info)); 1411 } 1412 } 1413 } 1414 /* Output events */ 1415 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "Event log:\n")); 1416 PetscCall(PetscLogGetStageLog(&stageLog)); 1417 PetscCall(PetscIntStackTop(stageLog->stack, &curStage)); 1418 eventInfo = stageLog->stageInfo[curStage].eventLog->eventInfo; 1419 for (event = 0; event < stageLog->stageInfo[curStage].eventLog->numEvents; event++) { 1420 if (eventInfo[event].time != 0.0) flops = eventInfo[event].flops / eventInfo[event].time; 1421 else flops = 0.0; 1422 PetscCall(PetscFPrintf(PETSC_COMM_WORLD, fd, "%d %16d %16g %16g %16g\n", event, eventInfo[event].count, eventInfo[event].flops, eventInfo[event].time, flops)); 1423 } 1424 PetscCall(PetscFClose(PETSC_COMM_WORLD, fd)); 1425 PetscFunctionReturn(PETSC_SUCCESS); 1426 } 1427 1428 /* 1429 PetscLogView_Detailed - Each process prints the times for its own events 1430 1431 */ 1432 static PetscErrorCode PetscLogView_Detailed(PetscViewer viewer) 1433 { 1434 PetscStageLog stageLog; 1435 PetscEventPerfInfo *eventInfo = NULL, *stageInfo = NULL; 1436 PetscLogDouble locTotalTime, numRed, maxMem; 1437 int numStages, numEvents, stage, event; 1438 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1439 PetscMPIInt rank, size; 1440 1441 PetscFunctionBegin; 1442 PetscCallMPI(MPI_Comm_size(comm, &size)); 1443 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1444 /* Must preserve reduction count before we go on */ 1445 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1446 /* Get the total elapsed time */ 1447 PetscCall(PetscTime(&locTotalTime)); 1448 locTotalTime -= petsc_BaseTime; 1449 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 1450 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 1451 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 1452 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 1453 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 1454 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 1455 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 1456 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 1457 PetscCall(PetscLogGetStageLog(&stageLog)); 1458 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1459 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1460 for (stage = 0; stage < numStages; stage++) { 1461 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stageLog->stageInfo[stage].name)); 1462 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stageLog->stageInfo[stage].name)); 1463 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1464 for (event = 0; event < numEvents; event++) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name)); 1465 } 1466 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1467 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1468 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1469 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 1470 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1471 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1472 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1473 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, petsc_numObjects)); 1474 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1475 PetscCall(PetscViewerFlush(viewer)); 1476 for (stage = 0; stage < numStages; stage++) { 1477 stageInfo = &stageLog->stageInfo[stage].perfInfo; 1478 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, 1479 stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1480 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1481 for (event = 0; event < numEvents; event++) { 1482 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1483 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", 1484 stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, 1485 eventInfo->flops)); 1486 if (eventInfo->dof[0] >= 0.) { 1487 PetscInt d, e; 1488 1489 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1490 for (d = 0; d < 8; ++d) { 1491 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1492 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1493 } 1494 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1495 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1496 for (e = 0; e < 8; ++e) { 1497 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1498 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1499 } 1500 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1501 } 1502 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1503 } 1504 } 1505 PetscCall(PetscViewerFlush(viewer)); 1506 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1507 PetscFunctionReturn(PETSC_SUCCESS); 1508 } 1509 1510 /* 1511 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1512 */ 1513 static PetscErrorCode PetscLogView_CSV(PetscViewer viewer) 1514 { 1515 PetscStageLog stageLog; 1516 PetscEventPerfInfo *eventInfo = NULL; 1517 PetscLogDouble locTotalTime, maxMem; 1518 int numStages, numEvents, stage, event; 1519 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1520 PetscMPIInt rank, size; 1521 1522 PetscFunctionBegin; 1523 PetscCallMPI(MPI_Comm_size(comm, &size)); 1524 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1525 /* Must preserve reduction count before we go on */ 1526 /* Get the total elapsed time */ 1527 PetscCall(PetscTime(&locTotalTime)); 1528 locTotalTime -= petsc_BaseTime; 1529 PetscCall(PetscLogGetStageLog(&stageLog)); 1530 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1531 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1532 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1533 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1534 PetscCall(PetscViewerFlush(viewer)); 1535 for (stage = 0; stage < numStages; stage++) { 1536 PetscEventPerfInfo *stageInfo = &stageLog->stageInfo[stage].perfInfo; 1537 1538 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stageLog->stageInfo[stage].name, rank, stageInfo->time, stageInfo->numMessages, stageInfo->messageLength, stageInfo->numReductions, stageInfo->flops)); 1539 PetscCallMPI(MPI_Allreduce(&stageLog->stageInfo[stage].eventLog->numEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1540 for (event = 0; event < numEvents; event++) { 1541 eventInfo = &stageLog->stageInfo[stage].eventLog->eventInfo[event]; 1542 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stageLog->stageInfo[stage].name, stageLog->eventLog->eventInfo[event].name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, 1543 eventInfo->numReductions, eventInfo->flops)); 1544 if (eventInfo->dof[0] >= 0.) { 1545 PetscInt d, e; 1546 1547 for (d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1548 for (e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1549 } 1550 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1551 } 1552 } 1553 PetscCall(PetscViewerFlush(viewer)); 1554 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1555 PetscFunctionReturn(PETSC_SUCCESS); 1556 } 1557 1558 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) 1559 { 1560 PetscFunctionBegin; 1561 if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS); 1562 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1563 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1564 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1565 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1566 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1567 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1568 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1569 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1570 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1571 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1572 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1573 PetscFunctionReturn(PETSC_SUCCESS); 1574 } 1575 1576 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) 1577 { 1578 PetscFunctionBegin; 1579 if (PetscDefined(USE_DEBUG)) { 1580 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1581 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1582 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1583 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1584 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1585 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1586 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1587 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1588 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1589 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1590 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1591 } 1592 PetscFunctionReturn(PETSC_SUCCESS); 1593 } 1594 1595 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) 1596 { 1597 #if defined(PETSC_HAVE_DEVICE) 1598 PetscMPIInt size; 1599 PetscBool deviceInitialized = PETSC_FALSE; 1600 1601 PetscFunctionBegin; 1602 PetscCallMPI(MPI_Comm_size(comm, &size)); 1603 for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) { 1604 const PetscDeviceType dtype = PetscDeviceTypeCast(i); 1605 if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */ 1606 deviceInitialized = PETSC_TRUE; 1607 break; 1608 } 1609 } 1610 /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */ 1611 if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS); 1612 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1613 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1614 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1615 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1616 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1617 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1618 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1619 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1620 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1621 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1622 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1623 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1624 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1625 PetscFunctionReturn(PETSC_SUCCESS); 1626 #else 1627 (void)comm; 1628 (void)fd; 1629 return PETSC_SUCCESS; 1630 #endif 1631 } 1632 1633 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) 1634 { 1635 #if defined(PETSC_HAVE_DEVICE) 1636 PetscFunctionBegin; 1637 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS); 1638 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1639 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1640 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1641 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1642 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1643 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1644 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1645 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1646 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1647 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1648 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1649 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1650 PetscFunctionReturn(PETSC_SUCCESS); 1651 #else 1652 (void)comm; 1653 (void)fd; 1654 return PETSC_SUCCESS; 1655 #endif 1656 } 1657 1658 static PetscErrorCode PetscLogView_Default(PetscViewer viewer) 1659 { 1660 FILE *fd; 1661 PetscLogDouble zero = 0.0; 1662 PetscStageLog stageLog; 1663 PetscStageInfo *stageInfo = NULL; 1664 PetscEventPerfInfo *eventInfo = NULL; 1665 PetscClassPerfInfo *classInfo; 1666 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1667 const char *name; 1668 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1669 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1670 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1671 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1672 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1673 PetscLogDouble min, max, tot, ratio, avg, x, y; 1674 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1675 #if defined(PETSC_HAVE_DEVICE) 1676 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1677 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1678 #endif 1679 PetscMPIInt minC, maxC; 1680 PetscMPIInt size, rank; 1681 PetscBool *localStageUsed, *stageUsed; 1682 PetscBool *localStageVisible, *stageVisible; 1683 int numStages, localNumEvents, numEvents; 1684 int stage, oclass; 1685 PetscLogEvent event; 1686 char version[256]; 1687 MPI_Comm comm; 1688 #if defined(PETSC_HAVE_DEVICE) 1689 PetscLogEvent eventid; 1690 PetscInt64 nas = 0x7FF0000000000002; 1691 #endif 1692 1693 PetscFunctionBegin; 1694 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1695 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1696 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1697 PetscCallMPI(MPI_Comm_size(comm, &size)); 1698 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1699 /* Get the total elapsed time */ 1700 PetscCall(PetscTime(&locTotalTime)); 1701 locTotalTime -= petsc_BaseTime; 1702 1703 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1704 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1705 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1706 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1707 PetscCall(PetscLogViewWarnSync(comm, fd)); 1708 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1709 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1710 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1711 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1712 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1713 PetscCall(PetscGetUserName(username, sizeof(username))); 1714 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1715 PetscCall(PetscGetDate(date, sizeof(date))); 1716 PetscCall(PetscGetVersion(version, sizeof(version))); 1717 if (size == 1) { 1718 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1719 } else { 1720 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1721 } 1722 #if defined(PETSC_HAVE_OPENMP) 1723 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1724 #endif 1725 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1726 1727 /* Must preserve reduction count before we go on */ 1728 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1729 1730 /* Calculate summary information */ 1731 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1732 /* Time */ 1733 PetscCall(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1734 PetscCall(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1735 PetscCall(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1736 avg = tot / ((PetscLogDouble)size); 1737 if (min != 0.0) ratio = max / min; 1738 else ratio = 0.0; 1739 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1740 TotalTime = tot; 1741 /* Objects */ 1742 avg = (PetscLogDouble)petsc_numObjects; 1743 PetscCall(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1744 PetscCall(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1745 PetscCall(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1746 avg = tot / ((PetscLogDouble)size); 1747 if (min != 0.0) ratio = max / min; 1748 else ratio = 0.0; 1749 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1750 /* Flops */ 1751 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1752 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1753 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1754 avg = tot / ((PetscLogDouble)size); 1755 if (min != 0.0) ratio = max / min; 1756 else ratio = 0.0; 1757 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1758 TotalFlops = tot; 1759 /* Flops/sec -- Must talk to Barry here */ 1760 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1761 else flops = 0.0; 1762 PetscCall(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1763 PetscCall(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1764 PetscCall(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1765 avg = tot / ((PetscLogDouble)size); 1766 if (min != 0.0) ratio = max / min; 1767 else ratio = 0.0; 1768 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1769 /* Memory */ 1770 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1771 if (mem > 0.0) { 1772 PetscCall(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1773 PetscCall(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1774 PetscCall(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1775 avg = tot / ((PetscLogDouble)size); 1776 if (min != 0.0) ratio = max / min; 1777 else ratio = 0.0; 1778 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1779 } 1780 /* Messages */ 1781 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1782 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1783 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1784 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1785 avg = tot / ((PetscLogDouble)size); 1786 if (min != 0.0) ratio = max / min; 1787 else ratio = 0.0; 1788 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1789 numMessages = tot; 1790 /* Message Lengths */ 1791 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1792 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1793 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1794 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1795 if (numMessages != 0) avg = tot / numMessages; 1796 else avg = 0.0; 1797 if (min != 0.0) ratio = max / min; 1798 else ratio = 0.0; 1799 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1800 messageLength = tot; 1801 /* Reductions */ 1802 PetscCall(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1803 PetscCall(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1804 PetscCall(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1805 if (min != 0.0) ratio = max / min; 1806 else ratio = 0.0; 1807 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1808 numReductions = red; /* wrong because uses count from process zero */ 1809 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1810 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1811 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1812 1813 /* Get total number of stages -- 1814 Currently, a single processor can register more stages than another, but stages must all be registered in order. 1815 We can removed this requirement if necessary by having a global stage numbering and indirection on the stage ID. 1816 This seems best accomplished by assoicating a communicator with each stage. 1817 */ 1818 PetscCall(PetscLogGetStageLog(&stageLog)); 1819 PetscCallMPI(MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm)); 1820 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1821 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1822 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1823 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1824 if (numStages > 0) { 1825 stageInfo = stageLog->stageInfo; 1826 for (stage = 0; stage < numStages; stage++) { 1827 if (stage < stageLog->numStages) { 1828 localStageUsed[stage] = stageInfo[stage].used; 1829 localStageVisible[stage] = stageInfo[stage].perfInfo.visible; 1830 } else { 1831 localStageUsed[stage] = PETSC_FALSE; 1832 localStageVisible[stage] = PETSC_TRUE; 1833 } 1834 } 1835 PetscCall(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1836 PetscCall(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1837 for (stage = 0; stage < numStages; stage++) { 1838 if (stageUsed[stage]) { 1839 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1840 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1841 break; 1842 } 1843 } 1844 for (stage = 0; stage < numStages; stage++) { 1845 if (!stageUsed[stage]) continue; 1846 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1847 if (localStageUsed[stage]) { 1848 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1849 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1850 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1851 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1852 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1853 name = stageInfo[stage].name; 1854 } else { 1855 PetscCall(MPIU_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1856 PetscCall(MPIU_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1857 PetscCall(MPIU_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1858 PetscCall(MPIU_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1859 PetscCall(MPIU_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1860 name = ""; 1861 } 1862 mess *= 0.5; 1863 messLen *= 0.5; 1864 red /= size; 1865 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1866 else fracTime = 0.0; 1867 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1868 else fracFlops = 0.0; 1869 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1870 if (numMessages != 0.0) fracMessages = mess / numMessages; 1871 else fracMessages = 0.0; 1872 if (mess != 0.0) avgMessLen = messLen / mess; 1873 else avgMessLen = 0.0; 1874 if (messageLength != 0.0) fracLength = messLen / messageLength; 1875 else fracLength = 0.0; 1876 if (numReductions != 0.0) fracReductions = red / numReductions; 1877 else fracReductions = 0.0; 1878 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1879 } 1880 } 1881 1882 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1883 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1884 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1885 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1886 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1887 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1888 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1889 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1890 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1891 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1892 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1893 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1894 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1895 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1896 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1897 if (PetscLogMemory) { 1898 PetscCall(PetscFPrintf(comm, fd, " Memory usage is summed over all MPI processes, it is given in mega-bytes\n")); 1899 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1900 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1901 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1902 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1903 } 1904 #if defined(PETSC_HAVE_DEVICE) 1905 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1906 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1907 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1908 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1909 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1910 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1911 #endif 1912 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1913 1914 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1915 1916 /* Report events */ 1917 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1918 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1919 #if defined(PETSC_HAVE_DEVICE) 1920 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1921 #endif 1922 PetscCall(PetscFPrintf(comm, fd, "\n")); 1923 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1924 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1925 #if defined(PETSC_HAVE_DEVICE) 1926 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1927 #endif 1928 PetscCall(PetscFPrintf(comm, fd, "\n")); 1929 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1930 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1931 #if defined(PETSC_HAVE_DEVICE) 1932 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1933 #endif 1934 PetscCall(PetscFPrintf(comm, fd, "\n")); 1935 1936 #if defined(PETSC_HAVE_DEVICE) 1937 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1938 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TAOSolve", &TAO_Solve)); 1939 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "TSStep", &TS_Step)); 1940 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "SNESSolve", &SNES_Solve)); 1941 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, "KSPSolve", &KSP_Solve)); 1942 #endif 1943 1944 /* Problem: The stage name will not show up unless the stage executed on proc 1 */ 1945 for (stage = 0; stage < numStages; stage++) { 1946 if (!stageVisible[stage]) continue; 1947 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1948 if (localStageUsed[stage]) { 1949 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 1950 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1951 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1952 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1953 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1954 PetscCall(MPIU_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1955 } else { 1956 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 1957 PetscCall(MPIU_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1958 PetscCall(MPIU_Allreduce(&zero, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1959 PetscCall(MPIU_Allreduce(&zero, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1960 PetscCall(MPIU_Allreduce(&zero, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1961 PetscCall(MPIU_Allreduce(&zero, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1962 } 1963 mess *= 0.5; 1964 messLen *= 0.5; 1965 red /= size; 1966 1967 /* Get total number of events in this stage -- 1968 Currently, a single processor can register more events than another, but events must all be registered in order, 1969 just like stages. We can removed this requirement if necessary by having a global event numbering and indirection 1970 on the event ID. This seems best accomplished by associating a communicator with each stage. 1971 1972 Problem: If the event did not happen on proc 1, its name will not be available. 1973 Problem: Event visibility is not implemented 1974 */ 1975 if (localStageUsed[stage]) { 1976 eventInfo = stageLog->stageInfo[stage].eventLog->eventInfo; 1977 localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents; 1978 } else localNumEvents = 0; 1979 PetscCallMPI(MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm)); 1980 for (event = 0; event < numEvents; event++) { 1981 /* CANNOT use MPI_Allreduce() since it might fail the line number check */ 1982 if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) { 1983 if ((eventInfo[event].count > 0) && (eventInfo[event].time > 0.0)) flopr = eventInfo[event].flops; 1984 else flopr = 0.0; 1985 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1986 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1987 PetscCall(MPIU_Allreduce(&eventInfo[event].flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1988 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1989 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1990 PetscCall(MPIU_Allreduce(&eventInfo[event].time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1991 PetscCall(MPIU_Allreduce(&eventInfo[event].numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1992 PetscCall(MPIU_Allreduce(&eventInfo[event].messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1993 PetscCall(MPIU_Allreduce(&eventInfo[event].numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1994 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1995 PetscCallMPI(MPI_Allreduce(&eventInfo[event].count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1996 if (PetscLogMemory) { 1997 PetscCall(MPIU_Allreduce(&eventInfo[event].memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1998 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1999 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2000 PetscCall(MPIU_Allreduce(&eventInfo[event].mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2001 } 2002 #if defined(PETSC_HAVE_DEVICE) 2003 PetscCall(MPIU_Allreduce(&eventInfo[event].CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2004 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2005 PetscCall(MPIU_Allreduce(&eventInfo[event].CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2006 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2007 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2008 PetscCall(MPIU_Allreduce(&eventInfo[event].GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 2009 #endif 2010 name = stageLog->eventLog->eventInfo[event].name; 2011 } else { 2012 int ierr = 0; 2013 2014 flopr = 0.0; 2015 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 2016 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 2017 PetscCall(MPIU_Allreduce(&zero, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2018 PetscCall(MPIU_Allreduce(&zero, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 2019 PetscCall(MPIU_Allreduce(&zero, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 2020 PetscCall(MPIU_Allreduce(&zero, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2021 PetscCall(MPIU_Allreduce(&zero, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2022 PetscCall(MPIU_Allreduce(&zero, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2023 PetscCall(MPIU_Allreduce(&zero, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2024 PetscCallMPI(MPI_Allreduce(&ierr, &minC, 1, MPI_INT, MPI_MIN, comm)); 2025 PetscCallMPI(MPI_Allreduce(&ierr, &maxC, 1, MPI_INT, MPI_MAX, comm)); 2026 if (PetscLogMemory) { 2027 PetscCall(MPIU_Allreduce(&zero, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2028 PetscCall(MPIU_Allreduce(&zero, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2029 PetscCall(MPIU_Allreduce(&zero, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2030 PetscCall(MPIU_Allreduce(&zero, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2031 } 2032 #if defined(PETSC_HAVE_DEVICE) 2033 PetscCall(MPIU_Allreduce(&zero, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2034 PetscCall(MPIU_Allreduce(&zero, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2035 PetscCall(MPIU_Allreduce(&zero, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2036 PetscCall(MPIU_Allreduce(&zero, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2037 PetscCall(MPIU_Allreduce(&zero, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 2038 PetscCall(MPIU_Allreduce(&zero, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 2039 #endif 2040 name = ""; 2041 } 2042 if (mint < 0.0) { 2043 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, name)); 2044 mint = 0; 2045 } 2046 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, name); 2047 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 2048 #if defined(PETSC_HAVE_DEVICE) 2049 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 2050 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 2051 PetscCall(PetscEventRegLogGetEvent(stageLog->eventLog, name, &eventid)); 2052 if (eventid != SNES_Solve && eventid != KSP_Solve && eventid != TS_Step && eventid != TAO_Solve) { 2053 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 2054 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 2055 } 2056 } 2057 #endif 2058 totm *= 0.5; 2059 totml *= 0.5; 2060 totr /= size; 2061 2062 if (maxC != 0) { 2063 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 2064 else ratC = 0.0; 2065 if (mint != 0.0) ratt = maxt / mint; 2066 else ratt = 0.0; 2067 if (minf != 0.0) ratf = maxf / minf; 2068 else ratf = 0.0; 2069 if (TotalTime != 0.0) fracTime = tott / TotalTime; 2070 else fracTime = 0.0; 2071 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 2072 else fracFlops = 0.0; 2073 if (stageTime != 0.0) fracStageTime = tott / stageTime; 2074 else fracStageTime = 0.0; 2075 if (flops != 0.0) fracStageFlops = totf / flops; 2076 else fracStageFlops = 0.0; 2077 if (numMessages != 0.0) fracMess = totm / numMessages; 2078 else fracMess = 0.0; 2079 if (messageLength != 0.0) fracMessLen = totml / messageLength; 2080 else fracMessLen = 0.0; 2081 if (numReductions != 0.0) fracRed = totr / numReductions; 2082 else fracRed = 0.0; 2083 if (mess != 0.0) fracStageMess = totm / mess; 2084 else fracStageMess = 0.0; 2085 if (messLen != 0.0) fracStageMessLen = totml / messLen; 2086 else fracStageMessLen = 0.0; 2087 if (red != 0.0) fracStageRed = totr / red; 2088 else fracStageRed = 0.0; 2089 if (totm != 0.0) totml /= totm; 2090 else totml = 0.0; 2091 if (maxt != 0.0) flopr = totf / maxt; 2092 else flopr = 0.0; 2093 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) 2094 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 2095 else 2096 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 2097 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 2098 #if defined(PETSC_HAVE_DEVICE) 2099 if (totf != 0.0) fracgflops = gflops / totf; 2100 else fracgflops = 0.0; 2101 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 2102 else gflopr = 0.0; 2103 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 2104 #endif 2105 PetscCall(PetscFPrintf(comm, fd, "\n")); 2106 } 2107 } 2108 } 2109 2110 /* Memory usage and object creation */ 2111 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 2112 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 2113 #if defined(PETSC_HAVE_DEVICE) 2114 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 2115 #endif 2116 PetscCall(PetscFPrintf(comm, fd, "\n")); 2117 PetscCall(PetscFPrintf(comm, fd, "\n")); 2118 2119 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 2120 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 2121 stats for stages local to processor sets. 2122 */ 2123 /* We should figure out the longest object name here (now 20 characters) */ 2124 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 2125 for (stage = 0; stage < numStages; stage++) { 2126 if (localStageUsed[stage]) { 2127 classInfo = stageLog->stageInfo[stage].classLog->classInfo; 2128 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stageInfo[stage].name)); 2129 for (oclass = 0; oclass < stageLog->stageInfo[stage].classLog->numClasses; oclass++) { 2130 if ((classInfo[oclass].creations > 0) || (classInfo[oclass].destructions > 0)) { 2131 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", stageLog->classLog->classInfo[oclass].name, classInfo[oclass].creations, classInfo[oclass].destructions)); 2132 } 2133 } 2134 } else { 2135 if (!localStageVisible[stage]) continue; 2136 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage)); 2137 } 2138 } 2139 2140 PetscCall(PetscFree(localStageUsed)); 2141 PetscCall(PetscFree(stageUsed)); 2142 PetscCall(PetscFree(localStageVisible)); 2143 PetscCall(PetscFree(stageVisible)); 2144 2145 /* Information unrelated to this particular run */ 2146 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 2147 PetscCall(PetscTime(&y)); 2148 PetscCall(PetscTime(&x)); 2149 PetscCall(PetscTime(&y)); 2150 PetscCall(PetscTime(&y)); 2151 PetscCall(PetscTime(&y)); 2152 PetscCall(PetscTime(&y)); 2153 PetscCall(PetscTime(&y)); 2154 PetscCall(PetscTime(&y)); 2155 PetscCall(PetscTime(&y)); 2156 PetscCall(PetscTime(&y)); 2157 PetscCall(PetscTime(&y)); 2158 PetscCall(PetscTime(&y)); 2159 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 2160 /* MPI information */ 2161 if (size > 1) { 2162 MPI_Status status; 2163 PetscMPIInt tag; 2164 MPI_Comm newcomm; 2165 2166 PetscCallMPI(MPI_Barrier(comm)); 2167 PetscCall(PetscTime(&x)); 2168 PetscCallMPI(MPI_Barrier(comm)); 2169 PetscCallMPI(MPI_Barrier(comm)); 2170 PetscCallMPI(MPI_Barrier(comm)); 2171 PetscCallMPI(MPI_Barrier(comm)); 2172 PetscCallMPI(MPI_Barrier(comm)); 2173 PetscCall(PetscTime(&y)); 2174 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 2175 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 2176 PetscCallMPI(MPI_Barrier(comm)); 2177 if (rank) { 2178 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 2179 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 2180 } else { 2181 PetscCall(PetscTime(&x)); 2182 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 2183 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 2184 PetscCall(PetscTime(&y)); 2185 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 2186 } 2187 PetscCall(PetscCommDestroy(&newcomm)); 2188 } 2189 PetscCall(PetscOptionsView(NULL, viewer)); 2190 2191 /* Machine and compile information */ 2192 #if defined(PETSC_USE_FORTRAN_KERNELS) 2193 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 2194 #else 2195 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 2196 #endif 2197 #if defined(PETSC_USE_64BIT_INDICES) 2198 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64-bit PetscInt\n")); 2199 #elif defined(PETSC_USE___FLOAT128) 2200 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32-bit PetscInt\n")); 2201 #endif 2202 #if defined(PETSC_USE_REAL_SINGLE) 2203 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 2204 #elif defined(PETSC_USE___FLOAT128) 2205 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 2206 #endif 2207 #if defined(PETSC_USE_REAL_MAT_SINGLE) 2208 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 2209 #else 2210 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 2211 #endif 2212 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 2213 2214 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 2215 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 2216 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 2217 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 2218 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 2219 2220 /* Cleanup */ 2221 PetscCall(PetscFPrintf(comm, fd, "\n")); 2222 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 2223 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 2224 PetscCall(PetscFPTrapPop()); 2225 PetscFunctionReturn(PETSC_SUCCESS); 2226 } 2227 2228 /*@C 2229 PetscLogView - Prints a summary of the logging. 2230 2231 Collective over MPI_Comm 2232 2233 Input Parameter: 2234 . viewer - an ASCII viewer 2235 2236 Options Database Keys: 2237 + -log_view [:filename] - Prints summary of log information 2238 . -log_view :filename.py:ascii_info_detail - Saves logging information from each process as a Python file 2239 . -log_view :filename.xml:ascii_xml - Saves a summary of the logging information in a nested format (see below for how to view it) 2240 . -log_view :filename.txt:ascii_flamegraph - Saves logging information in a format suitable for visualising as a Flame Graph (see below for how to view it) 2241 . -log_view_memory - Also display memory usage in each event 2242 . -log_view_gpu_time - Also display time in each event for GPU kernels (Note this may slow the computation) 2243 . -log_all - Saves a file Log.rank for each MPI rank with details of each step of the computation 2244 - -log_trace [filename] - Displays a trace of what each process is doing 2245 2246 Level: beginner 2247 2248 Notes: 2249 It is possible to control the logging programmatically but we recommend using the options database approach whenever possible 2250 By default the summary is printed to stdout. 2251 2252 Before calling this routine you must have called either PetscLogDefaultBegin() or PetscLogNestedBegin() 2253 2254 If PETSc is configured with --with-logging=0 then this functionality is not available 2255 2256 To view the nested XML format filename.xml first copy ${PETSC_DIR}/share/petsc/xml/performance_xml2html.xsl to the current 2257 directory then open filename.xml with your browser. Specific notes for certain browsers 2258 $ Firefox and Internet explorer - simply open the file 2259 $ Google Chrome - you must start up Chrome with the option --allow-file-access-from-files 2260 $ Safari - see https://ccm.net/faq/36342-safari-how-to-enable-local-file-access 2261 or one can use the package http://xmlsoft.org/XSLT/xsltproc2.html to translate the xml file to html and then open it with 2262 your browser. 2263 Alternatively, use the script ${PETSC_DIR}/lib/petsc/bin/petsc-performance-view to automatically open a new browser 2264 window and render the XML log file contents. 2265 2266 The nested XML format was kindly donated by Koos Huijssen and Christiaan M. Klaij MARITIME RESEARCH INSTITUTE NETHERLANDS 2267 2268 The Flame Graph output can be visualised using either the original Flame Graph script (https://github.com/brendangregg/FlameGraph) 2269 or using speedscope (https://www.speedscope.app). 2270 Old XML profiles may be converted into this format using the script ${PETSC_DIR}/lib/petsc/bin/xml2flamegraph.py. 2271 2272 .seealso: [](ch_profiling), `PetscLogDefaultBegin()`, `PetscLogDump()` 2273 @*/ 2274 PetscErrorCode PetscLogView(PetscViewer viewer) 2275 { 2276 PetscBool isascii; 2277 PetscViewerFormat format; 2278 int stage, lastStage; 2279 PetscStageLog stageLog; 2280 2281 PetscFunctionBegin; 2282 PetscCheck(PetscLogPLB, PETSC_COMM_SELF, PETSC_ERR_SUP, "Must use -log_view or PetscLogDefaultBegin() before calling this routine"); 2283 /* Pop off any stages the user forgot to remove */ 2284 lastStage = 0; 2285 PetscCall(PetscLogGetStageLog(&stageLog)); 2286 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2287 while (stage >= 0) { 2288 lastStage = stage; 2289 PetscCall(PetscStageLogPop(stageLog)); 2290 PetscCall(PetscStageLogGetCurrent(stageLog, &stage)); 2291 } 2292 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2293 PetscCheck(isascii, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Currently can only view logging to ASCII"); 2294 PetscCall(PetscViewerGetFormat(viewer, &format)); 2295 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 2296 PetscCall(PetscLogView_Default(viewer)); 2297 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 2298 PetscCall(PetscLogView_Detailed(viewer)); 2299 } else if (format == PETSC_VIEWER_ASCII_CSV) { 2300 PetscCall(PetscLogView_CSV(viewer)); 2301 } else if (format == PETSC_VIEWER_ASCII_XML) { 2302 PetscCall(PetscLogView_Nested(viewer)); 2303 } else if (format == PETSC_VIEWER_ASCII_FLAMEGRAPH) { 2304 PetscCall(PetscLogView_Flamegraph(viewer)); 2305 } 2306 PetscCall(PetscStageLogPush(stageLog, lastStage)); 2307 PetscFunctionReturn(PETSC_SUCCESS); 2308 } 2309 2310 /*@C 2311 PetscLogViewFromOptions - Processes command line options to determine if/how a `PetscLog` is to be viewed. 2312 2313 Collective on `PETSC_COMM_WORLD` 2314 2315 Level: developer 2316 2317 .seealso: [](ch_profiling), `PetscLogView()` 2318 @*/ 2319 PetscErrorCode PetscLogViewFromOptions(void) 2320 { 2321 PetscViewer viewer; 2322 PetscBool flg; 2323 PetscViewerFormat format; 2324 2325 PetscFunctionBegin; 2326 PetscCall(PetscOptionsGetViewer(PETSC_COMM_WORLD, NULL, NULL, "-log_view", &viewer, &format, &flg)); 2327 if (flg) { 2328 PetscCall(PetscViewerPushFormat(viewer, format)); 2329 PetscCall(PetscLogView(viewer)); 2330 PetscCall(PetscViewerPopFormat(viewer)); 2331 PetscCall(PetscViewerDestroy(&viewer)); 2332 } 2333 PetscFunctionReturn(PETSC_SUCCESS); 2334 } 2335 2336 /*----------------------------------------------- Counter Functions -------------------------------------------------*/ 2337 /*@C 2338 PetscGetFlops - Returns the number of flops used on this processor 2339 since the program began. 2340 2341 Not Collective 2342 2343 Output Parameter: 2344 . flops - number of floating point operations 2345 2346 Level: intermediate 2347 2348 Notes: 2349 A global counter logs all PETSc flop counts. The user can use 2350 `PetscLogFlops()` to increment this counter to include flops for the 2351 application code. 2352 2353 A separate counter `PetscLogGPUFlops()` logs the flops that occur on any GPU associated with this MPI rank 2354 2355 .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscTime()`, `PetscLogFlops()` 2356 @*/ 2357 PetscErrorCode PetscGetFlops(PetscLogDouble *flops) 2358 { 2359 PetscFunctionBegin; 2360 *flops = petsc_TotalFlops; 2361 PetscFunctionReturn(PETSC_SUCCESS); 2362 } 2363 2364 /*@C 2365 PetscLogObjectState - Record information about an object with the default log handler 2366 2367 Not Collective 2368 2369 Input Parameters: 2370 + obj - the `PetscObject` 2371 . format - a printf-style format string 2372 - ... - printf arguments to format 2373 2374 Level: developer 2375 2376 .seealso: [](ch_profiling), `PetscLogObjectCreate()`, `PetscLogObjectDestroy()` 2377 @*/ 2378 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2379 { 2380 size_t fullLength; 2381 va_list Argp; 2382 2383 PetscFunctionBegin; 2384 if (!petsc_logObjects) PetscFunctionReturn(PETSC_SUCCESS); 2385 va_start(Argp, format); 2386 PetscCall(PetscVSNPrintf(petsc_objects[obj->id].info, 64, format, &fullLength, Argp)); 2387 va_end(Argp); 2388 PetscFunctionReturn(PETSC_SUCCESS); 2389 } 2390 2391 /*MC 2392 PetscLogFlops - Adds floating point operations to the global counter. 2393 2394 Synopsis: 2395 #include <petsclog.h> 2396 PetscErrorCode PetscLogFlops(PetscLogDouble f) 2397 2398 Not Collective 2399 2400 Input Parameter: 2401 . f - flop counter 2402 2403 Example Usage: 2404 .vb 2405 PetscLogEvent USER_EVENT; 2406 2407 PetscLogEventRegister("User event", 0, &USER_EVENT); 2408 PetscLogEventBegin(USER_EVENT, 0, 0, 0, 0); 2409 [code segment to monitor] 2410 PetscLogFlops(user_flops) 2411 PetscLogEventEnd(USER_EVENT, 0, 0, 0, 0); 2412 .ve 2413 2414 Level: intermediate 2415 2416 Note: 2417 A global counter logs all PETSc flop counts. The user can use PetscLogFlops() to increment 2418 this counter to include flops for the application code. 2419 2420 .seealso: [](ch_profiling), `PetscLogGPUFlops()`, `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscGetFlops()` 2421 M*/ 2422 2423 /*MC 2424 PetscPreLoadBegin - Begin a segment of code that may be preloaded (run twice) to get accurate 2425 timings 2426 2427 Synopsis: 2428 #include <petsclog.h> 2429 void PetscPreLoadBegin(PetscBool flag, char *name); 2430 2431 Not Collective 2432 2433 Input Parameters: 2434 + flag - `PETSC_TRUE` to run twice, `PETSC_FALSE` to run once, may be overridden with command 2435 line option `-preload true|false` 2436 - name - name of first stage (lines of code timed separately with `-log_view`) to be preloaded 2437 2438 Example Usage: 2439 .vb 2440 PetscPreLoadBegin(PETSC_TRUE, "first stage"); 2441 // lines of code 2442 PetscPreLoadStage("second stage"); 2443 // lines of code 2444 PetscPreLoadEnd(); 2445 .ve 2446 2447 Level: intermediate 2448 2449 Note: 2450 Only works in C/C++, not Fortran 2451 2452 Flags available within the macro\: 2453 + PetscPreLoadingUsed - `PETSC_TRUE` if we are or have done preloading 2454 . PetscPreLoadingOn - `PETSC_TRUE` if it is CURRENTLY doing preload 2455 . PetscPreLoadIt - `0` for the first computation (with preloading turned off it is only 2456 `0`) `1` for the second 2457 - PetscPreLoadMax - number of times it will do the computation, only one when preloading is 2458 turned on 2459 2460 The first two variables are available throughout the program, the second two only between the 2461 `PetscPreLoadBegin()` and `PetscPreLoadEnd()` 2462 2463 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadEnd()`, `PetscPreLoadStage()` 2464 M*/ 2465 2466 /*MC 2467 PetscPreLoadEnd - End a segment of code that may be preloaded (run twice) to get accurate 2468 timings 2469 2470 Synopsis: 2471 #include <petsclog.h> 2472 void PetscPreLoadEnd(void); 2473 2474 Not Collective 2475 2476 Example Usage: 2477 .vb 2478 PetscPreLoadBegin(PETSC_TRUE, "first stage"); 2479 // lines of code 2480 PetscPreLoadStage("second stage"); 2481 // lines of code 2482 PetscPreLoadEnd(); 2483 .ve 2484 2485 Level: intermediate 2486 2487 Note: 2488 Only works in C/C++ not fortran 2489 2490 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadStage()` 2491 M*/ 2492 2493 /*MC 2494 PetscPreLoadStage - Start a new segment of code to be timed separately to get accurate timings 2495 2496 Synopsis: 2497 #include <petsclog.h> 2498 void PetscPreLoadStage(char *name); 2499 2500 Not Collective 2501 2502 Example Usage: 2503 .vb 2504 PetscPreLoadBegin(PETSC_TRUE,"first stage"); 2505 // lines of code 2506 PetscPreLoadStage("second stage"); 2507 // lines of code 2508 PetscPreLoadEnd(); 2509 .ve 2510 2511 Level: intermediate 2512 2513 Note: 2514 Only works in C/C++ not fortran 2515 2516 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscPreLoadBegin()`, `PetscPreLoadEnd()` 2517 M*/ 2518 2519 #if PetscDefined(HAVE_DEVICE) 2520 #include <petsc/private/deviceimpl.h> 2521 2522 PetscBool PetscLogGpuTimeFlag = PETSC_FALSE; 2523 2524 /* 2525 This cannot be called by users between PetscInitialize() and PetscFinalize() at any random location in the code 2526 because it will result in timing results that cannot be interpreted. 2527 */ 2528 static PetscErrorCode PetscLogGpuTime_Off(void) 2529 { 2530 PetscLogGpuTimeFlag = PETSC_FALSE; 2531 return PETSC_SUCCESS; 2532 } 2533 2534 /*@C 2535 PetscLogGpuTime - turn on the logging of GPU time for GPU kernels 2536 2537 Options Database Key: 2538 . -log_view_gpu_time - provide the GPU times in the `-log_view` output 2539 2540 Level: advanced 2541 2542 Notes: 2543 Turning on the timing of the GPU kernels can slow down the entire computation and should only 2544 be used when studying the performance of operations on GPU such as vector operations and 2545 matrix-vector operations. 2546 2547 This routine should only be called once near the beginning of the program. Once it is started 2548 it cannot be turned off. 2549 2550 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTimeBegin()` 2551 @*/ 2552 PetscErrorCode PetscLogGpuTime(void) 2553 { 2554 if (!PetscLogGpuTimeFlag) PetscCall(PetscRegisterFinalize(PetscLogGpuTime_Off)); 2555 PetscLogGpuTimeFlag = PETSC_TRUE; 2556 return PETSC_SUCCESS; 2557 } 2558 2559 /*@C 2560 PetscLogGpuTimeBegin - Start timer for device 2561 2562 Level: intermediate 2563 2564 Notes: 2565 When CUDA or HIP is enabled, the timer is run on the GPU, it is a separate logging of time 2566 devoted to GPU computations (excluding kernel launch times). 2567 2568 When CUDA or HIP is not available, the timer is run on the CPU, it is a separate logging of 2569 time devoted to GPU computations (including kernel launch times). 2570 2571 There is no need to call WaitForCUDA() or WaitForHIP() between `PetscLogGpuTimeBegin()` and 2572 `PetscLogGpuTimeEnd()` 2573 2574 This timer should NOT include times for data transfers between the GPU and CPU, nor setup 2575 actions such as allocating space. 2576 2577 The regular logging captures the time for data transfers and any CPU activities during the 2578 event. It is used to compute the flop rate on the GPU as it is actively engaged in running a 2579 kernel. 2580 2581 Developer Notes: 2582 The GPU event timer captures the execution time of all the kernels launched in the default 2583 stream by the CPU between `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()`. 2584 2585 `PetscLogGpuTimeBegin()` and `PetsLogGpuTimeEnd()` insert the begin and end events into the 2586 default stream (stream 0). The device will record a time stamp for the event when it reaches 2587 that event in the stream. The function xxxEventSynchronize() is called in 2588 `PetsLogGpuTimeEnd()` to block CPU execution, but not continued GPU execution, until the 2589 timer event is recorded. 2590 2591 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeEnd()`, `PetscLogGpuTime()` 2592 @*/ 2593 PetscErrorCode PetscLogGpuTimeBegin(void) 2594 { 2595 PetscFunctionBegin; 2596 if (!PetscLogPLB || !PetscLogGpuTimeFlag) PetscFunctionReturn(PETSC_SUCCESS); 2597 if (PetscDefined(HAVE_DEVICE)) { 2598 PetscDeviceContext dctx; 2599 2600 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2601 PetscCall(PetscDeviceContextBeginTimer_Internal(dctx)); 2602 } else { 2603 PetscCall(PetscTimeSubtract(&petsc_gtime)); 2604 } 2605 PetscFunctionReturn(PETSC_SUCCESS); 2606 } 2607 2608 /*@C 2609 PetscLogGpuTimeEnd - Stop timer for device 2610 2611 Level: intermediate 2612 2613 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()`, `PetscLogGpuTimeBegin()` 2614 @*/ 2615 PetscErrorCode PetscLogGpuTimeEnd(void) 2616 { 2617 PetscFunctionBegin; 2618 if (!PetscLogPLE || !PetscLogGpuTimeFlag) PetscFunctionReturn(PETSC_SUCCESS); 2619 if (PetscDefined(HAVE_DEVICE)) { 2620 PetscDeviceContext dctx; 2621 PetscLogDouble elapsed; 2622 2623 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 2624 PetscCall(PetscDeviceContextEndTimer_Internal(dctx, &elapsed)); 2625 petsc_gtime += (elapsed / 1000.0); 2626 } else { 2627 PetscCall(PetscTimeAdd(&petsc_gtime)); 2628 } 2629 PetscFunctionReturn(PETSC_SUCCESS); 2630 } 2631 2632 #endif /* end of PETSC_HAVE_DEVICE */ 2633 2634 #else /* end of -DPETSC_USE_LOG section */ 2635 2636 PetscErrorCode PetscLogObjectState(PetscObject obj, const char format[], ...) 2637 { 2638 PetscFunctionBegin; 2639 PetscFunctionReturn(PETSC_SUCCESS); 2640 } 2641 2642 #endif /* PETSC_USE_LOG*/ 2643 2644 PetscClassId PETSC_LARGEST_CLASSID = PETSC_SMALLEST_CLASSID; 2645 PetscClassId PETSC_OBJECT_CLASSID = 0; 2646 2647 /*@C 2648 PetscClassIdRegister - Registers a new class name for objects and logging operations in an application code. 2649 2650 Not Collective 2651 2652 Input Parameter: 2653 . name - The class name 2654 2655 Output Parameter: 2656 . oclass - The class id or classid 2657 2658 Level: developer 2659 2660 .seealso: [](ch_profiling), `PetscLogEventRegister()` 2661 @*/ 2662 PetscErrorCode PetscClassIdRegister(const char name[], PetscClassId *oclass) 2663 { 2664 #if defined(PETSC_USE_LOG) 2665 PetscStageLog stageLog; 2666 PetscInt stage; 2667 #endif 2668 2669 PetscFunctionBegin; 2670 *oclass = ++PETSC_LARGEST_CLASSID; 2671 #if defined(PETSC_USE_LOG) 2672 PetscCall(PetscLogGetStageLog(&stageLog)); 2673 PetscCall(PetscClassRegLogRegister(stageLog->classLog, name, *oclass)); 2674 for (stage = 0; stage < stageLog->numStages; stage++) PetscCall(PetscClassPerfLogEnsureSize(stageLog->stageInfo[stage].classLog, stageLog->classLog->numClasses)); 2675 #endif 2676 PetscFunctionReturn(PETSC_SUCCESS); 2677 } 2678 2679 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_MPE) 2680 #include <mpe.h> 2681 2682 PetscBool PetscBeganMPE = PETSC_FALSE; 2683 2684 PETSC_INTERN PetscErrorCode PetscLogEventBeginMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2685 PETSC_INTERN PetscErrorCode PetscLogEventEndMPE(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 2686 2687 /*@C 2688 PetscLogMPEBegin - Turns on MPE logging of events. This creates large log files and slows the 2689 program down. 2690 2691 Collective over `PETSC_COMM_WORLD` 2692 2693 Options Database Key: 2694 . -log_mpe - Prints extensive log information 2695 2696 Level: advanced 2697 2698 Note: 2699 A related routine is `PetscLogDefaultBegin()` (with the options key `-log_view`), which is 2700 intended for production runs since it logs only flop rates and object creation (and should 2701 not significantly slow the programs). 2702 2703 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogDefaultBegin()`, `PetscLogAllBegin()`, `PetscLogEventActivate()`, 2704 `PetscLogEventDeactivate()` 2705 @*/ 2706 PetscErrorCode PetscLogMPEBegin(void) 2707 { 2708 PetscFunctionBegin; 2709 /* Do MPE initialization */ 2710 if (!MPE_Initialized_logging()) { /* This function exists in mpich 1.1.2 and higher */ 2711 PetscCall(PetscInfo(0, "Initializing MPE.\n")); 2712 PetscCall(MPE_Init_log()); 2713 2714 PetscBeganMPE = PETSC_TRUE; 2715 } else { 2716 PetscCall(PetscInfo(0, "MPE already initialized. Not attempting to reinitialize.\n")); 2717 } 2718 PetscCall(PetscLogSet(PetscLogEventBeginMPE, PetscLogEventEndMPE)); 2719 PetscFunctionReturn(PETSC_SUCCESS); 2720 } 2721 2722 /*@C 2723 PetscLogMPEDump - Dumps the MPE logging info to file for later use with Jumpshot. 2724 2725 Input Parameter: 2726 . sname - The filename to dump to 2727 2728 Collective over `PETSC_COMM_WORLD` 2729 2730 Level: advanced 2731 2732 .seealso: [](ch_profiling), `PetscLogDump()`, `PetscLogAllBegin()`, `PetscLogMPEBegin()` 2733 @*/ 2734 PetscErrorCode PetscLogMPEDump(const char sname[]) 2735 { 2736 char name[PETSC_MAX_PATH_LEN]; 2737 2738 PetscFunctionBegin; 2739 if (PetscBeganMPE) { 2740 PetscCall(PetscInfo(0, "Finalizing MPE.\n")); 2741 if (sname) { 2742 PetscCall(PetscStrncpy(name, sname, sizeof(name))); 2743 } else { 2744 PetscCall(PetscGetProgramName(name, sizeof(name))); 2745 } 2746 PetscCall(MPE_Finish_log(name)); 2747 } else { 2748 PetscCall(PetscInfo(0, "Not finalizing MPE (not started by PETSc).\n")); 2749 } 2750 PetscFunctionReturn(PETSC_SUCCESS); 2751 } 2752 2753 #define PETSC_RGB_COLORS_MAX 39 2754 static const char *PetscLogMPERGBColors[PETSC_RGB_COLORS_MAX] = {"OliveDrab: ", "BlueViolet: ", "CadetBlue: ", "CornflowerBlue: ", "DarkGoldenrod: ", "DarkGreen: ", "DarkKhaki: ", "DarkOliveGreen: ", 2755 "DarkOrange: ", "DarkOrchid: ", "DarkSeaGreen: ", "DarkSlateGray: ", "DarkTurquoise: ", "DeepPink: ", "DarkKhaki: ", "DimGray: ", 2756 "DodgerBlue: ", "GreenYellow: ", "HotPink: ", "IndianRed: ", "LavenderBlush: ", "LawnGreen: ", "LemonChiffon: ", "LightCoral: ", 2757 "LightCyan: ", "LightPink: ", "LightSalmon: ", "LightSlateGray: ", "LightYellow: ", "LimeGreen: ", "MediumPurple: ", "MediumSeaGreen: ", 2758 "MediumSlateBlue:", "MidnightBlue: ", "MintCream: ", "MistyRose: ", "NavajoWhite: ", "NavyBlue: ", "OliveDrab: "}; 2759 2760 /*@C 2761 PetscLogMPEGetRGBColor - This routine returns a rgb color useable with `PetscLogEventRegister()` 2762 2763 Not collective. Maybe it should be? 2764 2765 Output Parameter: 2766 . str - character string representing the color 2767 2768 Level: developer 2769 2770 .seealso: [](ch_profiling), `PetscLogEventRegister()` 2771 @*/ 2772 PetscErrorCode PetscLogMPEGetRGBColor(const char *str[]) 2773 { 2774 static int idx = 0; 2775 2776 PetscFunctionBegin; 2777 *str = PetscLogMPERGBColors[idx]; 2778 idx = (idx + 1) % PETSC_RGB_COLORS_MAX; 2779 PetscFunctionReturn(PETSC_SUCCESS); 2780 } 2781 2782 #endif /* PETSC_USE_LOG && PETSC_HAVE_MPE */ 2783