1 #include <petscviewer.h> 2 #include <petscdevice.h> 3 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 4 #include <petsc/private/loghandlerimpl.h> 5 #include <petsc/private/deviceimpl.h> 6 #include <petscconfiginfo.h> 7 #include <petscmachineinfo.h> 8 #include "logdefault.h" 9 10 static PetscErrorCode PetscEventPerfInfoInit(PetscEventPerfInfo *eventInfo) 11 { 12 PetscFunctionBegin; 13 PetscCall(PetscMemzero(eventInfo, sizeof(*eventInfo))); 14 eventInfo->visible = PETSC_TRUE; 15 eventInfo->id = -1; 16 eventInfo->dof[0] = -1.0; 17 eventInfo->dof[1] = -1.0; 18 eventInfo->dof[2] = -1.0; 19 eventInfo->dof[3] = -1.0; 20 eventInfo->dof[4] = -1.0; 21 eventInfo->dof[5] = -1.0; 22 eventInfo->dof[6] = -1.0; 23 eventInfo->dof[7] = -1.0; 24 eventInfo->errors[0] = -1.0; 25 eventInfo->errors[1] = -1.0; 26 eventInfo->errors[2] = -1.0; 27 eventInfo->errors[3] = -1.0; 28 eventInfo->errors[4] = -1.0; 29 eventInfo->errors[5] = -1.0; 30 eventInfo->errors[6] = -1.0; 31 eventInfo->errors[7] = -1.0; 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 static PetscErrorCode PetscEventPerfInfoTic_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool resume) 36 { 37 PetscFunctionBegin; 38 if (resume) { 39 eventInfo->timeTmp -= time; 40 eventInfo->flopsTmp -= petsc_TotalFlops_th; 41 } else { 42 eventInfo->timeTmp = -time; 43 eventInfo->flopsTmp = -petsc_TotalFlops_th; 44 } 45 eventInfo->numMessages -= petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th; 46 eventInfo->messageLength -= petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len_th + petsc_send_len_th; 47 eventInfo->numReductions -= petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th; 48 #if defined(PETSC_HAVE_DEVICE) 49 eventInfo->CpuToGpuCount -= petsc_ctog_ct_th; 50 eventInfo->GpuToCpuCount -= petsc_gtoc_ct_th; 51 eventInfo->CpuToGpuSize -= petsc_ctog_sz_th; 52 eventInfo->GpuToCpuSize -= petsc_gtoc_sz_th; 53 eventInfo->GpuFlops -= petsc_gflops_th; 54 eventInfo->GpuTime -= petsc_gtime; 55 #endif 56 if (logMemory) { 57 PetscLogDouble usage; 58 PetscCall(PetscMemoryGetCurrentUsage(&usage)); 59 eventInfo->memIncrease -= usage; 60 PetscCall(PetscMallocGetCurrentUsage(&usage)); 61 eventInfo->mallocSpace -= usage; 62 PetscCall(PetscMallocGetMaximumUsage(&usage)); 63 eventInfo->mallocIncrease -= usage; 64 PetscCall(PetscMallocPushMaximumUsage(event)); 65 } 66 PetscFunctionReturn(PETSC_SUCCESS); 67 } 68 69 static PetscErrorCode PetscEventPerfInfoTic(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 70 { 71 PetscFunctionBegin; 72 PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_FALSE)); 73 PetscFunctionReturn(PETSC_SUCCESS); 74 } 75 76 static PetscErrorCode PetscEventPerfInfoResume(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 77 { 78 PetscFunctionBegin; 79 PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_TRUE)); 80 PetscFunctionReturn(PETSC_SUCCESS); 81 } 82 83 static PetscErrorCode PetscEventPerfInfoToc_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool pause) 84 { 85 PetscFunctionBegin; 86 eventInfo->timeTmp += time; 87 eventInfo->flopsTmp += petsc_TotalFlops_th; 88 if (!pause) { 89 eventInfo->time += eventInfo->timeTmp; 90 eventInfo->time2 += eventInfo->timeTmp * eventInfo->timeTmp; 91 eventInfo->flops += eventInfo->flopsTmp; 92 eventInfo->flops2 += eventInfo->flopsTmp * eventInfo->flopsTmp; 93 } 94 eventInfo->numMessages += petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th; 95 eventInfo->messageLength += petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len + petsc_send_len_th; 96 eventInfo->numReductions += petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th; 97 #if defined(PETSC_HAVE_DEVICE) 98 eventInfo->CpuToGpuCount += petsc_ctog_ct_th; 99 eventInfo->GpuToCpuCount += petsc_gtoc_ct_th; 100 eventInfo->CpuToGpuSize += petsc_ctog_sz_th; 101 eventInfo->GpuToCpuSize += petsc_gtoc_sz_th; 102 eventInfo->GpuFlops += petsc_gflops_th; 103 eventInfo->GpuTime += petsc_gtime; 104 #endif 105 if (logMemory) { 106 PetscLogDouble usage, musage; 107 PetscCall(PetscMemoryGetCurrentUsage(&usage)); /* the comments below match the column labels printed in PetscLogView_Default() */ 108 eventInfo->memIncrease += usage; /* RMI */ 109 PetscCall(PetscMallocGetCurrentUsage(&usage)); 110 eventInfo->mallocSpace += usage; /* Malloc */ 111 PetscCall(PetscMallocPopMaximumUsage((int)event, &musage)); 112 eventInfo->mallocIncreaseEvent = PetscMax(musage - usage, eventInfo->mallocIncreaseEvent); /* EMalloc */ 113 PetscCall(PetscMallocGetMaximumUsage(&usage)); 114 eventInfo->mallocIncrease += usage; /* MMalloc */ 115 } 116 PetscFunctionReturn(PETSC_SUCCESS); 117 } 118 119 static PetscErrorCode PetscEventPerfInfoToc(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 120 { 121 PetscFunctionBegin; 122 PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_FALSE)); 123 PetscFunctionReturn(PETSC_SUCCESS); 124 } 125 126 static PetscErrorCode PetscEventPerfInfoPause(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 127 { 128 PetscFunctionBegin; 129 PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_FALSE)); 130 PetscFunctionReturn(PETSC_SUCCESS); 131 } 132 133 static PetscErrorCode PetscEventPerfInfoAdd_Internal(const PetscEventPerfInfo *eventInfo, PetscEventPerfInfo *outInfo) 134 { 135 PetscFunctionBegin; 136 outInfo->count += eventInfo->count; 137 outInfo->time += eventInfo->time; 138 outInfo->time2 += eventInfo->time2; 139 outInfo->flops += eventInfo->flops; 140 outInfo->flops2 += eventInfo->flops2; 141 outInfo->numMessages += eventInfo->numMessages; 142 outInfo->messageLength += eventInfo->messageLength; 143 outInfo->numReductions += eventInfo->numReductions; 144 #if defined(PETSC_HAVE_DEVICE) 145 outInfo->CpuToGpuCount += eventInfo->CpuToGpuCount; 146 outInfo->GpuToCpuCount += eventInfo->GpuToCpuCount; 147 outInfo->CpuToGpuSize += eventInfo->CpuToGpuSize; 148 outInfo->GpuToCpuSize += eventInfo->GpuToCpuSize; 149 outInfo->GpuFlops += eventInfo->GpuFlops; 150 outInfo->GpuTime += eventInfo->GpuTime; 151 #endif 152 outInfo->memIncrease += eventInfo->memIncrease; 153 outInfo->mallocSpace += eventInfo->mallocSpace; 154 outInfo->mallocIncreaseEvent += eventInfo->mallocIncreaseEvent; 155 outInfo->mallocIncrease += eventInfo->mallocIncrease; 156 PetscFunctionReturn(PETSC_SUCCESS); 157 } 158 159 PETSC_LOG_RESIZABLE_ARRAY(EventPerfArray, PetscEventPerfInfo, PetscLogEvent, PetscEventPerfInfoInit, NULL, NULL) 160 161 /* --- PetscClassPerf --- */ 162 163 typedef struct { 164 int creations; /* The number of objects of this class created */ 165 int destructions; /* The number of objects of this class destroyed */ 166 PetscLogDouble mem; /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */ 167 PetscLogDouble descMem; /* The total memory allocated by descendents of these objects; this is completely wrong and should possibly be removed */ 168 } PetscClassPerf; 169 170 static PetscErrorCode PetscClassPerfInit(PetscClassPerf *classInfo) 171 { 172 PetscFunctionBegin; 173 PetscCall(PetscMemzero(classInfo, sizeof(*classInfo))); 174 PetscFunctionReturn(PETSC_SUCCESS); 175 } 176 177 PETSC_LOG_RESIZABLE_ARRAY(ClassPerfArray, PetscClassPerf, PetscLogClass, PetscClassPerfInit, NULL, NULL) 178 179 /* --- PetscStagePerf --- */ 180 181 typedef struct _PetscStagePerf { 182 PetscBool used; /* The stage was pushed on this processor */ 183 PetscEventPerfInfo perfInfo; /* The stage performance information */ 184 PetscLogEventPerfArray eventLog; /* The event information for this stage */ 185 PetscLogClassPerfArray classLog; /* The class information for this stage */ 186 } PetscStagePerf; 187 188 static PetscErrorCode PetscStageInfoInit(PetscStagePerf *stageInfo) 189 { 190 PetscFunctionBegin; 191 PetscCall(PetscMemzero(stageInfo, sizeof(*stageInfo))); 192 PetscCall(PetscLogEventPerfArrayCreate(128, &(stageInfo->eventLog))); 193 PetscCall(PetscLogClassPerfArrayCreate(128, &(stageInfo->classLog))); 194 PetscCall(PetscEventPerfInfoInit(&stageInfo->perfInfo)); 195 PetscFunctionReturn(PETSC_SUCCESS); 196 } 197 198 static PetscErrorCode PetscStageInfoReset(PetscStagePerf *stageInfo) 199 { 200 PetscFunctionBegin; 201 PetscCall(PetscLogEventPerfArrayDestroy(&(stageInfo->eventLog))); 202 PetscCall(PetscLogClassPerfArrayDestroy(&(stageInfo->classLog))); 203 PetscFunctionReturn(PETSC_SUCCESS); 204 } 205 206 PETSC_LOG_RESIZABLE_ARRAY(StageInfoArray, PetscStagePerf, PetscLogStage, PetscStageInfoInit, PetscStageInfoReset, NULL) 207 208 /* --- Action --- */ 209 210 /* The structure for action logging */ 211 typedef enum { 212 PETSC_LOG_ACTION_CREATE, 213 PETSC_LOG_ACTION_DESTROY, 214 PETSC_LOG_ACTION_BEGIN, 215 PETSC_LOG_ACTION_END, 216 } PetscLogActionType; 217 218 typedef struct _Action { 219 PetscLogActionType action; /* The type of execution */ 220 PetscLogEvent event; /* The event number */ 221 PetscClassId classid; /* The event class id */ 222 PetscLogDouble time; /* The time of occurrence */ 223 PetscLogDouble flops; /* The cumulative flops */ 224 PetscLogDouble mem; /* The current memory usage */ 225 PetscLogDouble maxmem; /* The maximum memory usage */ 226 int id1, id2, id3; /* The ids of associated objects */ 227 } Action; 228 229 PETSC_LOG_RESIZABLE_ARRAY(ActionArray, Action, PetscLogEvent, NULL, NULL, NULL) 230 231 /* --- Object --- */ 232 233 /* The structure for object logging */ 234 typedef struct _Object { 235 PetscObject obj; /* The associated PetscObject */ 236 int parent; /* The parent id */ 237 PetscLogDouble mem; /* The memory associated with the object */ 238 char name[64]; /* The object name */ 239 char info[64]; /* The information string */ 240 } Object; 241 242 PETSC_LOG_RESIZABLE_ARRAY(ObjectArray, Object, PetscObject, NULL, NULL, NULL) 243 244 /* Map from (threadid,stage,event) to perfInfo data struct */ 245 #include <petsc/private/hashmapijk.h> 246 247 PETSC_HASH_MAP(HMapEvent, PetscHashIJKKey, PetscEventPerfInfo *, PetscHashIJKKeyHash, PetscHashIJKKeyEqual, NULL) 248 249 typedef struct _n_PetscLogHandler_Default *PetscLogHandler_Default; 250 struct _n_PetscLogHandler_Default { 251 PetscLogStageInfoArray stages; 252 PetscSpinlock lock; 253 PetscLogActionArray petsc_actions; 254 PetscLogObjectArray petsc_objects; 255 PetscBool petsc_logActions; 256 PetscBool petsc_logObjects; 257 int petsc_numObjectsCreated; 258 int petsc_numObjectsDestroyed; 259 PetscHMapEvent eventInfoMap_th; 260 int pause_depth; 261 PetscBool use_threadsafe; 262 }; 263 264 /* --- PetscLogHandler_Default --- */ 265 266 static PetscErrorCode PetscLogHandlerContextCreate_Default(PetscLogHandler_Default *def_p) 267 { 268 PetscLogHandler_Default def; 269 270 PetscFunctionBegin; 271 PetscCall(PetscNew(def_p)); 272 def = *def_p; 273 PetscCall(PetscLogStageInfoArrayCreate(8, &(def->stages))); 274 PetscCall(PetscLogActionArrayCreate(64, &(def->petsc_actions))); 275 PetscCall(PetscLogObjectArrayCreate(64, &(def->petsc_objects))); 276 277 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_actions", &def->petsc_logActions, NULL)); 278 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_objects", &def->petsc_logObjects, NULL)); 279 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_handler_default_use_threadsafe_events", &def->use_threadsafe, NULL)); 280 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) { PetscCall(PetscHMapEventCreate(&def->eventInfoMap_th)); } 281 PetscFunctionReturn(PETSC_SUCCESS); 282 } 283 284 static PetscErrorCode PetscLogHandlerDestroy_Default(PetscLogHandler h) 285 { 286 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 287 288 PetscFunctionBegin; 289 PetscCall(PetscLogStageInfoArrayDestroy(&def->stages)); 290 PetscCall(PetscLogActionArrayDestroy(&def->petsc_actions)); 291 PetscCall(PetscLogObjectArrayDestroy(&def->petsc_objects)); 292 if (def->eventInfoMap_th) { 293 PetscEventPerfInfo **array; 294 PetscInt n, off = 0; 295 296 PetscCall(PetscHMapEventGetSize(def->eventInfoMap_th, &n)); 297 PetscCall(PetscMalloc1(n, &array)); 298 PetscCall(PetscHMapEventGetVals(def->eventInfoMap_th, &off, array)); 299 for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i])); 300 PetscCall(PetscFree(array)); 301 PetscCall(PetscHMapEventDestroy(&def->eventInfoMap_th)); 302 } 303 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerGetEventPerfInfo_C", NULL)); 304 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerGetStagePerfInfo_C", NULL)); 305 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerSetLogActions_C", NULL)); 306 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerSetLogObjects_C", NULL)); 307 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerLogObjectState_C", NULL)); 308 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerGetNumObjects_C", NULL)); 309 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventDeactivatePush_C", NULL)); 310 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventDeactivatePop_C", NULL)); 311 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventsPause_C", NULL)); 312 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventsResume_C", NULL)); 313 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerDump_C", NULL)); 314 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerStageSetVisible_C", NULL)); 315 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerStageGetVisible_C", NULL)); 316 PetscCall(PetscFree(def)); 317 PetscFunctionReturn(PETSC_SUCCESS); 318 } 319 320 static PetscErrorCode PetscLogHandlerDefaultGetStageInfo(PetscLogHandler handler, PetscLogStage stage, PetscStagePerf **stage_info_p) 321 { 322 PetscStagePerf *stage_info = NULL; 323 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 324 325 PetscFunctionBegin; 326 PetscCall(PetscLogStageInfoArrayResize(def->stages, stage + 1)); 327 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 328 *stage_info_p = stage_info; 329 PetscFunctionReturn(PETSC_SUCCESS); 330 } 331 332 static PetscErrorCode PetscLogHandlerGetEventPerfInfo_Default(PetscLogHandler handler, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **event_info_p) 333 { 334 PetscEventPerfInfo *event_info = NULL; 335 PetscStagePerf *stage_info = NULL; 336 PetscLogEventPerfArray event_log; 337 338 PetscFunctionBegin; 339 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(handler->state, &stage)); 340 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 341 event_log = stage_info->eventLog; 342 PetscCall(PetscLogEventPerfArrayResize(event_log, event + 1)); 343 PetscCall(PetscLogEventPerfArrayGetRef(event_log, event, &event_info)); 344 event_info->id = event; 345 *event_info_p = event_info; 346 PetscFunctionReturn(PETSC_SUCCESS); 347 } 348 349 static PetscErrorCode PetscLogHandlerGetStagePerfInfo_Default(PetscLogHandler handler, PetscLogStage stage, PetscEventPerfInfo **stage_info_p) 350 { 351 PetscStagePerf *stage_perf_info = NULL; 352 353 PetscFunctionBegin; 354 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(handler->state, &stage)); 355 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_perf_info)); 356 *stage_info_p = &stage_perf_info->perfInfo; 357 PetscFunctionReturn(PETSC_SUCCESS); 358 } 359 360 static PetscErrorCode PetscLogHandlerDefaultGetClassPerf(PetscLogHandler handler, PetscLogStage stage, PetscLogClass clss, PetscClassPerf **class_info) 361 { 362 PetscLogClassPerfArray class_log; 363 PetscStagePerf *stage_info; 364 365 PetscFunctionBegin; 366 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 367 class_log = stage_info->classLog; 368 PetscCall(PetscLogClassPerfArrayResize(class_log, clss + 1)); 369 PetscCall(PetscLogClassPerfArrayGetRef(class_log, clss, class_info)); 370 PetscFunctionReturn(PETSC_SUCCESS); 371 } 372 373 static PetscErrorCode PetscLogHandlerObjectCreate_Default(PetscLogHandler h, PetscObject obj) 374 { 375 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 376 PetscLogState state; 377 PetscLogStage stage; 378 PetscClassPerf *classInfo; 379 int oclass = 0; 380 381 PetscFunctionBegin; 382 PetscCall(PetscLogHandlerGetState(h, &state)); 383 PetscCall(PetscSpinlockLock(&def->lock)); 384 /* Record stage info */ 385 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 386 PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass)); 387 PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo)); 388 classInfo->creations++; 389 /* Record the creation action */ 390 if (def->petsc_logActions) { 391 Action new_action; 392 393 PetscCall(PetscTime(&new_action.time)); 394 new_action.time -= petsc_BaseTime; 395 new_action.action = PETSC_LOG_ACTION_CREATE; 396 new_action.event = -1; 397 new_action.classid = obj->classid; 398 new_action.id1 = obj->id; 399 new_action.id2 = -1; 400 new_action.id3 = -1; 401 new_action.flops = petsc_TotalFlops; 402 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 403 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 404 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 405 } 406 /* We don't just use obj->id to count all objects that are created 407 because PetscLogHandlers are objects and PetscLogObjectDestroy() will not 408 be called for them: the number of objects created and destroyed as counted 409 here and below would have an imbalance */ 410 def->petsc_numObjectsCreated++; 411 /* Record the object */ 412 if (def->petsc_logObjects) { 413 Object new_object; 414 415 new_object.parent = -1; 416 new_object.obj = obj; 417 new_object.mem = 0; 418 419 PetscCall(PetscMemzero(new_object.name, sizeof(new_object.name))); 420 PetscCall(PetscMemzero(new_object.info, sizeof(new_object.info))); 421 PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1"); 422 PetscCall(PetscLogObjectArrayResize(def->petsc_objects, obj->id)); 423 PetscCall(PetscLogObjectArraySet(def->petsc_objects, obj->id - 1, new_object)); 424 } 425 PetscCall(PetscSpinlockUnlock(&def->lock)); 426 PetscFunctionReturn(PETSC_SUCCESS); 427 } 428 429 static PetscErrorCode PetscLogHandlerObjectDestroy_Default(PetscLogHandler h, PetscObject obj) 430 { 431 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 432 PetscLogState state; 433 PetscLogStage stage; 434 PetscClassPerf *classInfo; 435 int oclass = 0; 436 437 PetscFunctionBegin; 438 PetscCall(PetscLogHandlerGetState(h, &state)); 439 /* Record stage info */ 440 PetscCall(PetscSpinlockLock(&def->lock)); 441 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 442 if (stage >= 0) { 443 /* stage < 0 can happen if the log summary is output before some things are destroyed */ 444 PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass)); 445 PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo)); 446 classInfo->destructions++; 447 } 448 /* Cannot Credit all ancestors with your memory because they may have already been destroyed*/ 449 def->petsc_numObjectsDestroyed++; 450 /* Dynamically enlarge logging structures */ 451 /* Record the destruction action */ 452 if (def->petsc_logActions) { 453 Action new_action; 454 455 PetscCall(PetscTime(&new_action.time)); 456 new_action.time -= petsc_BaseTime; 457 new_action.event = -1; 458 new_action.action = PETSC_LOG_ACTION_DESTROY; 459 new_action.classid = obj->classid; 460 new_action.id1 = obj->id; 461 new_action.id2 = -1; 462 new_action.id3 = -1; 463 new_action.flops = petsc_TotalFlops; 464 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 465 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 466 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 467 } 468 if (def->petsc_logObjects) { 469 Object *obj_entry = NULL; 470 471 PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1"); 472 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, obj->id - 1, &obj_entry)); 473 if (obj->name) PetscCall(PetscStrncpy(obj_entry->name, obj->name, 64)); 474 obj_entry->obj = NULL; 475 } 476 PetscCall(PetscSpinlockUnlock(&def->lock)); 477 PetscFunctionReturn(PETSC_SUCCESS); 478 } 479 480 static PetscErrorCode PetscLogHandlerEventSync_Default(PetscLogHandler h, PetscLogEvent event, MPI_Comm comm) 481 { 482 PetscLogState state; 483 PetscLogEventInfo event_info; 484 PetscEventPerfInfo *event_perf_info; 485 int stage; 486 PetscLogDouble time = 0.0; 487 488 PetscFunctionBegin; 489 if (!(PetscLogSyncOn) || comm == MPI_COMM_NULL) PetscFunctionReturn(PETSC_SUCCESS); 490 PetscCall(PetscLogHandlerGetState(h, &state)); 491 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 492 if (!event_info.collective) PetscFunctionReturn(PETSC_SUCCESS); 493 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 494 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 495 if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 496 497 PetscCall(PetscTimeSubtract(&time)); 498 PetscCallMPI(MPI_Barrier(comm)); 499 PetscCall(PetscTimeAdd(&time)); 500 event_perf_info->syncTime += time; 501 PetscFunctionReturn(PETSC_SUCCESS); 502 } 503 504 static PetscErrorCode PetscLogGetStageEventPerfInfo_threaded(PetscLogHandler_Default def, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **eventInfo) 505 { 506 PetscEventPerfInfo *leventInfo = NULL; 507 PetscHashIJKKey key; 508 509 PetscFunctionBegin; 510 511 #if PetscDefined(HAVE_THREADSAFETY) 512 key.i = PetscLogGetTid(); 513 #else 514 key.i = 0; 515 #endif 516 key.j = stage; 517 key.k = event; 518 PetscCall(PetscSpinlockLock(&def->lock)); 519 PetscCall(PetscHMapEventGet(def->eventInfoMap_th, key, &leventInfo)); 520 if (!leventInfo) { 521 PetscCall(PetscNew(&leventInfo)); 522 leventInfo->id = event; 523 PetscCall(PetscHMapEventSet(def->eventInfoMap_th, key, leventInfo)); 524 } 525 PetscCall(PetscSpinlockUnlock(&def->lock)); 526 *eventInfo = leventInfo; 527 PetscFunctionReturn(PETSC_SUCCESS); 528 } 529 530 #if defined(PETSC_HAVE_CUDA) 531 #include <nvToolsExt.h> 532 #endif 533 static PetscErrorCode PetscLogHandlerEventBegin_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 534 { 535 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 536 PetscEventPerfInfo *event_perf_info = NULL; 537 PetscLogEventInfo event_info; 538 PetscLogDouble time; 539 PetscLogState state; 540 PetscLogStage stage; 541 542 PetscFunctionBegin; 543 PetscCall(PetscLogHandlerGetState(h, &state)); 544 /* Synchronization */ 545 PetscCall(PetscLogHandlerEventSync_Default(h, event, PetscObjectComm(o1))); 546 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 547 if (def->pause_depth > 0) stage = 0; // in pause-mode, all events run on the main stage 548 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) { 549 PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info)); 550 if (event_perf_info->depth == 0) { PetscCall(PetscEventPerfInfoInit(event_perf_info)); } 551 } else { 552 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 553 } 554 PetscCheck(event_perf_info->depth >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to begin a paused event, this is not allowed"); 555 event_perf_info->depth++; 556 /* Check for double counting */ 557 if (event_perf_info->depth > 1) PetscFunctionReturn(PETSC_SUCCESS); 558 #if defined(PETSC_HAVE_CUDA) 559 if (PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { 560 PetscLogEventInfo event_reg_info; 561 562 PetscCall(PetscLogStateEventGetInfo(state, event, &event_reg_info)); 563 nvtxRangePushA(event_reg_info.name); 564 } 565 #endif 566 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 567 /* Log the performance info */ 568 event_perf_info->count++; 569 PetscCall(PetscTime(&time)); 570 PetscCall(PetscEventPerfInfoTic(event_perf_info, time, PetscLogMemory, (int)event)); 571 if (def->petsc_logActions) { 572 PetscLogDouble curTime; 573 Action new_action; 574 575 PetscCall(PetscTime(&curTime)); 576 new_action.time = curTime - petsc_BaseTime; 577 new_action.action = PETSC_LOG_ACTION_BEGIN; 578 new_action.event = event; 579 new_action.classid = event_info.classid; 580 new_action.id1 = o1 ? o1->id : -1; 581 new_action.id2 = o2 ? o2->id : -1; 582 new_action.id3 = o3 ? o3->id : -1; 583 new_action.flops = petsc_TotalFlops; 584 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 585 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 586 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 587 } 588 PetscFunctionReturn(PETSC_SUCCESS); 589 } 590 591 static PetscErrorCode PetscLogHandlerEventEnd_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 592 { 593 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 594 PetscEventPerfInfo *event_perf_info = NULL; 595 PetscLogDouble time; 596 PetscLogState state; 597 int stage; 598 599 PetscFunctionBegin; 600 PetscCall(PetscLogHandlerGetState(h, &state)); 601 if (def->petsc_logActions) { 602 PetscLogEventInfo event_info; 603 PetscLogDouble curTime; 604 Action new_action; 605 606 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 607 PetscCall(PetscTime(&curTime)); 608 new_action.time = curTime - petsc_BaseTime; 609 new_action.action = PETSC_LOG_ACTION_END; 610 new_action.event = event; 611 new_action.classid = event_info.classid; 612 new_action.id1 = o1 ? o1->id : -1; 613 new_action.id2 = o2 ? o2->id : -2; 614 new_action.id3 = o3 ? o3->id : -3; 615 new_action.flops = petsc_TotalFlops; 616 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 617 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 618 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 619 } 620 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 621 if (def->pause_depth > 0) stage = 0; // all events run on the main stage in pause-mode 622 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) { 623 PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info)); 624 } else { 625 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 626 } 627 PetscCheck(event_perf_info->depth > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to end paused event, not allowed"); 628 event_perf_info->depth--; 629 /* Check for double counting */ 630 if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 631 else PetscCheck(event_perf_info->depth == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Logging event had unbalanced begin/end pairs"); 632 633 /* Log performance info */ 634 PetscCall(PetscTime(&time)); 635 PetscCall(PetscEventPerfInfoToc(event_perf_info, time, PetscLogMemory, (int)event)); 636 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) { 637 PetscEventPerfInfo *event_perf_info_global; 638 PetscCall(PetscSpinlockLock(&def->lock)); 639 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info_global)); 640 PetscCall(PetscEventPerfInfoAdd_Internal(event_perf_info, event_perf_info_global)); 641 PetscCall(PetscSpinlockUnlock(&def->lock)); 642 } 643 #if defined(PETSC_HAVE_CUDA) 644 if (PetscDeviceInitialized(PETSC_DEVICE_CUDA)) nvtxRangePop(); 645 #endif 646 PetscFunctionReturn(PETSC_SUCCESS); 647 } 648 649 static PetscErrorCode PetscLogHandlerEventDeactivatePush_Default(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event) 650 { 651 PetscEventPerfInfo *event_perf_info; 652 653 PetscFunctionBegin; 654 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 655 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 656 event_perf_info->depth++; 657 PetscFunctionReturn(PETSC_SUCCESS); 658 } 659 660 static PetscErrorCode PetscLogHandlerEventDeactivatePop_Default(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event) 661 { 662 PetscEventPerfInfo *event_perf_info; 663 664 PetscFunctionBegin; 665 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 666 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 667 event_perf_info->depth--; 668 PetscFunctionReturn(PETSC_SUCCESS); 669 } 670 671 static PetscErrorCode PetscLogHandlerEventsPause_Default(PetscLogHandler h) 672 { 673 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 674 PetscLogDouble time; 675 PetscInt num_stages; 676 677 PetscFunctionBegin; 678 if (def->pause_depth++ > 0) PetscFunctionReturn(PETSC_SUCCESS); 679 PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL)); 680 PetscCall(PetscTime(&time)); 681 for (PetscInt stage = 0; stage < num_stages; stage++) { 682 PetscStagePerf *stage_info = NULL; 683 PetscInt num_events; 684 685 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 686 PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL)); 687 for (PetscInt event = 0; event < num_events; event++) { 688 PetscEventPerfInfo *event_info = NULL; 689 PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info)); 690 if (event_info->depth > 0) { 691 event_info->depth *= -1; 692 PetscCall(PetscEventPerfInfoPause(event_info, time, PetscLogMemory, event)); 693 } 694 } 695 if (stage > 0 && stage_info->perfInfo.depth > 0) { 696 stage_info->perfInfo.depth *= -1; 697 PetscCall(PetscEventPerfInfoPause(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2))); 698 } 699 } 700 PetscFunctionReturn(PETSC_SUCCESS); 701 } 702 703 static PetscErrorCode PetscLogHandlerEventsResume_Default(PetscLogHandler h) 704 { 705 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 706 PetscLogDouble time; 707 PetscInt num_stages; 708 709 PetscFunctionBegin; 710 if (--def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 711 PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL)); 712 PetscCall(PetscTime(&time)); 713 for (PetscInt stage = 0; stage < num_stages; stage++) { 714 PetscStagePerf *stage_info = NULL; 715 PetscInt num_events; 716 717 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 718 PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL)); 719 for (PetscInt event = 0; event < num_events; event++) { 720 PetscEventPerfInfo *event_info = NULL; 721 PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info)); 722 if (event_info->depth < 0) { 723 event_info->depth *= -1; 724 PetscCall(PetscEventPerfInfoResume(event_info, time, PetscLogMemory, event)); 725 } 726 } 727 if (stage > 0 && stage_info->perfInfo.depth < 0) { 728 stage_info->perfInfo.depth *= -1; 729 PetscCall(PetscEventPerfInfoResume(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2))); 730 } 731 } 732 PetscFunctionReturn(PETSC_SUCCESS); 733 } 734 735 static PetscErrorCode PetscLogHandlerStagePush_Default(PetscLogHandler h, PetscLogStage new_stage) 736 { 737 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 738 PetscLogDouble time; 739 PetscLogState state; 740 PetscLogStage current_stage; 741 PetscStagePerf *new_stage_info; 742 743 PetscFunctionBegin; 744 if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 745 PetscCall(PetscLogHandlerGetState(h, &state)); 746 current_stage = state->current_stage; 747 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, new_stage, &new_stage_info)); 748 PetscCall(PetscTime(&time)); 749 750 /* Record flops/time of previous stage */ 751 if (current_stage >= 0) { 752 if (PetscBTLookup(state->active, current_stage)) { 753 PetscStagePerf *current_stage_info; 754 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, ¤t_stage_info)); 755 PetscCall(PetscEventPerfInfoToc(¤t_stage_info->perfInfo, time, PetscLogMemory, (int)-(current_stage + 2))); 756 } 757 } 758 new_stage_info->used = PETSC_TRUE; 759 new_stage_info->perfInfo.count++; 760 new_stage_info->perfInfo.depth++; 761 /* Subtract current quantities so that we obtain the difference when we pop */ 762 if (PetscBTLookup(state->active, new_stage)) PetscCall(PetscEventPerfInfoTic(&new_stage_info->perfInfo, time, PetscLogMemory, (int)-(new_stage + 2))); 763 PetscFunctionReturn(PETSC_SUCCESS); 764 } 765 766 static PetscErrorCode PetscLogHandlerStagePop_Default(PetscLogHandler h, PetscLogStage old_stage) 767 { 768 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 769 PetscLogStage current_stage; 770 PetscStagePerf *old_stage_info; 771 PetscLogState state; 772 PetscLogDouble time; 773 774 PetscFunctionBegin; 775 if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 776 PetscCall(PetscLogHandlerGetState(h, &state)); 777 current_stage = state->current_stage; 778 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, old_stage, &old_stage_info)); 779 PetscCall(PetscTime(&time)); 780 old_stage_info->perfInfo.depth--; 781 if (PetscBTLookup(state->active, old_stage)) { PetscCall(PetscEventPerfInfoToc(&old_stage_info->perfInfo, time, PetscLogMemory, (int)-(old_stage + 2))); } 782 if (current_stage >= 0) { 783 if (PetscBTLookup(state->active, current_stage)) { 784 PetscStagePerf *current_stage_info; 785 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, ¤t_stage_info)); 786 PetscCall(PetscEventPerfInfoTic(¤t_stage_info->perfInfo, time, PetscLogMemory, (int)-(current_stage + 2))); 787 } 788 } 789 PetscFunctionReturn(PETSC_SUCCESS); 790 } 791 792 static PetscErrorCode PetscLogHandlerStageSetVisible_Default(PetscLogHandler h, PetscLogStage stage, PetscBool is_visible) 793 { 794 PetscStagePerf *stage_info; 795 796 PetscFunctionBegin; 797 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 798 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info)); 799 stage_info->perfInfo.visible = is_visible; 800 PetscFunctionReturn(PETSC_SUCCESS); 801 } 802 803 static PetscErrorCode PetscLogHandlerStageGetVisible_Default(PetscLogHandler h, PetscLogStage stage, PetscBool *is_visible) 804 { 805 PetscStagePerf *stage_info; 806 807 PetscFunctionBegin; 808 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 809 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info)); 810 *is_visible = stage_info->perfInfo.visible; 811 PetscFunctionReturn(PETSC_SUCCESS); 812 } 813 814 static PetscErrorCode PetscLogHandlerSetLogActions_Default(PetscLogHandler handler, PetscBool flag) 815 { 816 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 817 818 PetscFunctionBegin; 819 def->petsc_logActions = flag; 820 PetscFunctionReturn(PETSC_SUCCESS); 821 } 822 823 static PetscErrorCode PetscLogHandlerSetLogObjects_Default(PetscLogHandler handler, PetscBool flag) 824 { 825 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 826 827 PetscFunctionBegin; 828 def->petsc_logObjects = flag; 829 PetscFunctionReturn(PETSC_SUCCESS); 830 } 831 832 static PetscErrorCode PetscLogHandlerLogObjectState_Default(PetscLogHandler handler, PetscObject obj, const char format[], va_list Argp) 833 { 834 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 835 size_t fullLength; 836 837 PetscFunctionBegin; 838 if (def->petsc_logObjects) { 839 Object *obj_entry = NULL; 840 841 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, obj->id - 1, &obj_entry)); 842 PetscCall(PetscVSNPrintf(obj_entry->info, 64, format, &fullLength, Argp)); 843 } 844 PetscFunctionReturn(PETSC_SUCCESS); 845 } 846 847 static PetscErrorCode PetscLogHandlerGetNumObjects_Default(PetscLogHandler handler, PetscInt *num_objects) 848 { 849 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 850 851 PetscFunctionBegin; 852 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, num_objects, NULL)); 853 PetscFunctionReturn(PETSC_SUCCESS); 854 } 855 856 static PetscErrorCode PetscLogHandlerDump_Default(PetscLogHandler handler, const char sname[]) 857 { 858 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 859 FILE *fd; 860 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 861 PetscLogDouble flops, _TotalTime; 862 PetscMPIInt rank; 863 int curStage; 864 PetscLogState state; 865 PetscInt num_events; 866 PetscLogEvent event; 867 868 PetscFunctionBegin; 869 /* Calculate the total elapsed time */ 870 PetscCall(PetscTime(&_TotalTime)); 871 _TotalTime -= petsc_BaseTime; 872 /* Open log file */ 873 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)handler), &rank)); 874 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank)); 875 PetscCall(PetscFixFilename(file, fname)); 876 PetscCall(PetscFOpen(PETSC_COMM_SELF, fname, "w", &fd)); 877 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 878 /* Output totals */ 879 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 880 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Clock Resolution %g\n", 0.0)); 881 /* Output actions */ 882 if (def->petsc_logActions) { 883 PetscInt num_actions; 884 PetscCall(PetscLogActionArrayGetSize(def->petsc_actions, &num_actions, NULL)); 885 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Actions accomplished %d\n", (int)num_actions)); 886 for (int a = 0; a < num_actions; a++) { 887 Action *action; 888 889 PetscCall(PetscLogActionArrayGetRef(def->petsc_actions, a, &action)); 890 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%g %d %d %d %d %d %d %g %g %g\n", action->time, action->action, (int)action->event, (int)action->classid, action->id1, action->id2, action->id3, action->flops, action->mem, action->maxmem)); 891 } 892 } 893 /* Output objects */ 894 if (def->petsc_logObjects) { 895 PetscInt num_objects; 896 897 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 898 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Objects created %d destroyed %d\n", def->petsc_numObjectsCreated, def->petsc_numObjectsDestroyed)); 899 for (int o = 0; o < num_objects; o++) { 900 Object *object = NULL; 901 902 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, o, &object)); 903 if (object->parent != -1) continue; // object with this id wasn't logged, probably a PetscLogHandler 904 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Parent ID: %d Memory: %d\n", object->parent, (int)object->mem)); 905 if (!object->name[0]) { 906 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Name\n")); 907 } else { 908 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Name: %s\n", object->name)); 909 } 910 if (!object->info[0]) { 911 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Info\n")); 912 } else { 913 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Info: %s\n", object->info)); 914 } 915 } 916 } 917 /* Output events */ 918 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Event log:\n")); 919 PetscCall(PetscLogHandlerGetState(handler, &state)); 920 PetscCall(PetscLogStateGetNumEvents(state, &num_events)); 921 PetscCall(PetscLogStateGetCurrentStage(state, &curStage)); 922 for (event = 0; event < num_events; event++) { 923 PetscEventPerfInfo *event_info; 924 925 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, curStage, event, &event_info)); 926 if (event_info->time != 0.0) flops = event_info->flops / event_info->time; 927 else flops = 0.0; 928 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%d %16d %16g %16g %16g\n", event, event_info->count, event_info->flops, event_info->time, flops)); 929 } 930 PetscCall(PetscFClose(PETSC_COMM_SELF, fd)); 931 PetscFunctionReturn(PETSC_SUCCESS); 932 } 933 934 /* 935 PetscLogView_Detailed - Each process prints the times for its own events 936 937 */ 938 static PetscErrorCode PetscLogHandlerView_Default_Detailed(PetscLogHandler handler, PetscViewer viewer) 939 { 940 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 941 PetscLogDouble locTotalTime, numRed, maxMem; 942 PetscInt numStages, numEvents; 943 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 944 PetscMPIInt rank, size; 945 PetscLogGlobalNames global_stages, global_events; 946 PetscLogState state; 947 PetscEventPerfInfo zero_info; 948 949 PetscFunctionBegin; 950 PetscCall(PetscLogHandlerGetState(handler, &state)); 951 PetscCallMPI(MPI_Comm_size(comm, &size)); 952 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 953 /* Must preserve reduction count before we go on */ 954 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 955 /* Get the total elapsed time */ 956 PetscCall(PetscTime(&locTotalTime)); 957 locTotalTime -= petsc_BaseTime; 958 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 959 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 960 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 961 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 962 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 963 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 964 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 965 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 966 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 967 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 968 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 969 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 970 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 971 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 972 for (PetscInt stage = 0; stage < numStages; stage++) { 973 PetscInt stage_id; 974 const char *stage_name; 975 976 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 977 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 978 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stage_name)); 979 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stage_name)); 980 for (PetscInt event = 0; event < numEvents; event++) { 981 PetscEventPerfInfo *eventInfo = &zero_info; 982 PetscBool is_zero = PETSC_FALSE; 983 PetscInt event_id; 984 const char *event_name; 985 986 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 987 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 988 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &eventInfo)); 989 is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE; 990 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm)); 991 if (!is_zero) { PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stage_name, event_name)); } 992 } 993 } 994 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 995 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 996 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 997 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 998 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 999 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1000 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1001 { 1002 PetscInt num_objects; 1003 1004 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 1005 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, (int)num_objects)); 1006 } 1007 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1008 PetscCall(PetscViewerFlush(viewer)); 1009 for (PetscInt stage = 0; stage < numStages; stage++) { 1010 PetscEventPerfInfo *stage_perf_info = &zero_info; 1011 PetscInt stage_id; 1012 const char *stage_name; 1013 1014 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1015 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1016 if (stage_id >= 0) { 1017 PetscStagePerf *stage_info; 1018 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info)); 1019 stage_perf_info = &stage_info->perfInfo; 1020 } 1021 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stage_name, rank, stage_perf_info->time, 1022 stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops)); 1023 for (PetscInt event = 0; event < numEvents; event++) { 1024 PetscEventPerfInfo *eventInfo = &zero_info; 1025 PetscBool is_zero = PETSC_FALSE; 1026 PetscInt event_id; 1027 const char *event_name; 1028 1029 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1030 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1031 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &eventInfo)); 1032 is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE; 1033 PetscCall(PetscMemcmp(eventInfo, &zero_info, sizeof(zero_info), &is_zero)); 1034 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm)); 1035 if (!is_zero) { 1036 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", stage_name, event_name, rank, 1037 eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops)); 1038 if (eventInfo->dof[0] >= 0.) { 1039 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1040 for (PetscInt d = 0; d < 8; ++d) { 1041 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1042 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1043 } 1044 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1045 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1046 for (PetscInt e = 0; e < 8; ++e) { 1047 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1048 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1049 } 1050 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1051 } 1052 } 1053 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1054 } 1055 } 1056 PetscCall(PetscViewerFlush(viewer)); 1057 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1058 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1059 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1060 PetscFunctionReturn(PETSC_SUCCESS); 1061 } 1062 1063 /* 1064 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1065 */ 1066 static PetscErrorCode PetscLogHandlerView_Default_CSV(PetscLogHandler handler, PetscViewer viewer) 1067 { 1068 PetscLogDouble locTotalTime, maxMem; 1069 PetscInt numStages, numEvents, stage, event; 1070 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1071 PetscMPIInt rank, size; 1072 PetscLogGlobalNames global_stages, global_events; 1073 PetscLogState state; 1074 PetscEventPerfInfo zero_info; 1075 1076 PetscFunctionBegin; 1077 PetscCall(PetscLogHandlerGetState(handler, &state)); 1078 PetscCallMPI(MPI_Comm_size(comm, &size)); 1079 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1080 /* Must preserve reduction count before we go on */ 1081 /* Get the total elapsed time */ 1082 PetscCall(PetscTime(&locTotalTime)); 1083 locTotalTime -= petsc_BaseTime; 1084 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1085 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 1086 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 1087 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 1088 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 1089 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 1090 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1091 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1092 PetscCall(PetscViewerFlush(viewer)); 1093 for (stage = 0; stage < numStages; stage++) { 1094 PetscEventPerfInfo *stage_perf_info; 1095 PetscInt stage_id; 1096 const char *stage_name; 1097 1098 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1099 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1100 stage_perf_info = &zero_info; 1101 if (stage_id >= 0) { 1102 PetscStagePerf *stage_info; 1103 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info)); 1104 stage_perf_info = &stage_info->perfInfo; 1105 } 1106 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stage_name, rank, stage_perf_info->time, stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops)); 1107 for (event = 0; event < numEvents; event++) { 1108 PetscEventPerfInfo *eventInfo = &zero_info; 1109 PetscBool is_zero = PETSC_FALSE; 1110 PetscInt event_id; 1111 const char *event_name; 1112 1113 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1114 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1115 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &eventInfo)); 1116 PetscCall(PetscMemcmp(eventInfo, &zero_info, sizeof(zero_info), &is_zero)); 1117 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm)); 1118 if (!is_zero) { 1119 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stage_name, event_name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops)); 1120 if (eventInfo->dof[0] >= 0.) { 1121 for (PetscInt d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1122 for (PetscInt e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1123 } 1124 } 1125 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1126 } 1127 } 1128 PetscCall(PetscViewerFlush(viewer)); 1129 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1130 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1131 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1132 PetscFunctionReturn(PETSC_SUCCESS); 1133 } 1134 1135 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) 1136 { 1137 PetscFunctionBegin; 1138 if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS); 1139 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1140 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1141 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1142 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1143 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1144 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1145 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1146 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1147 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1148 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1149 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1150 PetscFunctionReturn(PETSC_SUCCESS); 1151 } 1152 1153 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) 1154 { 1155 PetscFunctionBegin; 1156 if (PetscDefined(USE_DEBUG)) { 1157 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1158 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1159 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1160 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1161 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1162 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1163 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1164 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1165 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1166 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1167 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1168 } 1169 PetscFunctionReturn(PETSC_SUCCESS); 1170 } 1171 1172 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) 1173 { 1174 #if defined(PETSC_HAVE_DEVICE) 1175 PetscMPIInt size; 1176 PetscBool deviceInitialized = PETSC_FALSE; 1177 1178 PetscFunctionBegin; 1179 PetscCallMPI(MPI_Comm_size(comm, &size)); 1180 for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) { 1181 const PetscDeviceType dtype = PetscDeviceTypeCast(i); 1182 if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */ 1183 deviceInitialized = PETSC_TRUE; 1184 break; 1185 } 1186 } 1187 /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */ 1188 if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS); 1189 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1190 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1191 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1192 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1193 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1194 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1195 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1196 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1197 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1198 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1199 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1200 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1201 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1202 PetscFunctionReturn(PETSC_SUCCESS); 1203 #else 1204 return PETSC_SUCCESS; 1205 #endif 1206 } 1207 1208 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) 1209 { 1210 #if defined(PETSC_HAVE_DEVICE) 1211 1212 PetscFunctionBegin; 1213 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS); 1214 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1215 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1216 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1217 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1218 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1219 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1220 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1221 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1222 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1223 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1224 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1225 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1226 PetscFunctionReturn(PETSC_SUCCESS); 1227 #else 1228 return PETSC_SUCCESS; 1229 #endif 1230 } 1231 1232 static PetscErrorCode PetscLogHandlerView_Default_Info(PetscLogHandler handler, PetscViewer viewer) 1233 { 1234 FILE *fd; 1235 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 1236 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1237 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1238 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1239 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1240 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1241 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1242 PetscLogDouble min, max, tot, ratio, avg, x, y; 1243 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1244 #if defined(PETSC_HAVE_DEVICE) 1245 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1246 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1247 #endif 1248 PetscMPIInt minC, maxC; 1249 PetscMPIInt size, rank; 1250 PetscBool *localStageUsed, *stageUsed; 1251 PetscBool *localStageVisible, *stageVisible; 1252 PetscInt numStages, numEvents; 1253 int stage, oclass; 1254 PetscLogEvent event; 1255 char version[256]; 1256 MPI_Comm comm; 1257 #if defined(PETSC_HAVE_DEVICE) 1258 PetscInt64 nas = 0x7FF0000000000002; 1259 #endif 1260 PetscLogGlobalNames global_stages, global_events; 1261 PetscEventPerfInfo zero_info; 1262 PetscLogState state; 1263 1264 PetscFunctionBegin; 1265 PetscCall(PetscLogHandlerGetState(handler, &state)); 1266 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1267 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1268 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1269 PetscCallMPI(MPI_Comm_size(comm, &size)); 1270 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1271 /* Get the total elapsed time */ 1272 PetscCall(PetscTime(&locTotalTime)); 1273 locTotalTime -= petsc_BaseTime; 1274 1275 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1276 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1277 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1278 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1279 PetscCall(PetscLogViewWarnSync(comm, fd)); 1280 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1281 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1282 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1283 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1284 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1285 PetscCall(PetscGetUserName(username, sizeof(username))); 1286 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1287 PetscCall(PetscGetDate(date, sizeof(date))); 1288 PetscCall(PetscGetVersion(version, sizeof(version))); 1289 if (size == 1) { 1290 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1291 } else { 1292 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1293 } 1294 #if defined(PETSC_HAVE_OPENMP) 1295 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1296 #endif 1297 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1298 1299 /* Must preserve reduction count before we go on */ 1300 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1301 1302 /* Calculate summary information */ 1303 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1304 /* Time */ 1305 PetscCall(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1306 PetscCall(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1307 PetscCall(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1308 avg = tot / ((PetscLogDouble)size); 1309 if (min != 0.0) ratio = max / min; 1310 else ratio = 0.0; 1311 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1312 TotalTime = tot; 1313 /* Objects */ 1314 { 1315 PetscInt num_objects; 1316 1317 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 1318 avg = (PetscLogDouble)num_objects; 1319 } 1320 PetscCall(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1321 PetscCall(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1322 PetscCall(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1323 avg = tot / ((PetscLogDouble)size); 1324 if (min != 0.0) ratio = max / min; 1325 else ratio = 0.0; 1326 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1327 /* Flops */ 1328 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1329 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1330 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1331 avg = tot / ((PetscLogDouble)size); 1332 if (min != 0.0) ratio = max / min; 1333 else ratio = 0.0; 1334 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1335 TotalFlops = tot; 1336 /* Flops/sec -- Must talk to Barry here */ 1337 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1338 else flops = 0.0; 1339 PetscCall(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1340 PetscCall(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1341 PetscCall(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1342 avg = tot / ((PetscLogDouble)size); 1343 if (min != 0.0) ratio = max / min; 1344 else ratio = 0.0; 1345 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1346 /* Memory */ 1347 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1348 if (mem > 0.0) { 1349 PetscCall(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1350 PetscCall(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1351 PetscCall(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1352 avg = tot / ((PetscLogDouble)size); 1353 if (min != 0.0) ratio = max / min; 1354 else ratio = 0.0; 1355 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1356 } 1357 /* Messages */ 1358 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1359 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1360 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1361 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1362 avg = tot / ((PetscLogDouble)size); 1363 if (min != 0.0) ratio = max / min; 1364 else ratio = 0.0; 1365 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1366 numMessages = tot; 1367 /* Message Lengths */ 1368 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1369 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1370 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1371 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1372 if (numMessages != 0) avg = tot / numMessages; 1373 else avg = 0.0; 1374 if (min != 0.0) ratio = max / min; 1375 else ratio = 0.0; 1376 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1377 messageLength = tot; 1378 /* Reductions */ 1379 PetscCall(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1380 PetscCall(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1381 PetscCall(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1382 if (min != 0.0) ratio = max / min; 1383 else ratio = 0.0; 1384 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1385 numReductions = red; /* wrong because uses count from process zero */ 1386 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1387 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1388 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1389 1390 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 1391 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 1392 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 1393 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 1394 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 1395 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1396 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1397 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1398 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1399 if (numStages > 0) { 1400 for (stage = 0; stage < numStages; stage++) { 1401 PetscInt stage_id; 1402 1403 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1404 if (stage_id >= 0) { 1405 PetscStagePerf *stage_info; 1406 1407 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 1408 localStageUsed[stage] = stage_info->used; 1409 localStageVisible[stage] = stage_info->perfInfo.visible; 1410 } else { 1411 localStageUsed[stage] = PETSC_FALSE; 1412 localStageVisible[stage] = PETSC_TRUE; 1413 } 1414 } 1415 PetscCall(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1416 PetscCall(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1417 for (stage = 0; stage < numStages; stage++) { 1418 if (stageUsed[stage] && stageVisible[stage]) { 1419 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1420 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1421 break; 1422 } 1423 } 1424 for (stage = 0; stage < numStages; stage++) { 1425 PetscInt stage_id; 1426 PetscEventPerfInfo *stage_info; 1427 const char *stage_name; 1428 1429 if (!(stageUsed[stage] && stageVisible[stage])) continue; 1430 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1431 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1432 stage_info = &zero_info; 1433 if (localStageUsed[stage]) { 1434 PetscStagePerf *stage_perf_info; 1435 1436 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_perf_info)); 1437 stage_info = &stage_perf_info->perfInfo; 1438 } 1439 PetscCall(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1440 PetscCall(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1441 PetscCall(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1442 PetscCall(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1443 PetscCall(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1444 mess *= 0.5; 1445 messLen *= 0.5; 1446 red /= size; 1447 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1448 else fracTime = 0.0; 1449 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1450 else fracFlops = 0.0; 1451 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1452 if (numMessages != 0.0) fracMessages = mess / numMessages; 1453 else fracMessages = 0.0; 1454 if (mess != 0.0) avgMessLen = messLen / mess; 1455 else avgMessLen = 0.0; 1456 if (messageLength != 0.0) fracLength = messLen / messageLength; 1457 else fracLength = 0.0; 1458 if (numReductions != 0.0) fracReductions = red / numReductions; 1459 else fracReductions = 0.0; 1460 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, stage_name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1461 } 1462 } 1463 1464 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1465 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1466 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1467 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1468 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1469 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1470 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1471 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1472 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1473 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1474 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1475 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1476 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1477 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1478 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1479 if (PetscLogMemory) { 1480 PetscCall(PetscFPrintf(comm, fd, " Memory usage is summed over all MPI processes, it is given in mega-bytes\n")); 1481 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1482 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1483 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1484 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1485 } 1486 #if defined(PETSC_HAVE_DEVICE) 1487 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1488 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1489 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1490 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1491 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1492 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1493 #endif 1494 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1495 1496 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1497 1498 /* Report events */ 1499 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1500 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1501 #if defined(PETSC_HAVE_DEVICE) 1502 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1503 #endif 1504 PetscCall(PetscFPrintf(comm, fd, "\n")); 1505 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1506 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1507 #if defined(PETSC_HAVE_DEVICE) 1508 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1509 #endif 1510 PetscCall(PetscFPrintf(comm, fd, "\n")); 1511 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1512 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1513 #if defined(PETSC_HAVE_DEVICE) 1514 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1515 #endif 1516 PetscCall(PetscFPrintf(comm, fd, "\n")); 1517 1518 #if defined(PETSC_HAVE_DEVICE) 1519 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1520 PetscCall(PetscLogStateGetEventFromName(state, "TAOSolve", &TAO_Solve)); 1521 PetscCall(PetscLogStateGetEventFromName(state, "TSStep", &TS_Step)); 1522 PetscCall(PetscLogStateGetEventFromName(state, "SNESSolve", &SNES_Solve)); 1523 PetscCall(PetscLogStateGetEventFromName(state, "KSPSolve", &KSP_Solve)); 1524 #endif 1525 1526 for (stage = 0; stage < numStages; stage++) { 1527 PetscInt stage_id; 1528 PetscEventPerfInfo *stage_info; 1529 const char *stage_name; 1530 1531 if (!(stageVisible[stage] && stageUsed[stage])) continue; 1532 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1533 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1534 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stage_name)); 1535 stage_info = &zero_info; 1536 if (localStageUsed[stage]) { 1537 PetscStagePerf *stage_perf_info; 1538 1539 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_perf_info)); 1540 stage_info = &stage_perf_info->perfInfo; 1541 } 1542 PetscCall(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1543 PetscCall(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1544 PetscCall(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1545 PetscCall(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1546 PetscCall(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1547 mess *= 0.5; 1548 messLen *= 0.5; 1549 red /= size; 1550 1551 for (event = 0; event < numEvents; event++) { 1552 PetscInt event_id; 1553 PetscEventPerfInfo *event_info = &zero_info; 1554 PetscBool is_zero = PETSC_FALSE; 1555 const char *event_name; 1556 1557 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1558 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1559 if (event_id >= 0 && stage_id >= 0) { PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &event_info)); } 1560 PetscCall(PetscMemcmp(event_info, &zero_info, sizeof(zero_info), &is_zero)); 1561 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm)); 1562 if (!is_zero) { 1563 flopr = event_info->flops; 1564 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1565 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1566 PetscCall(MPIU_Allreduce(&event_info->flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1567 PetscCall(MPIU_Allreduce(&event_info->time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1568 PetscCall(MPIU_Allreduce(&event_info->time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1569 PetscCall(MPIU_Allreduce(&event_info->time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1570 PetscCall(MPIU_Allreduce(&event_info->numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1571 PetscCall(MPIU_Allreduce(&event_info->messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1572 PetscCall(MPIU_Allreduce(&event_info->numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1573 PetscCall(MPIU_Allreduce(&event_info->count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1574 PetscCall(MPIU_Allreduce(&event_info->count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1575 if (PetscLogMemory) { 1576 PetscCall(MPIU_Allreduce(&event_info->memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1577 PetscCall(MPIU_Allreduce(&event_info->mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1578 PetscCall(MPIU_Allreduce(&event_info->mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1579 PetscCall(MPIU_Allreduce(&event_info->mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1580 } 1581 #if defined(PETSC_HAVE_DEVICE) 1582 PetscCall(MPIU_Allreduce(&event_info->CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1583 PetscCall(MPIU_Allreduce(&event_info->GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1584 PetscCall(MPIU_Allreduce(&event_info->CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1585 PetscCall(MPIU_Allreduce(&event_info->GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1586 PetscCall(MPIU_Allreduce(&event_info->GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1587 PetscCall(MPIU_Allreduce(&event_info->GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1588 #endif 1589 if (mint < 0.0) { 1590 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, event_name)); 1591 mint = 0; 1592 } 1593 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, event_name); 1594 #if defined(PETSC_HAVE_DEVICE) 1595 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1596 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1597 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 1598 if (event_id != SNES_Solve && event_id != KSP_Solve && event_id != TS_Step && event_id != TAO_Solve) { 1599 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 1600 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 1601 } 1602 } 1603 #endif 1604 totm *= 0.5; 1605 totml *= 0.5; 1606 totr /= size; 1607 1608 if (maxC != 0) { 1609 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 1610 else ratC = 0.0; 1611 if (mint != 0.0) ratt = maxt / mint; 1612 else ratt = 0.0; 1613 if (minf != 0.0) ratf = maxf / minf; 1614 else ratf = 0.0; 1615 if (TotalTime != 0.0) fracTime = tott / TotalTime; 1616 else fracTime = 0.0; 1617 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 1618 else fracFlops = 0.0; 1619 if (stageTime != 0.0) fracStageTime = tott / stageTime; 1620 else fracStageTime = 0.0; 1621 if (flops != 0.0) fracStageFlops = totf / flops; 1622 else fracStageFlops = 0.0; 1623 if (numMessages != 0.0) fracMess = totm / numMessages; 1624 else fracMess = 0.0; 1625 if (messageLength != 0.0) fracMessLen = totml / messageLength; 1626 else fracMessLen = 0.0; 1627 if (numReductions != 0.0) fracRed = totr / numReductions; 1628 else fracRed = 0.0; 1629 if (mess != 0.0) fracStageMess = totm / mess; 1630 else fracStageMess = 0.0; 1631 if (messLen != 0.0) fracStageMessLen = totml / messLen; 1632 else fracStageMessLen = 0.0; 1633 if (red != 0.0) fracStageRed = totr / red; 1634 else fracStageRed = 0.0; 1635 if (totm != 0.0) totml /= totm; 1636 else totml = 0.0; 1637 if (maxt != 0.0) flopr = totf / maxt; 1638 else flopr = 0.0; 1639 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) 1640 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 1641 else 1642 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 1643 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 1644 #if defined(PETSC_HAVE_DEVICE) 1645 if (totf != 0.0) fracgflops = gflops / totf; 1646 else fracgflops = 0.0; 1647 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 1648 else gflopr = 0.0; 1649 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 1650 #endif 1651 PetscCall(PetscFPrintf(comm, fd, "\n")); 1652 } 1653 } 1654 } 1655 } 1656 1657 /* Memory usage and object creation */ 1658 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1659 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1660 #if defined(PETSC_HAVE_DEVICE) 1661 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1662 #endif 1663 PetscCall(PetscFPrintf(comm, fd, "\n")); 1664 PetscCall(PetscFPrintf(comm, fd, "\n")); 1665 1666 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1667 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1668 stats for stages local to processor sets. 1669 */ 1670 /* We should figure out the longest object name here (now 20 characters) */ 1671 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 1672 for (stage = 0; stage < numStages; stage++) { 1673 const char *stage_name; 1674 1675 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1676 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stage_name)); 1677 if (localStageUsed[stage]) { 1678 PetscInt num_classes; 1679 1680 PetscCall(PetscLogStateGetNumClasses(state, &num_classes)); 1681 for (oclass = 0; oclass < num_classes; oclass++) { 1682 PetscClassPerf *class_perf_info; 1683 1684 PetscCall(PetscLogHandlerDefaultGetClassPerf(handler, stage, oclass, &class_perf_info)); 1685 if ((class_perf_info->creations > 0) || (class_perf_info->destructions > 0)) { 1686 PetscLogClassInfo class_reg_info; 1687 1688 PetscCall(PetscLogStateClassGetInfo(state, oclass, &class_reg_info)); 1689 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", class_reg_info.name, class_perf_info->creations, class_perf_info->destructions)); 1690 } 1691 } 1692 } 1693 } 1694 1695 PetscCall(PetscFree(localStageUsed)); 1696 PetscCall(PetscFree(stageUsed)); 1697 PetscCall(PetscFree(localStageVisible)); 1698 PetscCall(PetscFree(stageVisible)); 1699 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1700 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1701 1702 /* Information unrelated to this particular run */ 1703 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 1704 PetscCall(PetscTime(&y)); 1705 PetscCall(PetscTime(&x)); 1706 PetscCall(PetscTime(&y)); 1707 PetscCall(PetscTime(&y)); 1708 PetscCall(PetscTime(&y)); 1709 PetscCall(PetscTime(&y)); 1710 PetscCall(PetscTime(&y)); 1711 PetscCall(PetscTime(&y)); 1712 PetscCall(PetscTime(&y)); 1713 PetscCall(PetscTime(&y)); 1714 PetscCall(PetscTime(&y)); 1715 PetscCall(PetscTime(&y)); 1716 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 1717 /* MPI information */ 1718 if (size > 1) { 1719 MPI_Status status; 1720 PetscMPIInt tag; 1721 MPI_Comm newcomm; 1722 1723 PetscCallMPI(MPI_Barrier(comm)); 1724 PetscCall(PetscTime(&x)); 1725 PetscCallMPI(MPI_Barrier(comm)); 1726 PetscCallMPI(MPI_Barrier(comm)); 1727 PetscCallMPI(MPI_Barrier(comm)); 1728 PetscCallMPI(MPI_Barrier(comm)); 1729 PetscCallMPI(MPI_Barrier(comm)); 1730 PetscCall(PetscTime(&y)); 1731 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 1732 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 1733 PetscCallMPI(MPI_Barrier(comm)); 1734 if (rank) { 1735 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 1736 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 1737 } else { 1738 PetscCall(PetscTime(&x)); 1739 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 1740 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 1741 PetscCall(PetscTime(&y)); 1742 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 1743 } 1744 PetscCall(PetscCommDestroy(&newcomm)); 1745 } 1746 PetscCall(PetscOptionsView(NULL, viewer)); 1747 1748 /* Machine and compile information */ 1749 if (PetscDefined(USE_FORTRAN_KERNELS)) { 1750 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 1751 } else { 1752 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 1753 } 1754 if (PetscDefined(USE_64BIT_INDICES)) { 1755 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64-bit PetscInt\n")); 1756 } else if (PetscDefined(USE___FLOAT128)) { 1757 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32-bit PetscInt\n")); 1758 } 1759 if (PetscDefined(USE_REAL_SINGLE)) { 1760 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 1761 } else if (PetscDefined(USE___FLOAT128)) { 1762 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 1763 } 1764 if (PetscDefined(USE_REAL_MAT_SINGLE)) { 1765 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 1766 } else { 1767 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 1768 } 1769 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 1770 1771 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 1772 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 1773 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 1774 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 1775 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 1776 1777 /* Cleanup */ 1778 PetscCall(PetscFPrintf(comm, fd, "\n")); 1779 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1780 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1781 PetscCall(PetscFPTrapPop()); 1782 PetscFunctionReturn(PETSC_SUCCESS); 1783 } 1784 1785 static PetscErrorCode PetscLogHandlerView_Default(PetscLogHandler handler, PetscViewer viewer) 1786 { 1787 PetscViewerFormat format; 1788 PetscFunctionBegin; 1789 PetscCall(PetscViewerGetFormat(viewer, &format)); 1790 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 1791 PetscCall(PetscLogHandlerView_Default_Info(handler, viewer)); 1792 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1793 PetscCall(PetscLogHandlerView_Default_Detailed(handler, viewer)); 1794 } else if (format == PETSC_VIEWER_ASCII_CSV) { 1795 PetscCall(PetscLogHandlerView_Default_CSV(handler, viewer)); 1796 } 1797 PetscFunctionReturn(PETSC_SUCCESS); 1798 } 1799 1800 /*MC 1801 PETSCLOGHANDLERDEFAULT - PETSCLOGHANDLERDEFAULT = "default" - A `PetscLogHandler` that collects data for PETSc 1802 default profiling log viewers (`PetscLogView()` and `PetscLogDump()`). A log handler of this type is 1803 created and started (`PetscLogHandlerStart()`) by `PetscLogDefaultBegin()`. 1804 1805 Options Database Keys: 1806 + -log_include_actions - include a growing list of actions (event beginnings and endings, object creations and destructions) in `PetscLogDump()` (`PetscLogActions()`). 1807 - -log_include_objects - include a growing list of object creations and destructions in `PetscLogDump()` (`PetscLogObjects()`). 1808 1809 Level: developer 1810 1811 .seealso: [](ch_profiling), `PetscLogHandler` 1812 M*/ 1813 1814 PETSC_INTERN PetscErrorCode PetscLogHandlerCreate_Default(PetscLogHandler handler) 1815 { 1816 PetscFunctionBegin; 1817 PetscCall(PetscLogHandlerContextCreate_Default((PetscLogHandler_Default *)&handler->data)); 1818 handler->ops->destroy = PetscLogHandlerDestroy_Default; 1819 handler->ops->eventbegin = PetscLogHandlerEventBegin_Default; 1820 handler->ops->eventend = PetscLogHandlerEventEnd_Default; 1821 handler->ops->eventsync = PetscLogHandlerEventSync_Default; 1822 handler->ops->objectcreate = PetscLogHandlerObjectCreate_Default; 1823 handler->ops->objectdestroy = PetscLogHandlerObjectDestroy_Default; 1824 handler->ops->stagepush = PetscLogHandlerStagePush_Default; 1825 handler->ops->stagepop = PetscLogHandlerStagePop_Default; 1826 handler->ops->view = PetscLogHandlerView_Default; 1827 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerGetEventPerfInfo_C", PetscLogHandlerGetEventPerfInfo_Default)); 1828 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerGetStagePerfInfo_C", PetscLogHandlerGetStagePerfInfo_Default)); 1829 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerSetLogActions_C", PetscLogHandlerSetLogActions_Default)); 1830 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerSetLogObjects_C", PetscLogHandlerSetLogObjects_Default)); 1831 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerLogObjectState_C", PetscLogHandlerLogObjectState_Default)); 1832 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerGetNumObjects_C", PetscLogHandlerGetNumObjects_Default)); 1833 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventDeactivatePush_C", PetscLogHandlerEventDeactivatePush_Default)); 1834 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventDeactivatePop_C", PetscLogHandlerEventDeactivatePop_Default)); 1835 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventsPause_C", PetscLogHandlerEventsPause_Default)); 1836 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventsResume_C", PetscLogHandlerEventsResume_Default)); 1837 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerDump_C", PetscLogHandlerDump_Default)); 1838 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerStageSetVisible_C", PetscLogHandlerStageSetVisible_Default)); 1839 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerStageGetVisible_C", PetscLogHandlerStageGetVisible_Default)); 1840 PetscFunctionReturn(PETSC_SUCCESS); 1841 } 1842