1 #include <petscviewer.h> 2 #include <petscdevice.h> 3 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 4 #include <petsc/private/loghandlerimpl.h> 5 #include <petsc/private/deviceimpl.h> 6 #include <petscconfiginfo.h> 7 #include <petscmachineinfo.h> 8 #include "logdefault.h" 9 10 static PetscErrorCode PetscEventPerfInfoInit(PetscEventPerfInfo *eventInfo) 11 { 12 PetscFunctionBegin; 13 PetscCall(PetscMemzero(eventInfo, sizeof(*eventInfo))); 14 eventInfo->visible = PETSC_TRUE; 15 eventInfo->id = -1; 16 eventInfo->dof[0] = -1.0; 17 eventInfo->dof[1] = -1.0; 18 eventInfo->dof[2] = -1.0; 19 eventInfo->dof[3] = -1.0; 20 eventInfo->dof[4] = -1.0; 21 eventInfo->dof[5] = -1.0; 22 eventInfo->dof[6] = -1.0; 23 eventInfo->dof[7] = -1.0; 24 eventInfo->errors[0] = -1.0; 25 eventInfo->errors[1] = -1.0; 26 eventInfo->errors[2] = -1.0; 27 eventInfo->errors[3] = -1.0; 28 eventInfo->errors[4] = -1.0; 29 eventInfo->errors[5] = -1.0; 30 eventInfo->errors[6] = -1.0; 31 eventInfo->errors[7] = -1.0; 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 static PetscErrorCode PetscEventPerfInfoTic_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool resume) 36 { 37 PetscFunctionBegin; 38 if (resume) { 39 eventInfo->timeTmp -= time; 40 eventInfo->flopsTmp -= petsc_TotalFlops_th; 41 } else { 42 eventInfo->timeTmp = -time; 43 eventInfo->flopsTmp = -petsc_TotalFlops_th; 44 } 45 eventInfo->numMessages -= petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th; 46 eventInfo->messageLength -= petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len_th + petsc_send_len_th; 47 eventInfo->numReductions -= petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th; 48 #if defined(PETSC_HAVE_DEVICE) 49 eventInfo->CpuToGpuCount -= petsc_ctog_ct_th; 50 eventInfo->GpuToCpuCount -= petsc_gtoc_ct_th; 51 eventInfo->CpuToGpuSize -= petsc_ctog_sz_th; 52 eventInfo->GpuToCpuSize -= petsc_gtoc_sz_th; 53 eventInfo->GpuFlops -= petsc_gflops_th; 54 eventInfo->GpuTime -= petsc_gtime; 55 if (PetscLogGpuEnergyFlag) eventInfo->GpuEnergy -= petsc_genergy; 56 if (PetscLogGpuEnergyMeterFlag) eventInfo->GpuEnergy -= petsc_genergy_meter; 57 #endif 58 if (logMemory) { 59 PetscLogDouble usage; 60 PetscCall(PetscMemoryGetCurrentUsage(&usage)); 61 eventInfo->memIncrease -= usage; 62 PetscCall(PetscMallocGetCurrentUsage(&usage)); 63 eventInfo->mallocSpace -= usage; 64 PetscCall(PetscMallocGetMaximumUsage(&usage)); 65 eventInfo->mallocIncrease -= usage; 66 PetscCall(PetscMallocPushMaximumUsage(event)); 67 } 68 PetscFunctionReturn(PETSC_SUCCESS); 69 } 70 71 static PetscErrorCode PetscEventPerfInfoTic(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 72 { 73 PetscFunctionBegin; 74 PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_FALSE)); 75 PetscFunctionReturn(PETSC_SUCCESS); 76 } 77 78 static PetscErrorCode PetscEventPerfInfoResume(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 79 { 80 PetscFunctionBegin; 81 PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_TRUE)); 82 PetscFunctionReturn(PETSC_SUCCESS); 83 } 84 85 static PetscErrorCode PetscEventPerfInfoToc_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool pause) 86 { 87 PetscFunctionBegin; 88 eventInfo->timeTmp += time; 89 eventInfo->flopsTmp += petsc_TotalFlops_th; 90 if (!pause) { 91 eventInfo->time += eventInfo->timeTmp; 92 eventInfo->time2 += eventInfo->timeTmp * eventInfo->timeTmp; 93 eventInfo->flops += eventInfo->flopsTmp; 94 eventInfo->flops2 += eventInfo->flopsTmp * eventInfo->flopsTmp; 95 } 96 eventInfo->numMessages += petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th; 97 eventInfo->messageLength += petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len + petsc_send_len_th; 98 eventInfo->numReductions += petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th; 99 #if defined(PETSC_HAVE_DEVICE) 100 eventInfo->CpuToGpuCount += petsc_ctog_ct_th; 101 eventInfo->GpuToCpuCount += petsc_gtoc_ct_th; 102 eventInfo->CpuToGpuSize += petsc_ctog_sz_th; 103 eventInfo->GpuToCpuSize += petsc_gtoc_sz_th; 104 eventInfo->GpuFlops += petsc_gflops_th; 105 eventInfo->GpuTime += petsc_gtime; 106 if (PetscLogGpuEnergyFlag) eventInfo->GpuEnergy += petsc_genergy; 107 if (PetscLogGpuEnergyMeterFlag) eventInfo->GpuEnergy += petsc_genergy_meter; 108 #endif 109 if (logMemory) { 110 PetscLogDouble usage, musage; 111 PetscCall(PetscMemoryGetCurrentUsage(&usage)); /* the comments below match the column labels printed in PetscLogView_Default() */ 112 eventInfo->memIncrease += usage; /* RMI */ 113 PetscCall(PetscMallocGetCurrentUsage(&usage)); 114 eventInfo->mallocSpace += usage; /* Malloc */ 115 PetscCall(PetscMallocPopMaximumUsage(event, &musage)); 116 eventInfo->mallocIncreaseEvent = PetscMax(musage - usage, eventInfo->mallocIncreaseEvent); /* EMalloc */ 117 PetscCall(PetscMallocGetMaximumUsage(&usage)); 118 eventInfo->mallocIncrease += usage; /* MMalloc */ 119 } 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode PetscEventPerfInfoToc(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 124 { 125 PetscFunctionBegin; 126 PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_FALSE)); 127 PetscFunctionReturn(PETSC_SUCCESS); 128 } 129 130 static PetscErrorCode PetscEventPerfInfoPause(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 131 { 132 PetscFunctionBegin; 133 PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_TRUE)); 134 PetscFunctionReturn(PETSC_SUCCESS); 135 } 136 137 static PetscErrorCode PetscEventPerfInfoAdd_Internal(const PetscEventPerfInfo *eventInfo, PetscEventPerfInfo *outInfo) 138 { 139 PetscFunctionBegin; 140 outInfo->count += eventInfo->count; 141 outInfo->time += eventInfo->time; 142 outInfo->time2 += eventInfo->time2; 143 outInfo->flops += eventInfo->flops; 144 outInfo->flops2 += eventInfo->flops2; 145 outInfo->numMessages += eventInfo->numMessages; 146 outInfo->messageLength += eventInfo->messageLength; 147 outInfo->numReductions += eventInfo->numReductions; 148 #if defined(PETSC_HAVE_DEVICE) 149 outInfo->CpuToGpuCount += eventInfo->CpuToGpuCount; 150 outInfo->GpuToCpuCount += eventInfo->GpuToCpuCount; 151 outInfo->CpuToGpuSize += eventInfo->CpuToGpuSize; 152 outInfo->GpuToCpuSize += eventInfo->GpuToCpuSize; 153 outInfo->GpuFlops += eventInfo->GpuFlops; 154 outInfo->GpuTime += eventInfo->GpuTime; 155 outInfo->GpuEnergy += eventInfo->GpuEnergy; 156 #endif 157 outInfo->memIncrease += eventInfo->memIncrease; 158 outInfo->mallocSpace += eventInfo->mallocSpace; 159 outInfo->mallocIncreaseEvent += eventInfo->mallocIncreaseEvent; 160 outInfo->mallocIncrease += eventInfo->mallocIncrease; 161 PetscFunctionReturn(PETSC_SUCCESS); 162 } 163 164 PETSC_LOG_RESIZABLE_ARRAY(EventPerfArray, PetscEventPerfInfo, PetscLogEvent, PetscEventPerfInfoInit, NULL, NULL) 165 166 /* --- PetscClassPerf --- */ 167 168 typedef struct { 169 int creations; /* The number of objects of this class created */ 170 int destructions; /* The number of objects of this class destroyed */ 171 PetscLogDouble mem; /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */ 172 PetscLogDouble descMem; /* The total memory allocated by descendents of these objects; this is completely wrong and should possibly be removed */ 173 } PetscClassPerf; 174 175 static PetscErrorCode PetscClassPerfInit(PetscClassPerf *classInfo) 176 { 177 PetscFunctionBegin; 178 PetscCall(PetscMemzero(classInfo, sizeof(*classInfo))); 179 PetscFunctionReturn(PETSC_SUCCESS); 180 } 181 182 PETSC_LOG_RESIZABLE_ARRAY(ClassPerfArray, PetscClassPerf, PetscLogClass, PetscClassPerfInit, NULL, NULL) 183 184 /* --- PetscStagePerf --- */ 185 186 typedef struct _PetscStagePerf { 187 PetscBool used; /* The stage was pushed on this processor */ 188 PetscEventPerfInfo perfInfo; /* The stage performance information */ 189 PetscLogEventPerfArray eventLog; /* The event information for this stage */ 190 PetscLogClassPerfArray classLog; /* The class information for this stage */ 191 } PetscStagePerf; 192 193 static PetscErrorCode PetscStageInfoInit(PetscStagePerf *stageInfo) 194 { 195 PetscFunctionBegin; 196 PetscCall(PetscMemzero(stageInfo, sizeof(*stageInfo))); 197 PetscCall(PetscLogEventPerfArrayCreate(128, &stageInfo->eventLog)); 198 PetscCall(PetscLogClassPerfArrayCreate(128, &stageInfo->classLog)); 199 PetscCall(PetscEventPerfInfoInit(&stageInfo->perfInfo)); 200 PetscFunctionReturn(PETSC_SUCCESS); 201 } 202 203 static PetscErrorCode PetscStageInfoReset(PetscStagePerf *stageInfo) 204 { 205 PetscFunctionBegin; 206 PetscCall(PetscLogEventPerfArrayDestroy(&stageInfo->eventLog)); 207 PetscCall(PetscLogClassPerfArrayDestroy(&stageInfo->classLog)); 208 PetscFunctionReturn(PETSC_SUCCESS); 209 } 210 211 PETSC_LOG_RESIZABLE_ARRAY(StageInfoArray, PetscStagePerf, PetscLogStage, PetscStageInfoInit, PetscStageInfoReset, NULL) 212 213 /* --- Action --- */ 214 215 /* The structure for action logging */ 216 typedef enum { 217 PETSC_LOG_ACTION_CREATE, 218 PETSC_LOG_ACTION_DESTROY, 219 PETSC_LOG_ACTION_BEGIN, 220 PETSC_LOG_ACTION_END, 221 } PetscLogActionType; 222 223 typedef struct _Action { 224 PetscLogActionType action; /* The type of execution */ 225 PetscLogEvent event; /* The event number */ 226 PetscClassId classid; /* The event class id */ 227 PetscLogDouble time; /* The time of occurrence */ 228 PetscLogDouble flops; /* The cumulative flops */ 229 PetscLogDouble mem; /* The current memory usage */ 230 PetscLogDouble maxmem; /* The maximum memory usage */ 231 PetscObjectId id1, id2, id3; /* The ids of associated objects */ 232 } Action; 233 234 PETSC_LOG_RESIZABLE_ARRAY(ActionArray, Action, PetscLogEvent, NULL, NULL, NULL) 235 236 /* --- Object --- */ 237 238 /* The structure for object logging */ 239 typedef struct _Object { 240 PetscObject obj; /* The associated PetscObject */ 241 int parent; /* The parent id */ 242 PetscLogDouble mem; /* The memory associated with the object */ 243 char name[64]; /* The object name */ 244 char info[64]; /* The information string */ 245 } Object; 246 247 PETSC_LOG_RESIZABLE_ARRAY(ObjectArray, Object, PetscObject, NULL, NULL, NULL) 248 249 /* Map from (threadid,stage,event) to perfInfo data struct */ 250 #include <petsc/private/hashmapijk.h> 251 252 PETSC_HASH_MAP(HMapEvent, PetscHashIJKKey, PetscEventPerfInfo *, PetscHashIJKKeyHash, PetscHashIJKKeyEqual, NULL) 253 254 typedef struct _n_PetscLogHandler_Default *PetscLogHandler_Default; 255 struct _n_PetscLogHandler_Default { 256 PetscLogStageInfoArray stages; 257 PetscSpinlock lock; 258 PetscLogActionArray petsc_actions; 259 PetscLogObjectArray petsc_objects; 260 PetscBool petsc_logActions; 261 PetscBool petsc_logObjects; 262 int petsc_numObjectsCreated; 263 int petsc_numObjectsDestroyed; 264 PetscHMapEvent eventInfoMap_th; 265 int pause_depth; 266 PetscBool use_threadsafe; 267 }; 268 269 /* --- PetscLogHandler_Default --- */ 270 271 static PetscErrorCode PetscLogHandlerContextCreate_Default(PetscLogHandler_Default *def_p) 272 { 273 PetscLogHandler_Default def; 274 275 PetscFunctionBegin; 276 PetscCall(PetscNew(def_p)); 277 def = *def_p; 278 PetscCall(PetscLogStageInfoArrayCreate(8, &def->stages)); 279 PetscCall(PetscLogActionArrayCreate(64, &def->petsc_actions)); 280 PetscCall(PetscLogObjectArrayCreate(64, &def->petsc_objects)); 281 PetscCall(PetscSpinlockCreate(&def->lock)); 282 283 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_actions", &def->petsc_logActions, NULL)); 284 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_objects", &def->petsc_logObjects, NULL)); 285 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_handler_default_use_threadsafe_events", &def->use_threadsafe, NULL)); 286 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) PetscCall(PetscHMapEventCreate(&def->eventInfoMap_th)); 287 PetscFunctionReturn(PETSC_SUCCESS); 288 } 289 290 static PetscErrorCode PetscLogHandlerDestroy_Default(PetscLogHandler h) 291 { 292 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 293 294 PetscFunctionBegin; 295 PetscCall(PetscLogStageInfoArrayDestroy(&def->stages)); 296 PetscCall(PetscLogActionArrayDestroy(&def->petsc_actions)); 297 PetscCall(PetscLogObjectArrayDestroy(&def->petsc_objects)); 298 PetscCall(PetscSpinlockDestroy(&def->lock)); 299 if (def->eventInfoMap_th) { 300 PetscEventPerfInfo **array; 301 PetscInt n, off = 0; 302 303 PetscCall(PetscHMapEventGetSize(def->eventInfoMap_th, &n)); 304 PetscCall(PetscMalloc1(n, &array)); 305 PetscCall(PetscHMapEventGetVals(def->eventInfoMap_th, &off, array)); 306 for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i])); 307 PetscCall(PetscFree(array)); 308 PetscCall(PetscHMapEventDestroy(&def->eventInfoMap_th)); 309 } 310 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerGetEventPerfInfo_C", NULL)); 311 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerGetStagePerfInfo_C", NULL)); 312 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerSetLogActions_C", NULL)); 313 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerSetLogObjects_C", NULL)); 314 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerLogObjectState_C", NULL)); 315 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerGetNumObjects_C", NULL)); 316 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventDeactivatePush_C", NULL)); 317 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventDeactivatePop_C", NULL)); 318 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventsPause_C", NULL)); 319 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventsResume_C", NULL)); 320 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerDump_C", NULL)); 321 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerStageSetVisible_C", NULL)); 322 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerStageGetVisible_C", NULL)); 323 PetscCall(PetscFree(def)); 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 static PetscErrorCode PetscLogHandlerDefaultGetStageInfo(PetscLogHandler handler, PetscLogStage stage, PetscStagePerf **stage_info_p) 328 { 329 PetscStagePerf *stage_info = NULL; 330 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 331 332 PetscFunctionBegin; 333 PetscCall(PetscLogStageInfoArrayResize(def->stages, stage + 1)); 334 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 335 *stage_info_p = stage_info; 336 PetscFunctionReturn(PETSC_SUCCESS); 337 } 338 339 static PetscErrorCode PetscLogHandlerGetEventPerfInfo_Default(PetscLogHandler handler, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **event_info_p) 340 { 341 PetscEventPerfInfo *event_info = NULL; 342 PetscStagePerf *stage_info = NULL; 343 PetscLogEventPerfArray event_log; 344 345 PetscFunctionBegin; 346 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(handler->state, &stage)); 347 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 348 event_log = stage_info->eventLog; 349 PetscCall(PetscLogEventPerfArrayResize(event_log, event + 1)); 350 PetscCall(PetscLogEventPerfArrayGetRef(event_log, event, &event_info)); 351 event_info->id = event; 352 *event_info_p = event_info; 353 PetscFunctionReturn(PETSC_SUCCESS); 354 } 355 356 static PetscErrorCode PetscLogHandlerGetStagePerfInfo_Default(PetscLogHandler handler, PetscLogStage stage, PetscEventPerfInfo **stage_info_p) 357 { 358 PetscStagePerf *stage_perf_info = NULL; 359 360 PetscFunctionBegin; 361 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(handler->state, &stage)); 362 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_perf_info)); 363 *stage_info_p = &stage_perf_info->perfInfo; 364 PetscFunctionReturn(PETSC_SUCCESS); 365 } 366 367 static PetscErrorCode PetscLogHandlerDefaultGetClassPerf(PetscLogHandler handler, PetscLogStage stage, PetscLogClass clss, PetscClassPerf **class_info) 368 { 369 PetscLogClassPerfArray class_log; 370 PetscStagePerf *stage_info; 371 372 PetscFunctionBegin; 373 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 374 class_log = stage_info->classLog; 375 PetscCall(PetscLogClassPerfArrayResize(class_log, clss + 1)); 376 PetscCall(PetscLogClassPerfArrayGetRef(class_log, clss, class_info)); 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wconversion") 381 static PetscErrorCode PetscLogHandlerObjectCreate_Default(PetscLogHandler h, PetscObject obj) 382 { 383 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 384 PetscLogState state; 385 PetscLogStage stage; 386 PetscClassPerf *classInfo; 387 int oclass = 0; 388 389 PetscFunctionBegin; 390 PetscCall(PetscLogHandlerGetState(h, &state)); 391 PetscCall(PetscSpinlockLock(&def->lock)); 392 /* Record stage info */ 393 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 394 PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass)); 395 PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo)); 396 classInfo->creations++; 397 /* Record the creation action */ 398 if (def->petsc_logActions) { 399 Action new_action; 400 401 PetscCall(PetscTime(&new_action.time)); 402 new_action.time -= petsc_BaseTime; 403 new_action.action = PETSC_LOG_ACTION_CREATE; 404 new_action.event = -1; 405 new_action.classid = obj->classid; 406 new_action.id1 = obj->id; 407 new_action.id2 = -1; 408 new_action.id3 = -1; 409 new_action.flops = petsc_TotalFlops; 410 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 411 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 412 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 413 } 414 /* We don't just use obj->id to count all objects that are created 415 because PetscLogHandlers are objects and PetscLogObjectDestroy() will not 416 be called for them: the number of objects created and destroyed as counted 417 here and below would have an imbalance */ 418 def->petsc_numObjectsCreated++; 419 /* Record the object */ 420 if (def->petsc_logObjects) { 421 Object new_object; 422 PetscInt objid; 423 424 new_object.parent = -1; 425 new_object.obj = obj; 426 new_object.mem = 0; 427 428 PetscCall(PetscMemzero(new_object.name, sizeof(new_object.name))); 429 PetscCall(PetscMemzero(new_object.info, sizeof(new_object.info))); 430 PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1"); 431 PetscCall(PetscIntCast(obj->id, &objid)); 432 PetscCall(PetscLogObjectArrayResize(def->petsc_objects, objid)); 433 PetscCall(PetscLogObjectArraySet(def->petsc_objects, objid - 1, new_object)); 434 } 435 PetscCall(PetscSpinlockUnlock(&def->lock)); 436 PetscFunctionReturn(PETSC_SUCCESS); 437 } 438 439 static PetscErrorCode PetscLogHandlerObjectDestroy_Default(PetscLogHandler h, PetscObject obj) 440 { 441 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 442 PetscLogState state; 443 PetscLogStage stage; 444 PetscClassPerf *classInfo; 445 int oclass = 0; 446 447 PetscFunctionBegin; 448 PetscCall(PetscLogHandlerGetState(h, &state)); 449 /* Record stage info */ 450 PetscCall(PetscSpinlockLock(&def->lock)); 451 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 452 if (stage >= 0) { 453 /* stage < 0 can happen if the log summary is output before some things are destroyed */ 454 PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass)); 455 PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo)); 456 classInfo->destructions++; 457 } 458 /* Cannot Credit all ancestors with your memory because they may have already been destroyed*/ 459 def->petsc_numObjectsDestroyed++; 460 /* Dynamically enlarge logging structures */ 461 /* Record the destruction action */ 462 if (def->petsc_logActions) { 463 Action new_action; 464 465 PetscCall(PetscTime(&new_action.time)); 466 new_action.time -= petsc_BaseTime; 467 new_action.event = -1; 468 new_action.action = PETSC_LOG_ACTION_DESTROY; 469 new_action.classid = obj->classid; 470 new_action.id1 = obj->id; 471 new_action.id2 = -1; 472 new_action.id3 = -1; 473 new_action.flops = petsc_TotalFlops; 474 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 475 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 476 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 477 } 478 if (def->petsc_logObjects) { 479 Object *obj_entry = NULL; 480 PetscInt objid; 481 482 PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1"); 483 PetscCall(PetscIntCast(obj->id - 1, &objid)); 484 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, objid, &obj_entry)); 485 if (obj->name) PetscCall(PetscStrncpy(obj_entry->name, obj->name, 64)); 486 obj_entry->obj = NULL; 487 } 488 PetscCall(PetscSpinlockUnlock(&def->lock)); 489 PetscFunctionReturn(PETSC_SUCCESS); 490 } 491 492 static PetscErrorCode PetscLogHandlerEventSync_Default(PetscLogHandler h, PetscLogEvent event, MPI_Comm comm) 493 { 494 PetscLogState state; 495 PetscLogEventInfo event_info; 496 PetscEventPerfInfo *event_perf_info; 497 int stage; 498 PetscLogDouble time = 0.0; 499 500 PetscFunctionBegin; 501 if (!PetscLogSyncOn || comm == MPI_COMM_NULL) PetscFunctionReturn(PETSC_SUCCESS); 502 PetscCall(PetscLogHandlerGetState(h, &state)); 503 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 504 if (!event_info.collective) PetscFunctionReturn(PETSC_SUCCESS); 505 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 506 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 507 if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 508 509 PetscCall(PetscTimeSubtract(&time)); 510 PetscCallMPI(MPI_Barrier(comm)); 511 PetscCall(PetscTimeAdd(&time)); 512 event_perf_info->syncTime += time; 513 PetscFunctionReturn(PETSC_SUCCESS); 514 } 515 516 static PetscErrorCode PetscLogGetStageEventPerfInfo_threaded(PetscLogHandler_Default def, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **eventInfo) 517 { 518 PetscEventPerfInfo *leventInfo = NULL; 519 PetscHashIJKKey key; 520 521 PetscFunctionBegin; 522 #if PetscDefined(HAVE_THREADSAFETY) 523 key.i = PetscLogGetTid(); 524 #else 525 key.i = 0; 526 #endif 527 key.j = stage; 528 key.k = event; 529 PetscCall(PetscSpinlockLock(&def->lock)); 530 PetscCall(PetscHMapEventGet(def->eventInfoMap_th, key, &leventInfo)); 531 if (!leventInfo) { 532 PetscCall(PetscNew(&leventInfo)); 533 leventInfo->id = event; 534 PetscCall(PetscHMapEventSet(def->eventInfoMap_th, key, leventInfo)); 535 } 536 PetscCall(PetscSpinlockUnlock(&def->lock)); 537 *eventInfo = leventInfo; 538 PetscFunctionReturn(PETSC_SUCCESS); 539 } 540 541 static PetscErrorCode PetscLogHandlerEventBegin_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 542 { 543 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 544 PetscEventPerfInfo *event_perf_info = NULL; 545 PetscLogEventInfo event_info; 546 PetscLogDouble time; 547 PetscLogState state; 548 PetscLogStage stage; 549 550 PetscFunctionBegin; 551 PetscCall(PetscLogHandlerGetState(h, &state)); 552 if (PetscDefined(USE_DEBUG)) { 553 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 554 if (PetscUnlikely(o1)) PetscValidHeader(o1, 3); 555 if (PetscUnlikely(o2)) PetscValidHeader(o2, 4); 556 if (PetscUnlikely(o3)) PetscValidHeader(o3, 5); 557 if (PetscUnlikely(o4)) PetscValidHeader(o4, 6); 558 if (event_info.collective && o1) { 559 PetscInt64 b1[2], b2[2]; 560 561 b1[0] = -o1->cidx; 562 b1[1] = o1->cidx; 563 PetscCallMPI(MPIU_Allreduce(b1, b2, 2, MPIU_INT64, MPI_MAX, PetscObjectComm(o1))); 564 PetscCheck(-b2[0] == b2[1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Collective event %s not called collectively %" PetscInt64_FMT " != %" PetscInt64_FMT, event_info.name, -b2[0], b2[1]); 565 } 566 } 567 /* Synchronization */ 568 PetscCall(PetscLogHandlerEventSync_Default(h, event, PetscObjectComm(o1))); 569 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 570 if (def->pause_depth > 0) stage = 0; // in pause-mode, all events run on the main stage 571 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) { 572 PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info)); 573 if (event_perf_info->depth == 0) PetscCall(PetscEventPerfInfoInit(event_perf_info)); 574 } else { 575 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 576 } 577 PetscCheck(event_perf_info->depth >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to begin a paused event, this is not allowed"); 578 event_perf_info->depth++; 579 /* Check for double counting */ 580 if (event_perf_info->depth > 1) PetscFunctionReturn(PETSC_SUCCESS); 581 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 582 /* Log the performance info */ 583 event_perf_info->count++; 584 PetscCall(PetscTime(&time)); 585 PetscCall(PetscEventPerfInfoTic(event_perf_info, time, PetscLogMemory, event)); 586 if (def->petsc_logActions) { 587 PetscLogDouble curTime; 588 Action new_action; 589 590 PetscCall(PetscTime(&curTime)); 591 new_action.time = curTime - petsc_BaseTime; 592 new_action.action = PETSC_LOG_ACTION_BEGIN; 593 new_action.event = event; 594 new_action.classid = event_info.classid; 595 new_action.id1 = o1 ? o1->id : -1; 596 new_action.id2 = o2 ? o2->id : -1; 597 new_action.id3 = o3 ? o3->id : -1; 598 new_action.flops = petsc_TotalFlops; 599 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 600 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 601 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 602 } 603 PetscFunctionReturn(PETSC_SUCCESS); 604 } 605 606 static PetscErrorCode PetscLogHandlerEventEnd_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 607 { 608 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 609 PetscEventPerfInfo *event_perf_info = NULL; 610 PetscLogDouble time; 611 PetscLogState state; 612 int stage; 613 PetscLogEventInfo event_info; 614 615 PetscFunctionBegin; 616 PetscCall(PetscLogHandlerGetState(h, &state)); 617 if (PetscDefined(USE_DEBUG)) { 618 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 619 if (PetscUnlikely(o1)) PetscValidHeader(o1, 3); 620 if (PetscUnlikely(o2)) PetscValidHeader(o2, 4); 621 if (PetscUnlikely(o3)) PetscValidHeader(o3, 5); 622 if (PetscUnlikely(o4)) PetscValidHeader(o4, 6); 623 if (event_info.collective && o1) { 624 PetscInt64 b1[2], b2[2]; 625 626 b1[0] = -o1->cidx; 627 b1[1] = o1->cidx; 628 PetscCallMPI(MPIU_Allreduce(b1, b2, 2, MPIU_INT64, MPI_MAX, PetscObjectComm(o1))); 629 PetscCheck(-b2[0] == b2[1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Collective event %s not called collectively %" PetscInt64_FMT " != %" PetscInt64_FMT, event_info.name, -b2[0], b2[1]); 630 } 631 } 632 if (def->petsc_logActions) { 633 PetscLogDouble curTime; 634 Action new_action; 635 636 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 637 PetscCall(PetscTime(&curTime)); 638 new_action.time = curTime - petsc_BaseTime; 639 new_action.action = PETSC_LOG_ACTION_END; 640 new_action.event = event; 641 new_action.classid = event_info.classid; 642 new_action.id1 = o1 ? o1->id : -1; 643 new_action.id2 = o2 ? o2->id : -2; 644 new_action.id3 = o3 ? o3->id : -3; 645 new_action.flops = petsc_TotalFlops; 646 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 647 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 648 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 649 } 650 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 651 if (def->pause_depth > 0) stage = 0; // all events run on the main stage in pause-mode 652 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) { 653 PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info)); 654 } else { 655 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 656 } 657 PetscCheck(event_perf_info->depth > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to end paused event, not allowed"); 658 event_perf_info->depth--; 659 /* Check for double counting */ 660 if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 661 else PetscCheck(event_perf_info->depth == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Logging event had unbalanced begin/end pairs"); 662 663 /* Log performance info */ 664 PetscCall(PetscTime(&time)); 665 PetscCall(PetscEventPerfInfoToc(event_perf_info, time, PetscLogMemory, event)); 666 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) { 667 PetscEventPerfInfo *event_perf_info_global; 668 PetscCall(PetscSpinlockLock(&def->lock)); 669 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info_global)); 670 PetscCall(PetscEventPerfInfoAdd_Internal(event_perf_info, event_perf_info_global)); 671 PetscCall(PetscSpinlockUnlock(&def->lock)); 672 } 673 PetscFunctionReturn(PETSC_SUCCESS); 674 } 675 676 static PetscErrorCode PetscLogHandlerEventDeactivatePush_Default(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event) 677 { 678 PetscEventPerfInfo *event_perf_info; 679 680 PetscFunctionBegin; 681 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 682 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 683 event_perf_info->depth++; 684 PetscFunctionReturn(PETSC_SUCCESS); 685 } 686 687 static PetscErrorCode PetscLogHandlerEventDeactivatePop_Default(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event) 688 { 689 PetscEventPerfInfo *event_perf_info; 690 691 PetscFunctionBegin; 692 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 693 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 694 event_perf_info->depth--; 695 PetscFunctionReturn(PETSC_SUCCESS); 696 } 697 698 static PetscErrorCode PetscLogHandlerEventsPause_Default(PetscLogHandler h) 699 { 700 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 701 PetscLogDouble time; 702 PetscInt num_stages; 703 704 PetscFunctionBegin; 705 if (def->pause_depth++ > 0) PetscFunctionReturn(PETSC_SUCCESS); 706 PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL)); 707 PetscCall(PetscTime(&time)); 708 /* Pause stages in reverse of the order they were pushed */ 709 for (PetscInt stage = num_stages - 1; stage >= 0; stage--) { 710 PetscStagePerf *stage_info = NULL; 711 PetscInt num_events; 712 713 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 714 PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL)); 715 /* Pause events in reverse of the order they were pushed */ 716 for (PetscInt event = num_events - 1; event >= 0; event--) { 717 PetscEventPerfInfo *event_info = NULL; 718 PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info)); 719 if (event_info->depth > 0) { 720 event_info->depth *= -1; 721 PetscCall(PetscEventPerfInfoPause(event_info, time, PetscLogMemory, event)); 722 } 723 } 724 if (stage > 0 && stage_info->perfInfo.depth > 0) { 725 stage_info->perfInfo.depth *= -1; 726 PetscCall(PetscEventPerfInfoPause(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2))); 727 } 728 } 729 PetscFunctionReturn(PETSC_SUCCESS); 730 } 731 732 static PetscErrorCode PetscLogHandlerEventsResume_Default(PetscLogHandler h) 733 { 734 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 735 PetscLogDouble time; 736 PetscInt num_stages; 737 738 PetscFunctionBegin; 739 if (--def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 740 PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL)); 741 PetscCall(PetscTime(&time)); 742 /* Unpause stages in the same order they were pushed */ 743 for (PetscInt stage = 0; stage < num_stages; stage++) { 744 PetscStagePerf *stage_info = NULL; 745 PetscInt num_events; 746 747 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 748 PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL)); 749 /* Unpause events in the same order they were pushed */ 750 for (PetscInt event = 0; event < num_events; event++) { 751 PetscEventPerfInfo *event_info = NULL; 752 PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info)); 753 if (event_info->depth < 0) { 754 event_info->depth *= -1; 755 PetscCall(PetscEventPerfInfoResume(event_info, time, PetscLogMemory, event)); 756 } 757 } 758 if (stage > 0 && stage_info->perfInfo.depth < 0) { 759 stage_info->perfInfo.depth *= -1; 760 PetscCall(PetscEventPerfInfoResume(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2))); 761 } 762 } 763 PetscFunctionReturn(PETSC_SUCCESS); 764 } 765 766 static PetscErrorCode PetscLogHandlerStagePush_Default(PetscLogHandler h, PetscLogStage new_stage) 767 { 768 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 769 PetscLogDouble time; 770 PetscLogState state; 771 PetscLogStage current_stage; 772 PetscStagePerf *new_stage_info; 773 774 PetscFunctionBegin; 775 if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 776 PetscCall(PetscLogHandlerGetState(h, &state)); 777 current_stage = state->current_stage; 778 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, new_stage, &new_stage_info)); 779 PetscCall(PetscTime(&time)); 780 781 /* Record flops/time of previous stage */ 782 if (current_stage >= 0) { 783 if (PetscBTLookup(state->active, current_stage)) { 784 PetscStagePerf *current_stage_info; 785 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, ¤t_stage_info)); 786 PetscCall(PetscEventPerfInfoToc(¤t_stage_info->perfInfo, time, PetscLogMemory, -(current_stage + 2))); 787 } 788 } 789 new_stage_info->used = PETSC_TRUE; 790 new_stage_info->perfInfo.count++; 791 new_stage_info->perfInfo.depth++; 792 /* Subtract current quantities so that we obtain the difference when we pop */ 793 if (PetscBTLookup(state->active, new_stage)) PetscCall(PetscEventPerfInfoTic(&new_stage_info->perfInfo, time, PetscLogMemory, -(new_stage + 2))); 794 PetscFunctionReturn(PETSC_SUCCESS); 795 } 796 797 static PetscErrorCode PetscLogHandlerStagePop_Default(PetscLogHandler h, PetscLogStage old_stage) 798 { 799 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 800 PetscLogStage current_stage; 801 PetscStagePerf *old_stage_info; 802 PetscLogState state; 803 PetscLogDouble time; 804 805 PetscFunctionBegin; 806 if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 807 PetscCall(PetscLogHandlerGetState(h, &state)); 808 current_stage = state->current_stage; 809 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, old_stage, &old_stage_info)); 810 PetscCall(PetscTime(&time)); 811 old_stage_info->perfInfo.depth--; 812 if (PetscBTLookup(state->active, old_stage)) PetscCall(PetscEventPerfInfoToc(&old_stage_info->perfInfo, time, PetscLogMemory, -(old_stage + 2))); 813 if (current_stage >= 0) { 814 if (PetscBTLookup(state->active, current_stage)) { 815 PetscStagePerf *current_stage_info; 816 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, ¤t_stage_info)); 817 PetscCall(PetscEventPerfInfoTic(¤t_stage_info->perfInfo, time, PetscLogMemory, -(current_stage + 2))); 818 } 819 } 820 PetscFunctionReturn(PETSC_SUCCESS); 821 } 822 823 static PetscErrorCode PetscLogHandlerStageSetVisible_Default(PetscLogHandler h, PetscLogStage stage, PetscBool is_visible) 824 { 825 PetscStagePerf *stage_info; 826 827 PetscFunctionBegin; 828 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 829 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info)); 830 stage_info->perfInfo.visible = is_visible; 831 PetscFunctionReturn(PETSC_SUCCESS); 832 } 833 834 static PetscErrorCode PetscLogHandlerStageGetVisible_Default(PetscLogHandler h, PetscLogStage stage, PetscBool *is_visible) 835 { 836 PetscStagePerf *stage_info; 837 838 PetscFunctionBegin; 839 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 840 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info)); 841 *is_visible = stage_info->perfInfo.visible; 842 PetscFunctionReturn(PETSC_SUCCESS); 843 } 844 845 static PetscErrorCode PetscLogHandlerSetLogActions_Default(PetscLogHandler handler, PetscBool flag) 846 { 847 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 848 849 PetscFunctionBegin; 850 def->petsc_logActions = flag; 851 PetscFunctionReturn(PETSC_SUCCESS); 852 } 853 854 static PetscErrorCode PetscLogHandlerSetLogObjects_Default(PetscLogHandler handler, PetscBool flag) 855 { 856 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 857 858 PetscFunctionBegin; 859 def->petsc_logObjects = flag; 860 PetscFunctionReturn(PETSC_SUCCESS); 861 } 862 863 static PetscErrorCode PetscLogHandlerLogObjectState_Default(PetscLogHandler handler, PetscObject obj, const char format[], va_list Argp) 864 { 865 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 866 size_t fullLength; 867 868 PetscFunctionBegin; 869 if (def->petsc_logObjects) { 870 Object *obj_entry = NULL; 871 PetscInt objid; 872 873 PetscCall(PetscIntCast(obj->id - 1, &objid)); 874 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, objid, &obj_entry)); 875 PetscCall(PetscVSNPrintf(obj_entry->info, 64, format, &fullLength, Argp)); 876 } 877 PetscFunctionReturn(PETSC_SUCCESS); 878 } 879 880 static PetscErrorCode PetscLogHandlerGetNumObjects_Default(PetscLogHandler handler, PetscInt *num_objects) 881 { 882 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 883 884 PetscFunctionBegin; 885 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, num_objects, NULL)); 886 PetscFunctionReturn(PETSC_SUCCESS); 887 } 888 889 static PetscErrorCode PetscLogHandlerDump_Default(PetscLogHandler handler, const char sname[]) 890 { 891 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 892 FILE *fd; 893 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 894 PetscLogDouble flops, _TotalTime; 895 PetscMPIInt rank; 896 int curStage; 897 PetscLogState state; 898 PetscInt num_events; 899 PetscLogEvent event; 900 901 PetscFunctionBegin; 902 /* Calculate the total elapsed time */ 903 PetscCall(PetscTime(&_TotalTime)); 904 _TotalTime -= petsc_BaseTime; 905 /* Open log file */ 906 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)handler), &rank)); 907 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank)); 908 PetscCall(PetscFixFilename(file, fname)); 909 PetscCall(PetscFOpen(PETSC_COMM_SELF, fname, "w", &fd)); 910 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 911 /* Output totals */ 912 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 913 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Clock Resolution %g\n", 0.0)); 914 /* Output actions */ 915 if (def->petsc_logActions) { 916 PetscInt num_actions; 917 PetscCall(PetscLogActionArrayGetSize(def->petsc_actions, &num_actions, NULL)); 918 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Actions accomplished %" PetscInt_FMT "\n", num_actions)); 919 for (int a = 0; a < num_actions; a++) { 920 Action *action; 921 922 PetscCall(PetscLogActionArrayGetRef(def->petsc_actions, a, &action)); 923 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%g %d %d %d %" PetscInt64_FMT " %" PetscInt64_FMT " %" PetscInt64_FMT " %g %g %g\n", action->time, action->action, action->event, action->classid, action->id1, action->id2, action->id3, action->flops, 924 action->mem, action->maxmem)); 925 } 926 } 927 /* Output objects */ 928 if (def->petsc_logObjects) { 929 PetscInt num_objects; 930 931 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 932 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Objects created %d destroyed %d\n", def->petsc_numObjectsCreated, def->petsc_numObjectsDestroyed)); 933 for (int o = 0; o < num_objects; o++) { 934 Object *object = NULL; 935 936 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, o, &object)); 937 if (object->parent != -1) continue; // object with this id wasn't logged, probably a PetscLogHandler 938 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Parent ID: %d Memory: %d\n", object->parent, (int)object->mem)); 939 if (!object->name[0]) { 940 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Name\n")); 941 } else { 942 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Name: %s\n", object->name)); 943 } 944 if (!object->info[0]) { 945 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Info\n")); 946 } else { 947 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Info: %s\n", object->info)); 948 } 949 } 950 } 951 /* Output events */ 952 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Event log:\n")); 953 PetscCall(PetscLogHandlerGetState(handler, &state)); 954 PetscCall(PetscLogStateGetNumEvents(state, &num_events)); 955 PetscCall(PetscLogStateGetCurrentStage(state, &curStage)); 956 for (event = 0; event < num_events; event++) { 957 PetscEventPerfInfo *event_info; 958 959 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, curStage, event, &event_info)); 960 if (event_info->time != 0.0) flops = event_info->flops / event_info->time; 961 else flops = 0.0; 962 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%d %16d %16g %16g %16g\n", event, event_info->count, event_info->flops, event_info->time, flops)); 963 } 964 PetscCall(PetscFClose(PETSC_COMM_SELF, fd)); 965 PetscFunctionReturn(PETSC_SUCCESS); 966 } 967 968 /* 969 PetscLogView_Detailed - Each process prints the times for its own events 970 971 */ 972 static PetscErrorCode PetscLogHandlerView_Default_Detailed(PetscLogHandler handler, PetscViewer viewer) 973 { 974 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 975 PetscLogDouble locTotalTime, numRed, maxMem; 976 PetscInt numStages, numEvents; 977 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 978 PetscMPIInt rank, size; 979 PetscLogGlobalNames global_stages, global_events; 980 PetscLogState state; 981 PetscEventPerfInfo zero_info; 982 983 PetscFunctionBegin; 984 PetscCall(PetscLogHandlerGetState(handler, &state)); 985 PetscCallMPI(MPI_Comm_size(comm, &size)); 986 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 987 /* Must preserve reduction count before we go on */ 988 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 989 /* Get the total elapsed time */ 990 PetscCall(PetscTime(&locTotalTime)); 991 locTotalTime -= petsc_BaseTime; 992 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 993 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 994 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 995 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 996 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 997 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 998 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 999 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 1000 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 1001 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 1002 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 1003 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 1004 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 1005 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1006 for (PetscInt stage = 0; stage < numStages; stage++) { 1007 PetscInt stage_id; 1008 const char *stage_name; 1009 1010 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1011 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1012 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stage_name)); 1013 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stage_name)); 1014 for (PetscInt event = 0; event < numEvents; event++) { 1015 PetscEventPerfInfo *eventInfo = &zero_info; 1016 PetscBool is_zero = PETSC_FALSE; 1017 PetscInt event_id; 1018 const char *event_name; 1019 1020 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1021 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1022 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &eventInfo)); 1023 is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE; 1024 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPI_C_BOOL, MPI_LAND, comm)); 1025 if (!is_zero) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stage_name, event_name)); 1026 } 1027 } 1028 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1029 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1030 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1031 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct)); 1032 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len)); 1033 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1034 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1035 { 1036 PetscInt num_objects; 1037 1038 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 1039 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %" PetscInt_FMT "\n", rank, num_objects)); 1040 } 1041 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1042 PetscCall(PetscViewerFlush(viewer)); 1043 for (PetscInt stage = 0; stage < numStages; stage++) { 1044 PetscEventPerfInfo *stage_perf_info = &zero_info; 1045 PetscInt stage_id; 1046 const char *stage_name; 1047 1048 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1049 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1050 if (stage_id >= 0) { 1051 PetscStagePerf *stage_info; 1052 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info)); 1053 stage_perf_info = &stage_info->perfInfo; 1054 } 1055 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stage_name, rank, stage_perf_info->time, 1056 stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops)); 1057 for (PetscInt event = 0; event < numEvents; event++) { 1058 PetscEventPerfInfo *eventInfo = &zero_info; 1059 PetscBool is_zero = PETSC_FALSE; 1060 PetscInt event_id; 1061 const char *event_name; 1062 1063 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1064 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1065 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &eventInfo)); 1066 is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE; 1067 PetscCall(PetscArraycmp(eventInfo, &zero_info, 1, &is_zero)); 1068 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPI_C_BOOL, MPI_LAND, comm)); 1069 if (!is_zero) { 1070 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", stage_name, event_name, rank, 1071 eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops)); 1072 if (eventInfo->dof[0] >= 0.) { 1073 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1074 for (PetscInt d = 0; d < 8; ++d) { 1075 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1076 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1077 } 1078 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1079 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1080 for (PetscInt e = 0; e < 8; ++e) { 1081 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1082 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1083 } 1084 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1085 } 1086 } 1087 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1088 } 1089 } 1090 PetscCall(PetscViewerFlush(viewer)); 1091 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1092 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1093 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1094 PetscFunctionReturn(PETSC_SUCCESS); 1095 } 1096 1097 /* 1098 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1099 */ 1100 static PetscErrorCode PetscLogHandlerView_Default_CSV(PetscLogHandler handler, PetscViewer viewer) 1101 { 1102 PetscLogDouble locTotalTime, maxMem; 1103 PetscInt numStages, numEvents, stage, event; 1104 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1105 PetscMPIInt rank, size; 1106 PetscLogGlobalNames global_stages, global_events; 1107 PetscLogState state; 1108 PetscEventPerfInfo zero_info; 1109 1110 PetscFunctionBegin; 1111 PetscCall(PetscLogHandlerGetState(handler, &state)); 1112 PetscCallMPI(MPI_Comm_size(comm, &size)); 1113 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1114 /* Must preserve reduction count before we go on */ 1115 /* Get the total elapsed time */ 1116 PetscCall(PetscTime(&locTotalTime)); 1117 locTotalTime -= petsc_BaseTime; 1118 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1119 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 1120 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 1121 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 1122 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 1123 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 1124 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1125 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1126 PetscCall(PetscViewerFlush(viewer)); 1127 for (stage = 0; stage < numStages; stage++) { 1128 PetscEventPerfInfo *stage_perf_info; 1129 PetscInt stage_id; 1130 const char *stage_name; 1131 1132 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1133 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1134 stage_perf_info = &zero_info; 1135 if (stage_id >= 0) { 1136 PetscStagePerf *stage_info; 1137 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info)); 1138 stage_perf_info = &stage_info->perfInfo; 1139 } 1140 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stage_name, rank, stage_perf_info->time, stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops)); 1141 for (event = 0; event < numEvents; event++) { 1142 PetscEventPerfInfo *eventInfo = &zero_info; 1143 PetscBool is_zero = PETSC_FALSE; 1144 PetscInt event_id; 1145 const char *event_name; 1146 1147 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1148 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1149 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &eventInfo)); 1150 PetscCall(PetscArraycmp(eventInfo, &zero_info, 1, &is_zero)); 1151 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPI_C_BOOL, MPI_LAND, comm)); 1152 if (!is_zero) { 1153 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stage_name, event_name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops)); 1154 if (eventInfo->dof[0] >= 0.) { 1155 for (PetscInt d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1156 for (PetscInt e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1157 } 1158 } 1159 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1160 } 1161 } 1162 PetscCall(PetscViewerFlush(viewer)); 1163 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1164 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1165 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1166 PetscFunctionReturn(PETSC_SUCCESS); 1167 } 1168 1169 static PetscErrorCode PetscLogViewWarnSync(PetscViewer viewer) 1170 { 1171 PetscFunctionBegin; 1172 if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS); 1173 PetscCall(PetscViewerASCIIPrintf(viewer, "\n\n")); 1174 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n")); 1175 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1176 PetscCall(PetscViewerASCIIPrintf(viewer, " # WARNING!!! #\n")); 1177 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1178 PetscCall(PetscViewerASCIIPrintf(viewer, " # This program was run with logging synchronization. #\n")); 1179 PetscCall(PetscViewerASCIIPrintf(viewer, " # This option provides more meaningful imbalance #\n")); 1180 PetscCall(PetscViewerASCIIPrintf(viewer, " # figures at the expense of slowing things down and #\n")); 1181 PetscCall(PetscViewerASCIIPrintf(viewer, " # providing a distorted view of the overall runtime. #\n")); 1182 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1183 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n\n\n")); 1184 PetscFunctionReturn(PETSC_SUCCESS); 1185 } 1186 1187 static PetscErrorCode PetscLogViewWarnDebugging(PetscViewer viewer) 1188 { 1189 PetscFunctionBegin; 1190 if (PetscDefined(USE_DEBUG)) { 1191 PetscCall(PetscViewerASCIIPrintf(viewer, "\n\n")); 1192 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n")); 1193 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1194 PetscCall(PetscViewerASCIIPrintf(viewer, " # WARNING!!! #\n")); 1195 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1196 PetscCall(PetscViewerASCIIPrintf(viewer, " # This code was compiled with a debugging option. #\n")); 1197 PetscCall(PetscViewerASCIIPrintf(viewer, " # To get timing results run ./configure #\n")); 1198 PetscCall(PetscViewerASCIIPrintf(viewer, " # using --with-debugging=no, the performance will #\n")); 1199 PetscCall(PetscViewerASCIIPrintf(viewer, " # be generally two or three times faster. #\n")); 1200 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1201 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n\n\n")); 1202 } 1203 PetscFunctionReturn(PETSC_SUCCESS); 1204 } 1205 1206 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(PetscViewer viewer) 1207 { 1208 #if defined(PETSC_HAVE_DEVICE) 1209 PetscMPIInt size; 1210 PetscBool deviceInitialized = PETSC_FALSE; 1211 1212 PetscFunctionBegin; 1213 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)viewer), &size)); 1214 for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) { 1215 const PetscDeviceType dtype = PetscDeviceTypeCast(i); 1216 if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */ 1217 deviceInitialized = PETSC_TRUE; 1218 break; 1219 } 1220 } 1221 /* the last condition says PETSc is configured with device but it is a pure CPU run, so don't print misleading warnings */ 1222 if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS); 1223 PetscCall(PetscViewerASCIIPrintf(viewer, "\n\n")); 1224 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n")); 1225 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1226 PetscCall(PetscViewerASCIIPrintf(viewer, " # WARNING!!! #\n")); 1227 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1228 PetscCall(PetscViewerASCIIPrintf(viewer, " # This code was compiled with GPU support and you've #\n")); 1229 PetscCall(PetscViewerASCIIPrintf(viewer, " # created PETSc/GPU objects, but you intentionally #\n")); 1230 PetscCall(PetscViewerASCIIPrintf(viewer, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1231 PetscCall(PetscViewerASCIIPrintf(viewer, " # additional data between the GPU and CPU. To obtain #\n")); 1232 PetscCall(PetscViewerASCIIPrintf(viewer, " # meaningful timing results on multi-rank runs, use #\n")); 1233 PetscCall(PetscViewerASCIIPrintf(viewer, " # GPU-aware MPI instead. #\n")); 1234 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1235 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n\n\n")); 1236 PetscFunctionReturn(PETSC_SUCCESS); 1237 #else 1238 return PETSC_SUCCESS; 1239 #endif 1240 } 1241 1242 static PetscErrorCode PetscLogViewWarnGpuTime(PetscViewer viewer) 1243 { 1244 #if defined(PETSC_HAVE_DEVICE) && !defined(PETSC_HAVE_KOKKOS_WITHOUT_GPU) 1245 1246 PetscFunctionBegin; 1247 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS); 1248 PetscCall(PetscViewerASCIIPrintf(viewer, "\n\n")); 1249 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n")); 1250 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1251 PetscCall(PetscViewerASCIIPrintf(viewer, " # WARNING!!! #\n")); 1252 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1253 PetscCall(PetscViewerASCIIPrintf(viewer, " # This code was run with -log_view_gpu_time #\n")); 1254 PetscCall(PetscViewerASCIIPrintf(viewer, " # This provides accurate timing within the GPU kernels #\n")); 1255 PetscCall(PetscViewerASCIIPrintf(viewer, " # but can slow down the entire computation by a #\n")); 1256 PetscCall(PetscViewerASCIIPrintf(viewer, " # measurable amount. For fastest runs we recommend #\n")); 1257 PetscCall(PetscViewerASCIIPrintf(viewer, " # not using this option. #\n")); 1258 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1259 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n\n\n")); 1260 PetscFunctionReturn(PETSC_SUCCESS); 1261 #else 1262 return PETSC_SUCCESS; 1263 #endif 1264 } 1265 1266 PETSC_INTERN int PetscGlobalArgc; 1267 PETSC_INTERN char **PetscGlobalArgs; 1268 1269 static PetscErrorCode PetscLogHandlerView_Default_Info(PetscLogHandler handler, PetscViewer viewer) 1270 { 1271 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 1272 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1273 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1274 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1275 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1276 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1277 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1278 PetscLogDouble min, max, tot, ratio, avg, x, y; 1279 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1280 #if defined(PETSC_HAVE_DEVICE) 1281 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1282 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops, gjoules, geff; 1283 #endif 1284 PetscMPIInt minC, maxC; 1285 PetscMPIInt size, rank; 1286 PetscBool *localStageUsed, *stageUsed; 1287 PetscBool *localStageVisible, *stageVisible; 1288 PetscInt numStages, numEvents; 1289 int stage, oclass; 1290 PetscLogEvent event; 1291 char version[256]; 1292 MPI_Comm comm; 1293 #if defined(PETSC_HAVE_DEVICE) && !defined(PETSC_HAVE_KOKKOS_WITHOUT_GPU) 1294 PetscInt64 nas = 0x7FF0000000000002; 1295 #endif 1296 PetscLogGlobalNames global_stages, global_events; 1297 PetscEventPerfInfo zero_info; 1298 PetscLogState state; 1299 1300 PetscFunctionBegin; 1301 PetscCall(PetscLogHandlerGetState(handler, &state)); 1302 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1303 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1304 PetscCallMPI(MPI_Comm_size(comm, &size)); 1305 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1306 /* Get the total elapsed time */ 1307 PetscCall(PetscTime(&locTotalTime)); 1308 locTotalTime -= petsc_BaseTime; 1309 1310 PetscCall(PetscViewerASCIIPrintf(viewer, "****************************************************************************************************************************************************************\n")); 1311 PetscCall(PetscViewerASCIIPrintf(viewer, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1312 PetscCall(PetscViewerASCIIPrintf(viewer, "****************************************************************************************************************************************************************\n")); 1313 PetscCall(PetscViewerASCIIPrintf(viewer, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1314 PetscCall(PetscLogViewWarnSync(viewer)); 1315 PetscCall(PetscLogViewWarnDebugging(viewer)); 1316 PetscCall(PetscLogViewWarnNoGpuAwareMpi(viewer)); 1317 PetscCall(PetscLogViewWarnGpuTime(viewer)); 1318 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1319 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1320 PetscCall(PetscGetUserName(username, sizeof(username))); 1321 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1322 PetscCall(PetscGetDate(date, sizeof(date))); 1323 PetscCall(PetscGetVersion(version, sizeof(version))); 1324 1325 #if defined(PETSC_HAVE_CUPM) 1326 const char *cupm = PetscDefined(HAVE_CUDA) ? "CUDA" : "HIP"; 1327 if (PetscDeviceCUPMRuntimeArch) 1328 PetscCall(PetscViewerASCIIPrintf(viewer, "%s on a %s named %s with %d process%s and %s architecture %d, by %s on %s\n", pname, arch, hostname, size, size > 1 ? "es" : "", cupm, PetscDeviceCUPMRuntimeArch, username, date)); 1329 else 1330 #endif 1331 PetscCall(PetscViewerASCIIPrintf(viewer, "%s on a %s named %s with %d process%s, by %s on %s\n", pname, arch, hostname, size, size > 1 ? "es" : "", username, date)); 1332 1333 #if defined(PETSC_HAVE_OPENMP) 1334 PetscCall(PetscViewerASCIIPrintf(viewer, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1335 #endif 1336 PetscCall(PetscViewerASCIIPrintf(viewer, "Using %s\n", version)); 1337 1338 /* Must preserve reduction count before we go on */ 1339 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1340 1341 /* Calculate summary information */ 1342 PetscCall(PetscViewerASCIIPrintf(viewer, "\n Max Max/Min Avg Total\n")); 1343 /* Time */ 1344 PetscCallMPI(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1345 PetscCallMPI(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1346 PetscCallMPI(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1347 avg = tot / ((PetscLogDouble)size); 1348 if (min != 0.0) ratio = max / min; 1349 else ratio = 0.0; 1350 PetscCall(PetscViewerASCIIPrintf(viewer, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1351 TotalTime = tot; 1352 /* Objects */ 1353 { 1354 PetscInt num_objects; 1355 1356 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 1357 avg = (PetscLogDouble)num_objects; 1358 } 1359 PetscCallMPI(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1360 PetscCallMPI(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1361 PetscCallMPI(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1362 avg = tot / ((PetscLogDouble)size); 1363 if (min != 0.0) ratio = max / min; 1364 else ratio = 0.0; 1365 PetscCall(PetscViewerASCIIPrintf(viewer, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1366 /* Flops */ 1367 PetscCallMPI(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1368 PetscCallMPI(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1369 PetscCallMPI(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1370 avg = tot / ((PetscLogDouble)size); 1371 if (min != 0.0) ratio = max / min; 1372 else ratio = 0.0; 1373 PetscCall(PetscViewerASCIIPrintf(viewer, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1374 TotalFlops = tot; 1375 /* Flops/sec -- Must talk to Barry here */ 1376 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1377 else flops = 0.0; 1378 PetscCallMPI(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1379 PetscCallMPI(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1380 PetscCallMPI(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1381 avg = tot / ((PetscLogDouble)size); 1382 if (min != 0.0) ratio = max / min; 1383 else ratio = 0.0; 1384 PetscCall(PetscViewerASCIIPrintf(viewer, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1385 /* Memory */ 1386 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1387 if (mem > 0.0) { 1388 PetscCallMPI(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1389 PetscCallMPI(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1390 PetscCallMPI(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1391 avg = tot / ((PetscLogDouble)size); 1392 if (min != 0.0) ratio = max / min; 1393 else ratio = 0.0; 1394 PetscCall(PetscViewerASCIIPrintf(viewer, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1395 } 1396 /* Messages */ 1397 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1398 PetscCallMPI(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1399 PetscCallMPI(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1400 PetscCallMPI(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1401 avg = tot / ((PetscLogDouble)size); 1402 if (min != 0.0) ratio = max / min; 1403 else ratio = 0.0; 1404 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1405 numMessages = tot; 1406 /* Message Lengths */ 1407 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1408 PetscCallMPI(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1409 PetscCallMPI(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1410 PetscCallMPI(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1411 if (numMessages != 0) avg = tot / numMessages; 1412 else avg = 0.0; 1413 if (min != 0.0) ratio = max / min; 1414 else ratio = 0.0; 1415 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1416 messageLength = tot; 1417 /* Reductions */ 1418 PetscCallMPI(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1419 PetscCallMPI(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1420 PetscCallMPI(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1421 if (min != 0.0) ratio = max / min; 1422 else ratio = 0.0; 1423 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1424 numReductions = red; /* wrong because uses count from process zero */ 1425 PetscCall(PetscViewerASCIIPrintf(viewer, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1426 PetscCall(PetscViewerASCIIPrintf(viewer, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1427 PetscCall(PetscViewerASCIIPrintf(viewer, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1428 1429 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 1430 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 1431 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 1432 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 1433 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 1434 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1435 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1436 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1437 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1438 if (numStages > 0) { 1439 for (stage = 0; stage < numStages; stage++) { 1440 PetscInt stage_id; 1441 1442 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1443 if (stage_id >= 0) { 1444 PetscStagePerf *stage_info; 1445 1446 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 1447 localStageUsed[stage] = stage_info->used; 1448 localStageVisible[stage] = stage_info->perfInfo.visible; 1449 } else { 1450 localStageUsed[stage] = PETSC_FALSE; 1451 localStageVisible[stage] = PETSC_TRUE; 1452 } 1453 } 1454 PetscCallMPI(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPI_C_BOOL, MPI_LOR, comm)); 1455 PetscCallMPI(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPI_C_BOOL, MPI_LAND, comm)); 1456 for (stage = 0; stage < numStages; stage++) { 1457 if (stageUsed[stage] && stageVisible[stage]) { 1458 PetscCall(PetscViewerASCIIPrintf(viewer, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1459 PetscCall(PetscViewerASCIIPrintf(viewer, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1460 break; 1461 } 1462 } 1463 for (stage = 0; stage < numStages; stage++) { 1464 PetscInt stage_id; 1465 PetscEventPerfInfo *stage_info; 1466 const char *stage_name; 1467 1468 if (!(stageUsed[stage] && stageVisible[stage])) continue; 1469 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1470 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1471 stage_info = &zero_info; 1472 if (localStageUsed[stage]) { 1473 PetscStagePerf *stage_perf_info; 1474 1475 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_perf_info)); 1476 stage_info = &stage_perf_info->perfInfo; 1477 } 1478 PetscCallMPI(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1479 PetscCallMPI(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1480 PetscCallMPI(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1481 PetscCallMPI(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1482 PetscCallMPI(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1483 mess *= 0.5; 1484 messLen *= 0.5; 1485 red /= size; 1486 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1487 else fracTime = 0.0; 1488 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1489 else fracFlops = 0.0; 1490 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1491 if (numMessages != 0.0) fracMessages = mess / numMessages; 1492 else fracMessages = 0.0; 1493 if (mess != 0.0) avgMessLen = messLen / mess; 1494 else avgMessLen = 0.0; 1495 if (messageLength != 0.0) fracLength = messLen / messageLength; 1496 else fracLength = 0.0; 1497 if (numReductions != 0.0) fracReductions = red / numReductions; 1498 else fracReductions = 0.0; 1499 PetscCall(PetscViewerASCIIPrintf(viewer, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, stage_name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1500 } 1501 } 1502 1503 PetscCall(PetscViewerASCIIPrintf(viewer, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1504 PetscCall(PetscViewerASCIIPrintf(viewer, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1505 PetscCall(PetscViewerASCIIPrintf(viewer, "Phase summary info:\n")); 1506 PetscCall(PetscViewerASCIIPrintf(viewer, " Count: number of times phase was executed\n")); 1507 PetscCall(PetscViewerASCIIPrintf(viewer, " Time and Flop: Max - maximum over all processes\n")); 1508 PetscCall(PetscViewerASCIIPrintf(viewer, " Ratio - ratio of maximum to minimum over all processes\n")); 1509 PetscCall(PetscViewerASCIIPrintf(viewer, " Mess: number of messages sent\n")); 1510 PetscCall(PetscViewerASCIIPrintf(viewer, " AvgLen: average message length (bytes)\n")); 1511 PetscCall(PetscViewerASCIIPrintf(viewer, " Reduct: number of global reductions\n")); 1512 PetscCall(PetscViewerASCIIPrintf(viewer, " Global: entire computation\n")); 1513 PetscCall(PetscViewerASCIIPrintf(viewer, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1514 PetscCall(PetscViewerASCIIPrintf(viewer, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1515 PetscCall(PetscViewerASCIIPrintf(viewer, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1516 PetscCall(PetscViewerASCIIPrintf(viewer, " %%R - percent reductions in this phase\n")); 1517 PetscCall(PetscViewerASCIIPrintf(viewer, " Total Mflop/s: 1e-6 * (sum of flop over all processes)/(max time over all processes)\n")); 1518 if (PetscLogMemory) { 1519 PetscCall(PetscViewerASCIIPrintf(viewer, " Memory usage is summed over all MPI processes, it is given in mega-bytes\n")); 1520 PetscCall(PetscViewerASCIIPrintf(viewer, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1521 PetscCall(PetscViewerASCIIPrintf(viewer, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1522 PetscCall(PetscViewerASCIIPrintf(viewer, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1523 PetscCall(PetscViewerASCIIPrintf(viewer, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1524 } 1525 #if defined(PETSC_HAVE_DEVICE) 1526 PetscCall(PetscViewerASCIIPrintf(viewer, " GPU Mflop/s: 1e-6 * (sum of flop on GPU over all processes)/(max GPU time over all processes)\n")); 1527 PetscCall(PetscViewerASCIIPrintf(viewer, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1528 PetscCall(PetscViewerASCIIPrintf(viewer, " CpuToGpu Size (Mbytes): 1e-6 * (total size of CPU to GPU copies per processor)\n")); 1529 PetscCall(PetscViewerASCIIPrintf(viewer, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1530 PetscCall(PetscViewerASCIIPrintf(viewer, " GpuToCpu Size (Mbytes): 1e-6 * (total size of GPU to CPU copies per processor)\n")); 1531 PetscCall(PetscViewerASCIIPrintf(viewer, " GPU %%F: percent flops on GPU in this event\n")); 1532 #endif 1533 PetscCall(PetscViewerASCIIPrintf(viewer, "------------------------------------------------------------------------------------------------------------------------\n")); 1534 1535 PetscCall(PetscLogViewWarnDebugging(viewer)); 1536 1537 /* Report events */ 1538 PetscCall(PetscViewerASCIIPrintf(viewer, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1539 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, " Malloc EMalloc MMalloc RMI")); 1540 #if defined(PETSC_HAVE_DEVICE) 1541 PetscCall(PetscViewerASCIIPrintf(viewer, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1542 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCall(PetscViewerASCIIPrintf(viewer, " GPU GPU")); 1543 #endif 1544 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1545 PetscCall(PetscViewerASCIIPrintf(viewer, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1546 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, " Mbytes Mbytes Mbytes Mbytes")); 1547 #if defined(PETSC_HAVE_DEVICE) 1548 PetscCall(PetscViewerASCIIPrintf(viewer, " Mflop/s Count Size Count Size %%F")); 1549 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCall(PetscViewerASCIIPrintf(viewer, " Joule Mflop/J")); 1550 #endif 1551 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1552 PetscCall(PetscViewerASCIIPrintf(viewer, "------------------------------------------------------------------------------------------------------------------------")); 1553 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, "-----------------------------")); 1554 #if defined(PETSC_HAVE_DEVICE) 1555 PetscCall(PetscViewerASCIIPrintf(viewer, "---------------------------------------")); 1556 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCall(PetscViewerASCIIPrintf(viewer, "-------------------")); 1557 #endif 1558 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1559 1560 #if defined(PETSC_HAVE_DEVICE) 1561 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1562 PetscCall(PetscLogStateGetEventFromName(state, "TaoSolve", &TAO_Solve)); 1563 PetscCall(PetscLogStateGetEventFromName(state, "TSStep", &TS_Step)); 1564 PetscCall(PetscLogStateGetEventFromName(state, "SNESSolve", &SNES_Solve)); 1565 PetscCall(PetscLogStateGetEventFromName(state, "KSPSolve", &KSP_Solve)); 1566 #endif 1567 1568 for (stage = 0; stage < numStages; stage++) { 1569 PetscInt stage_id; 1570 PetscEventPerfInfo *stage_info; 1571 const char *stage_name; 1572 1573 if (!(stageVisible[stage] && stageUsed[stage])) continue; 1574 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1575 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1576 PetscCall(PetscViewerASCIIPrintf(viewer, "\n--- Event Stage %d: %s\n\n", stage, stage_name)); 1577 stage_info = &zero_info; 1578 if (localStageUsed[stage]) { 1579 PetscStagePerf *stage_perf_info; 1580 1581 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_perf_info)); 1582 stage_info = &stage_perf_info->perfInfo; 1583 } 1584 PetscCallMPI(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1585 PetscCallMPI(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1586 PetscCallMPI(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1587 PetscCallMPI(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1588 PetscCallMPI(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1589 mess *= 0.5; 1590 messLen *= 0.5; 1591 red /= size; 1592 1593 for (event = 0; event < numEvents; event++) { 1594 PetscInt event_id; 1595 PetscEventPerfInfo *event_info = &zero_info; 1596 PetscBool is_zero = PETSC_FALSE; 1597 const char *event_name; 1598 1599 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1600 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1601 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &event_info)); 1602 PetscCall(PetscArraycmp(event_info, &zero_info, 1, &is_zero)); 1603 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPI_C_BOOL, MPI_LAND, comm)); 1604 if (!is_zero) { 1605 flopr = event_info->flops; 1606 PetscCallMPI(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1607 PetscCallMPI(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1608 PetscCallMPI(MPIU_Allreduce(&event_info->flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1609 PetscCallMPI(MPIU_Allreduce(&event_info->time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1610 PetscCallMPI(MPIU_Allreduce(&event_info->time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1611 PetscCallMPI(MPIU_Allreduce(&event_info->time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1612 PetscCallMPI(MPIU_Allreduce(&event_info->numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1613 PetscCallMPI(MPIU_Allreduce(&event_info->messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1614 PetscCallMPI(MPIU_Allreduce(&event_info->numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1615 PetscCallMPI(MPIU_Allreduce(&event_info->count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1616 PetscCallMPI(MPIU_Allreduce(&event_info->count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1617 if (PetscLogMemory) { 1618 PetscCallMPI(MPIU_Allreduce(&event_info->memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1619 PetscCallMPI(MPIU_Allreduce(&event_info->mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1620 PetscCallMPI(MPIU_Allreduce(&event_info->mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1621 PetscCallMPI(MPIU_Allreduce(&event_info->mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1622 } 1623 #if defined(PETSC_HAVE_DEVICE) 1624 PetscCallMPI(MPIU_Allreduce(&event_info->CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1625 PetscCallMPI(MPIU_Allreduce(&event_info->GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1626 PetscCallMPI(MPIU_Allreduce(&event_info->CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1627 PetscCallMPI(MPIU_Allreduce(&event_info->GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1628 PetscCallMPI(MPIU_Allreduce(&event_info->GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1629 PetscCallMPI(MPIU_Allreduce(&event_info->GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1630 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCallMPI(MPIU_Allreduce(&event_info->GpuEnergy, &gjoules, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1631 #endif 1632 if (mint < 0.0) { 1633 PetscCall(PetscViewerASCIIPrintf(viewer, "WARNING!!! Minimum time %g over all processes for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, event_name)); 1634 mint = 0; 1635 } 1636 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processes for %s is negative! Not possible!", minf, event_name); 1637 #if defined(PETSC_HAVE_DEVICE) && !defined(PETSC_HAVE_KOKKOS_WITHOUT_GPU) 1638 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1639 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1640 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 1641 if (event_id != SNES_Solve && event_id != KSP_Solve && event_id != TS_Step && event_id != TAO_Solve) { 1642 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 1643 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 1644 } 1645 } 1646 #endif 1647 totm *= 0.5; 1648 totml *= 0.5; 1649 totr /= size; 1650 1651 if (maxC != 0) { 1652 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 1653 else ratC = 0.0; 1654 if (mint != 0.0) ratt = maxt / mint; 1655 else ratt = 0.0; 1656 if (minf != 0.0) ratf = maxf / minf; 1657 else ratf = 0.0; 1658 if (TotalTime != 0.0) fracTime = tott / TotalTime; 1659 else fracTime = 0.0; 1660 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 1661 else fracFlops = 0.0; 1662 if (stageTime != 0.0) fracStageTime = tott / stageTime; 1663 else fracStageTime = 0.0; 1664 if (flops != 0.0) fracStageFlops = totf / flops; 1665 else fracStageFlops = 0.0; 1666 if (numMessages != 0.0) fracMess = totm / numMessages; 1667 else fracMess = 0.0; 1668 if (messageLength != 0.0) fracMessLen = totml / messageLength; 1669 else fracMessLen = 0.0; 1670 if (numReductions != 0.0) fracRed = totr / numReductions; 1671 else fracRed = 0.0; 1672 if (mess != 0.0) fracStageMess = totm / mess; 1673 else fracStageMess = 0.0; 1674 if (messLen != 0.0) fracStageMessLen = totml / messLen; 1675 else fracStageMessLen = 0.0; 1676 if (red != 0.0) fracStageRed = totr / red; 1677 else fracStageRed = 0.0; 1678 if (totm != 0.0) totml /= totm; 1679 else totml = 0.0; 1680 if (maxt != 0.0) flopr = totf / maxt; 1681 else flopr = 0.0; 1682 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) { 1683 if (PetscIsNanReal(maxt)) 1684 PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f n/a n/a %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages n/a", event_name, maxC, ratC, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed)); 1685 else 1686 PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 1687 } else { 1688 if (PetscIsNanReal(maxt)) { // when maxt, ratt, flopr are NaN (i.e., run with GPUs but without -log_view_gpu_time), replace the confusing "nan" with "n/a" 1689 PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f n/a n/a %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f n/a", event_name, maxC, ratC, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed)); 1690 } else { 1691 PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 1692 } 1693 } 1694 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 1695 #if defined(PETSC_HAVE_DEVICE) 1696 if (totf != 0.0) fracgflops = gflops / totf; 1697 else fracgflops = 0.0; 1698 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 1699 else gflopr = 0.0; 1700 if (PetscIsNanReal(gflopr)) { 1701 PetscCall(PetscViewerASCIIPrintf(viewer, " n/a %4.0f %3.2e %4.0f %3.2e %2.0f", cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 1702 } else { 1703 PetscCall(PetscViewerASCIIPrintf(viewer, " %5.0f %4.0f %3.2e %4.0f %3.2e %2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 1704 } 1705 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) { 1706 if (gjoules != 0.0) geff = gflops / gjoules; 1707 else geff = 0.0; 1708 PetscCall(PetscViewerASCIIPrintf(viewer, " %4.2e %4.2e", gjoules, geff)); 1709 } 1710 #endif 1711 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1712 } 1713 } 1714 } 1715 } 1716 1717 /* Memory usage and object creation */ 1718 PetscCall(PetscViewerASCIIPrintf(viewer, "------------------------------------------------------------------------------------------------------------------------")); 1719 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, "-----------------------------")); 1720 #if defined(PETSC_HAVE_DEVICE) 1721 PetscCall(PetscViewerASCIIPrintf(viewer, "---------------------------------------")); 1722 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCall(PetscViewerASCIIPrintf(viewer, "-------------------")); 1723 #endif 1724 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1725 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1726 1727 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1728 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1729 stats for stages local to processor sets. 1730 */ 1731 /* We should figure out the longest object name here (now 20 characters) */ 1732 PetscCall(PetscViewerASCIIPrintf(viewer, "Object Type Creations Destructions. Reports information only for process 0.\n")); 1733 for (stage = 0; stage < numStages; stage++) { 1734 const char *stage_name; 1735 1736 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1737 PetscCall(PetscViewerASCIIPrintf(viewer, "\n--- Event Stage %d: %s\n\n", stage, stage_name)); 1738 if (localStageUsed[stage]) { 1739 PetscInt num_classes; 1740 1741 PetscCall(PetscLogStateGetNumClasses(state, &num_classes)); 1742 for (oclass = 0; oclass < num_classes; oclass++) { 1743 PetscClassPerf *class_perf_info; 1744 1745 PetscCall(PetscLogHandlerDefaultGetClassPerf(handler, stage, oclass, &class_perf_info)); 1746 if (class_perf_info->creations > 0 || class_perf_info->destructions > 0) { 1747 PetscLogClassInfo class_reg_info; 1748 PetscBool flg = PETSC_FALSE; 1749 1750 PetscCall(PetscLogStateClassGetInfo(state, oclass, &class_reg_info)); 1751 if (stage == 0 && oclass == num_classes - 1) { 1752 if (PetscGlobalArgc == 0 && PetscGlobalArgs == NULL) { 1753 PetscCall(PetscStrcmp(class_reg_info.name, "Viewer", &flg)); 1754 if (flg && class_perf_info->creations == PetscLogNumViewersCreated && class_perf_info->destructions == PetscLogNumViewersDestroyed) continue; 1755 } 1756 } 1757 PetscCall(PetscViewerASCIIPrintf(viewer, "%20s %5d %5d\n", class_reg_info.name, class_perf_info->creations, class_perf_info->destructions)); 1758 } 1759 } 1760 } 1761 } 1762 1763 PetscCall(PetscFree(localStageUsed)); 1764 PetscCall(PetscFree(stageUsed)); 1765 PetscCall(PetscFree(localStageVisible)); 1766 PetscCall(PetscFree(stageVisible)); 1767 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1768 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1769 1770 /* Information unrelated to this particular run */ 1771 PetscCall(PetscViewerASCIIPrintf(viewer, "========================================================================================================================\n")); 1772 PetscCall(PetscTime(&y)); 1773 PetscCall(PetscTime(&x)); 1774 PetscCall(PetscTime(&y)); 1775 PetscCall(PetscTime(&y)); 1776 PetscCall(PetscTime(&y)); 1777 PetscCall(PetscTime(&y)); 1778 PetscCall(PetscTime(&y)); 1779 PetscCall(PetscTime(&y)); 1780 PetscCall(PetscTime(&y)); 1781 PetscCall(PetscTime(&y)); 1782 PetscCall(PetscTime(&y)); 1783 PetscCall(PetscTime(&y)); 1784 PetscCall(PetscViewerASCIIPrintf(viewer, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 1785 /* MPI information */ 1786 if (size > 1) { 1787 MPI_Status status; 1788 PetscMPIInt tag; 1789 MPI_Comm newcomm; 1790 1791 PetscCallMPI(MPI_Barrier(comm)); 1792 PetscCall(PetscTime(&x)); 1793 PetscCallMPI(MPI_Barrier(comm)); 1794 PetscCallMPI(MPI_Barrier(comm)); 1795 PetscCallMPI(MPI_Barrier(comm)); 1796 PetscCallMPI(MPI_Barrier(comm)); 1797 PetscCallMPI(MPI_Barrier(comm)); 1798 PetscCall(PetscTime(&y)); 1799 PetscCall(PetscViewerASCIIPrintf(viewer, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 1800 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 1801 PetscCallMPI(MPI_Barrier(comm)); 1802 if (rank) { 1803 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 1804 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 1805 } else { 1806 PetscCall(PetscTime(&x)); 1807 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 1808 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 1809 PetscCall(PetscTime(&y)); 1810 PetscCall(PetscViewerASCIIPrintf(viewer, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 1811 } 1812 PetscCall(PetscCommDestroy(&newcomm)); 1813 } 1814 PetscCall(PetscOptionsView(NULL, viewer)); 1815 1816 /* Machine and compile information */ 1817 if (PetscDefined(USE_FORTRAN_KERNELS)) { 1818 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with FORTRAN kernels\n")); 1819 } else { 1820 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled without FORTRAN kernels\n")); 1821 } 1822 if (PetscDefined(USE_64BIT_INDICES)) { 1823 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with 64-bit PetscInt\n")); 1824 } else if (PetscDefined(USE___FLOAT128)) { 1825 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with 32-bit PetscInt\n")); 1826 } 1827 if (PetscDefined(USE_REAL_SINGLE)) { 1828 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with single precision PetscScalar and PetscReal\n")); 1829 } else if (PetscDefined(USE___FLOAT128)) { 1830 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 1831 } 1832 if (PetscDefined(USE_REAL_MAT_SINGLE)) { 1833 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with single precision matrices\n")); 1834 } else { 1835 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with full precision matrices (default)\n")); 1836 } 1837 PetscCall(PetscViewerASCIIPrintf(viewer, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 1838 1839 PetscCall(PetscViewerASCIIPrintf(viewer, "Configure options: %s", petscconfigureoptions)); 1840 PetscCall(PetscViewerASCIIPrintf(viewer, "%s", petscmachineinfo)); 1841 PetscCall(PetscViewerASCIIPrintf(viewer, "%s", petsccompilerinfo)); 1842 PetscCall(PetscViewerASCIIPrintf(viewer, "%s", petsccompilerflagsinfo)); 1843 PetscCall(PetscViewerASCIIPrintf(viewer, "%s", petsclinkerinfo)); 1844 1845 /* Cleanup */ 1846 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1847 PetscCall(PetscLogViewWarnNoGpuAwareMpi(viewer)); 1848 PetscCall(PetscLogViewWarnDebugging(viewer)); 1849 PetscCall(PetscFPTrapPop()); 1850 PetscFunctionReturn(PETSC_SUCCESS); 1851 } 1852 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END() 1853 1854 static PetscErrorCode PetscLogHandlerView_Default(PetscLogHandler handler, PetscViewer viewer) 1855 { 1856 PetscViewerFormat format; 1857 1858 PetscFunctionBegin; 1859 PetscCall(PetscViewerGetFormat(viewer, &format)); 1860 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 1861 PetscCall(PetscLogHandlerView_Default_Info(handler, viewer)); 1862 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1863 PetscCall(PetscLogHandlerView_Default_Detailed(handler, viewer)); 1864 } else if (format == PETSC_VIEWER_ASCII_CSV) { 1865 PetscCall(PetscLogHandlerView_Default_CSV(handler, viewer)); 1866 } 1867 PetscFunctionReturn(PETSC_SUCCESS); 1868 } 1869 1870 /*MC 1871 PETSCLOGHANDLERDEFAULT - PETSCLOGHANDLERDEFAULT = "default" - A `PetscLogHandler` that collects data for PETSc 1872 default profiling log viewers (`PetscLogView()` and `PetscLogDump()`). A log handler of this type is 1873 created and started (`PetscLogHandlerStart()`) by `PetscLogDefaultBegin()`. 1874 1875 Options Database Keys: 1876 + -log_include_actions - include a growing list of actions (event beginnings and endings, object creations and destructions) in `PetscLogDump()` (`PetscLogActions()`). 1877 - -log_include_objects - include a growing list of object creations and destructions in `PetscLogDump()` (`PetscLogObjects()`). 1878 1879 Level: developer 1880 1881 .seealso: [](ch_profiling), `PetscLogHandler` 1882 M*/ 1883 1884 PETSC_INTERN PetscErrorCode PetscLogHandlerCreate_Default(PetscLogHandler handler) 1885 { 1886 PetscFunctionBegin; 1887 PetscCall(PetscLogHandlerContextCreate_Default((PetscLogHandler_Default *)&handler->data)); 1888 handler->ops->destroy = PetscLogHandlerDestroy_Default; 1889 handler->ops->eventbegin = PetscLogHandlerEventBegin_Default; 1890 handler->ops->eventend = PetscLogHandlerEventEnd_Default; 1891 handler->ops->eventsync = PetscLogHandlerEventSync_Default; 1892 handler->ops->objectcreate = PetscLogHandlerObjectCreate_Default; 1893 handler->ops->objectdestroy = PetscLogHandlerObjectDestroy_Default; 1894 handler->ops->stagepush = PetscLogHandlerStagePush_Default; 1895 handler->ops->stagepop = PetscLogHandlerStagePop_Default; 1896 handler->ops->view = PetscLogHandlerView_Default; 1897 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerGetEventPerfInfo_C", PetscLogHandlerGetEventPerfInfo_Default)); 1898 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerGetStagePerfInfo_C", PetscLogHandlerGetStagePerfInfo_Default)); 1899 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerSetLogActions_C", PetscLogHandlerSetLogActions_Default)); 1900 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerSetLogObjects_C", PetscLogHandlerSetLogObjects_Default)); 1901 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerLogObjectState_C", PetscLogHandlerLogObjectState_Default)); 1902 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerGetNumObjects_C", PetscLogHandlerGetNumObjects_Default)); 1903 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventDeactivatePush_C", PetscLogHandlerEventDeactivatePush_Default)); 1904 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventDeactivatePop_C", PetscLogHandlerEventDeactivatePop_Default)); 1905 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventsPause_C", PetscLogHandlerEventsPause_Default)); 1906 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventsResume_C", PetscLogHandlerEventsResume_Default)); 1907 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerDump_C", PetscLogHandlerDump_Default)); 1908 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerStageSetVisible_C", PetscLogHandlerStageSetVisible_Default)); 1909 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerStageGetVisible_C", PetscLogHandlerStageGetVisible_Default)); 1910 PetscFunctionReturn(PETSC_SUCCESS); 1911 } 1912