1 #include <petscviewer.h> 2 #include <petscdevice.h> 3 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 4 #include <petsc/private/loghandlerimpl.h> 5 #include <petsc/private/deviceimpl.h> 6 #include <petscconfiginfo.h> 7 #include <petscmachineinfo.h> 8 #include "logdefault.h" 9 10 static PetscErrorCode PetscEventPerfInfoInit(PetscEventPerfInfo *eventInfo) 11 { 12 PetscFunctionBegin; 13 PetscCall(PetscMemzero(eventInfo, sizeof(*eventInfo))); 14 eventInfo->visible = PETSC_TRUE; 15 eventInfo->id = -1; 16 eventInfo->dof[0] = -1.0; 17 eventInfo->dof[1] = -1.0; 18 eventInfo->dof[2] = -1.0; 19 eventInfo->dof[3] = -1.0; 20 eventInfo->dof[4] = -1.0; 21 eventInfo->dof[5] = -1.0; 22 eventInfo->dof[6] = -1.0; 23 eventInfo->dof[7] = -1.0; 24 eventInfo->errors[0] = -1.0; 25 eventInfo->errors[1] = -1.0; 26 eventInfo->errors[2] = -1.0; 27 eventInfo->errors[3] = -1.0; 28 eventInfo->errors[4] = -1.0; 29 eventInfo->errors[5] = -1.0; 30 eventInfo->errors[6] = -1.0; 31 eventInfo->errors[7] = -1.0; 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 static PetscErrorCode PetscEventPerfInfoTic_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool resume) 36 { 37 PetscFunctionBegin; 38 if (resume) { 39 eventInfo->timeTmp -= time; 40 eventInfo->flopsTmp -= petsc_TotalFlops_th; 41 } else { 42 eventInfo->timeTmp = -time; 43 eventInfo->flopsTmp = -petsc_TotalFlops_th; 44 } 45 eventInfo->numMessages -= petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th; 46 eventInfo->messageLength -= petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len_th + petsc_send_len_th; 47 eventInfo->numReductions -= petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th; 48 #if defined(PETSC_HAVE_DEVICE) 49 eventInfo->CpuToGpuCount -= petsc_ctog_ct_th; 50 eventInfo->GpuToCpuCount -= petsc_gtoc_ct_th; 51 eventInfo->CpuToGpuSize -= petsc_ctog_sz_th; 52 eventInfo->GpuToCpuSize -= petsc_gtoc_sz_th; 53 eventInfo->GpuFlops -= petsc_gflops_th; 54 eventInfo->GpuTime -= petsc_gtime; 55 if (PetscLogGpuEnergyFlag) eventInfo->GpuEnergy -= petsc_genergy; 56 if (PetscLogGpuEnergyMeterFlag) eventInfo->GpuEnergy -= petsc_genergy_meter; 57 #endif 58 if (logMemory) { 59 PetscLogDouble usage; 60 PetscCall(PetscMemoryGetCurrentUsage(&usage)); 61 eventInfo->memIncrease -= usage; 62 PetscCall(PetscMallocGetCurrentUsage(&usage)); 63 eventInfo->mallocSpace -= usage; 64 PetscCall(PetscMallocGetMaximumUsage(&usage)); 65 eventInfo->mallocIncrease -= usage; 66 PetscCall(PetscMallocPushMaximumUsage(event)); 67 } 68 PetscFunctionReturn(PETSC_SUCCESS); 69 } 70 71 static PetscErrorCode PetscEventPerfInfoTic(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 72 { 73 PetscFunctionBegin; 74 PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_FALSE)); 75 PetscFunctionReturn(PETSC_SUCCESS); 76 } 77 78 static PetscErrorCode PetscEventPerfInfoResume(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 79 { 80 PetscFunctionBegin; 81 PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_TRUE)); 82 PetscFunctionReturn(PETSC_SUCCESS); 83 } 84 85 static PetscErrorCode PetscEventPerfInfoToc_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool pause) 86 { 87 PetscFunctionBegin; 88 eventInfo->timeTmp += time; 89 eventInfo->flopsTmp += petsc_TotalFlops_th; 90 if (!pause) { 91 eventInfo->time += eventInfo->timeTmp; 92 eventInfo->time2 += eventInfo->timeTmp * eventInfo->timeTmp; 93 eventInfo->flops += eventInfo->flopsTmp; 94 eventInfo->flops2 += eventInfo->flopsTmp * eventInfo->flopsTmp; 95 } 96 eventInfo->numMessages += petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th; 97 eventInfo->messageLength += petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len + petsc_send_len_th; 98 eventInfo->numReductions += petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th; 99 #if defined(PETSC_HAVE_DEVICE) 100 eventInfo->CpuToGpuCount += petsc_ctog_ct_th; 101 eventInfo->GpuToCpuCount += petsc_gtoc_ct_th; 102 eventInfo->CpuToGpuSize += petsc_ctog_sz_th; 103 eventInfo->GpuToCpuSize += petsc_gtoc_sz_th; 104 eventInfo->GpuFlops += petsc_gflops_th; 105 eventInfo->GpuTime += petsc_gtime; 106 if (PetscLogGpuEnergyFlag) eventInfo->GpuEnergy += petsc_genergy; 107 if (PetscLogGpuEnergyMeterFlag) eventInfo->GpuEnergy += petsc_genergy_meter; 108 #endif 109 if (logMemory) { 110 PetscLogDouble usage, musage; 111 PetscCall(PetscMemoryGetCurrentUsage(&usage)); /* the comments below match the column labels printed in PetscLogView_Default() */ 112 eventInfo->memIncrease += usage; /* RMI */ 113 PetscCall(PetscMallocGetCurrentUsage(&usage)); 114 eventInfo->mallocSpace += usage; /* Malloc */ 115 PetscCall(PetscMallocPopMaximumUsage(event, &musage)); 116 eventInfo->mallocIncreaseEvent = PetscMax(musage - usage, eventInfo->mallocIncreaseEvent); /* EMalloc */ 117 PetscCall(PetscMallocGetMaximumUsage(&usage)); 118 eventInfo->mallocIncrease += usage; /* MMalloc */ 119 } 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode PetscEventPerfInfoToc(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 124 { 125 PetscFunctionBegin; 126 PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_FALSE)); 127 PetscFunctionReturn(PETSC_SUCCESS); 128 } 129 130 static PetscErrorCode PetscEventPerfInfoPause(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 131 { 132 PetscFunctionBegin; 133 PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_TRUE)); 134 PetscFunctionReturn(PETSC_SUCCESS); 135 } 136 137 static PetscErrorCode PetscEventPerfInfoAdd_Internal(const PetscEventPerfInfo *eventInfo, PetscEventPerfInfo *outInfo) 138 { 139 PetscFunctionBegin; 140 outInfo->count += eventInfo->count; 141 outInfo->time += eventInfo->time; 142 outInfo->time2 += eventInfo->time2; 143 outInfo->flops += eventInfo->flops; 144 outInfo->flops2 += eventInfo->flops2; 145 outInfo->numMessages += eventInfo->numMessages; 146 outInfo->messageLength += eventInfo->messageLength; 147 outInfo->numReductions += eventInfo->numReductions; 148 #if defined(PETSC_HAVE_DEVICE) 149 outInfo->CpuToGpuCount += eventInfo->CpuToGpuCount; 150 outInfo->GpuToCpuCount += eventInfo->GpuToCpuCount; 151 outInfo->CpuToGpuSize += eventInfo->CpuToGpuSize; 152 outInfo->GpuToCpuSize += eventInfo->GpuToCpuSize; 153 outInfo->GpuFlops += eventInfo->GpuFlops; 154 outInfo->GpuTime += eventInfo->GpuTime; 155 outInfo->GpuEnergy += eventInfo->GpuEnergy; 156 #endif 157 outInfo->memIncrease += eventInfo->memIncrease; 158 outInfo->mallocSpace += eventInfo->mallocSpace; 159 outInfo->mallocIncreaseEvent += eventInfo->mallocIncreaseEvent; 160 outInfo->mallocIncrease += eventInfo->mallocIncrease; 161 PetscFunctionReturn(PETSC_SUCCESS); 162 } 163 164 PETSC_LOG_RESIZABLE_ARRAY(EventPerfArray, PetscEventPerfInfo, PetscLogEvent, PetscEventPerfInfoInit, NULL, NULL) 165 166 /* --- PetscClassPerf --- */ 167 168 typedef struct { 169 int creations; /* The number of objects of this class created */ 170 int destructions; /* The number of objects of this class destroyed */ 171 PetscLogDouble mem; /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */ 172 PetscLogDouble descMem; /* The total memory allocated by descendents of these objects; this is completely wrong and should possibly be removed */ 173 } PetscClassPerf; 174 175 static PetscErrorCode PetscClassPerfInit(PetscClassPerf *classInfo) 176 { 177 PetscFunctionBegin; 178 PetscCall(PetscMemzero(classInfo, sizeof(*classInfo))); 179 PetscFunctionReturn(PETSC_SUCCESS); 180 } 181 182 PETSC_LOG_RESIZABLE_ARRAY(ClassPerfArray, PetscClassPerf, PetscLogClass, PetscClassPerfInit, NULL, NULL) 183 184 /* --- PetscStagePerf --- */ 185 186 typedef struct _PetscStagePerf { 187 PetscBool used; /* The stage was pushed on this processor */ 188 PetscEventPerfInfo perfInfo; /* The stage performance information */ 189 PetscLogEventPerfArray eventLog; /* The event information for this stage */ 190 PetscLogClassPerfArray classLog; /* The class information for this stage */ 191 } PetscStagePerf; 192 193 static PetscErrorCode PetscStageInfoInit(PetscStagePerf *stageInfo) 194 { 195 PetscFunctionBegin; 196 PetscCall(PetscMemzero(stageInfo, sizeof(*stageInfo))); 197 PetscCall(PetscLogEventPerfArrayCreate(128, &stageInfo->eventLog)); 198 PetscCall(PetscLogClassPerfArrayCreate(128, &stageInfo->classLog)); 199 PetscCall(PetscEventPerfInfoInit(&stageInfo->perfInfo)); 200 PetscFunctionReturn(PETSC_SUCCESS); 201 } 202 203 static PetscErrorCode PetscStageInfoReset(PetscStagePerf *stageInfo) 204 { 205 PetscFunctionBegin; 206 PetscCall(PetscLogEventPerfArrayDestroy(&stageInfo->eventLog)); 207 PetscCall(PetscLogClassPerfArrayDestroy(&stageInfo->classLog)); 208 PetscFunctionReturn(PETSC_SUCCESS); 209 } 210 211 PETSC_LOG_RESIZABLE_ARRAY(StageInfoArray, PetscStagePerf, PetscLogStage, PetscStageInfoInit, PetscStageInfoReset, NULL) 212 213 /* --- Action --- */ 214 215 /* The structure for action logging */ 216 typedef enum { 217 PETSC_LOG_ACTION_CREATE, 218 PETSC_LOG_ACTION_DESTROY, 219 PETSC_LOG_ACTION_BEGIN, 220 PETSC_LOG_ACTION_END, 221 } PetscLogActionType; 222 223 typedef struct _Action { 224 PetscLogActionType action; /* The type of execution */ 225 PetscLogEvent event; /* The event number */ 226 PetscClassId classid; /* The event class id */ 227 PetscLogDouble time; /* The time of occurrence */ 228 PetscLogDouble flops; /* The cumulative flops */ 229 PetscLogDouble mem; /* The current memory usage */ 230 PetscLogDouble maxmem; /* The maximum memory usage */ 231 PetscObjectId id1, id2, id3; /* The ids of associated objects */ 232 } Action; 233 234 PETSC_LOG_RESIZABLE_ARRAY(ActionArray, Action, PetscLogEvent, NULL, NULL, NULL) 235 236 /* --- Object --- */ 237 238 /* The structure for object logging */ 239 typedef struct _Object { 240 PetscObject obj; /* The associated PetscObject */ 241 int parent; /* The parent id */ 242 PetscLogDouble mem; /* The memory associated with the object */ 243 char name[64]; /* The object name */ 244 char info[64]; /* The information string */ 245 } Object; 246 247 PETSC_LOG_RESIZABLE_ARRAY(ObjectArray, Object, PetscObject, NULL, NULL, NULL) 248 249 /* Map from (threadid,stage,event) to perfInfo data struct */ 250 #include <petsc/private/hashmapijk.h> 251 252 PETSC_HASH_MAP(HMapEvent, PetscHashIJKKey, PetscEventPerfInfo *, PetscHashIJKKeyHash, PetscHashIJKKeyEqual, NULL) 253 254 typedef struct _n_PetscLogHandler_Default *PetscLogHandler_Default; 255 struct _n_PetscLogHandler_Default { 256 PetscLogStageInfoArray stages; 257 PetscSpinlock lock; 258 PetscLogActionArray petsc_actions; 259 PetscLogObjectArray petsc_objects; 260 PetscBool petsc_logActions; 261 PetscBool petsc_logObjects; 262 int petsc_numObjectsCreated; 263 int petsc_numObjectsDestroyed; 264 PetscHMapEvent eventInfoMap_th; 265 int pause_depth; 266 PetscBool use_threadsafe; 267 }; 268 269 /* --- PetscLogHandler_Default --- */ 270 271 static PetscErrorCode PetscLogHandlerContextCreate_Default(PetscLogHandler_Default *def_p) 272 { 273 PetscLogHandler_Default def; 274 275 PetscFunctionBegin; 276 PetscCall(PetscNew(def_p)); 277 def = *def_p; 278 PetscCall(PetscLogStageInfoArrayCreate(8, &def->stages)); 279 PetscCall(PetscLogActionArrayCreate(64, &def->petsc_actions)); 280 PetscCall(PetscLogObjectArrayCreate(64, &def->petsc_objects)); 281 PetscCall(PetscSpinlockCreate(&def->lock)); 282 283 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_actions", &def->petsc_logActions, NULL)); 284 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_objects", &def->petsc_logObjects, NULL)); 285 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_handler_default_use_threadsafe_events", &def->use_threadsafe, NULL)); 286 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) PetscCall(PetscHMapEventCreate(&def->eventInfoMap_th)); 287 PetscFunctionReturn(PETSC_SUCCESS); 288 } 289 290 static PetscErrorCode PetscLogHandlerDestroy_Default(PetscLogHandler h) 291 { 292 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 293 294 PetscFunctionBegin; 295 PetscCall(PetscLogStageInfoArrayDestroy(&def->stages)); 296 PetscCall(PetscLogActionArrayDestroy(&def->petsc_actions)); 297 PetscCall(PetscLogObjectArrayDestroy(&def->petsc_objects)); 298 PetscCall(PetscSpinlockDestroy(&def->lock)); 299 if (def->eventInfoMap_th) { 300 PetscEventPerfInfo **array; 301 PetscInt n, off = 0; 302 303 PetscCall(PetscHMapEventGetSize(def->eventInfoMap_th, &n)); 304 PetscCall(PetscMalloc1(n, &array)); 305 PetscCall(PetscHMapEventGetVals(def->eventInfoMap_th, &off, array)); 306 for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i])); 307 PetscCall(PetscFree(array)); 308 PetscCall(PetscHMapEventDestroy(&def->eventInfoMap_th)); 309 } 310 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerGetEventPerfInfo_C", NULL)); 311 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerGetStagePerfInfo_C", NULL)); 312 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerSetLogActions_C", NULL)); 313 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerSetLogObjects_C", NULL)); 314 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerLogObjectState_C", NULL)); 315 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerGetNumObjects_C", NULL)); 316 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventDeactivatePush_C", NULL)); 317 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventDeactivatePop_C", NULL)); 318 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventsPause_C", NULL)); 319 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerEventsResume_C", NULL)); 320 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerDump_C", NULL)); 321 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerStageSetVisible_C", NULL)); 322 PetscCall(PetscObjectComposeFunction((PetscObject)h, "PetscLogHandlerStageGetVisible_C", NULL)); 323 PetscCall(PetscFree(def)); 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 static PetscErrorCode PetscLogHandlerDefaultGetStageInfo(PetscLogHandler handler, PetscLogStage stage, PetscStagePerf **stage_info_p) 328 { 329 PetscStagePerf *stage_info = NULL; 330 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 331 332 PetscFunctionBegin; 333 PetscCall(PetscLogStageInfoArrayResize(def->stages, stage + 1)); 334 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 335 *stage_info_p = stage_info; 336 PetscFunctionReturn(PETSC_SUCCESS); 337 } 338 339 static PetscErrorCode PetscLogHandlerGetEventPerfInfo_Default(PetscLogHandler handler, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **event_info_p) 340 { 341 PetscEventPerfInfo *event_info = NULL; 342 PetscStagePerf *stage_info = NULL; 343 PetscLogEventPerfArray event_log; 344 345 PetscFunctionBegin; 346 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(handler->state, &stage)); 347 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 348 event_log = stage_info->eventLog; 349 PetscCall(PetscLogEventPerfArrayResize(event_log, event + 1)); 350 PetscCall(PetscLogEventPerfArrayGetRef(event_log, event, &event_info)); 351 event_info->id = event; 352 *event_info_p = event_info; 353 PetscFunctionReturn(PETSC_SUCCESS); 354 } 355 356 static PetscErrorCode PetscLogHandlerGetStagePerfInfo_Default(PetscLogHandler handler, PetscLogStage stage, PetscEventPerfInfo **stage_info_p) 357 { 358 PetscStagePerf *stage_perf_info = NULL; 359 360 PetscFunctionBegin; 361 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(handler->state, &stage)); 362 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_perf_info)); 363 *stage_info_p = &stage_perf_info->perfInfo; 364 PetscFunctionReturn(PETSC_SUCCESS); 365 } 366 367 static PetscErrorCode PetscLogHandlerDefaultGetClassPerf(PetscLogHandler handler, PetscLogStage stage, PetscLogClass clss, PetscClassPerf **class_info) 368 { 369 PetscLogClassPerfArray class_log; 370 PetscStagePerf *stage_info; 371 372 PetscFunctionBegin; 373 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 374 class_log = stage_info->classLog; 375 PetscCall(PetscLogClassPerfArrayResize(class_log, clss + 1)); 376 PetscCall(PetscLogClassPerfArrayGetRef(class_log, clss, class_info)); 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 static PetscErrorCode PetscLogHandlerObjectCreate_Default(PetscLogHandler h, PetscObject obj) 381 { 382 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 383 PetscLogState state; 384 PetscLogStage stage; 385 PetscClassPerf *classInfo; 386 int oclass = 0; 387 388 PetscFunctionBegin; 389 PetscCall(PetscLogHandlerGetState(h, &state)); 390 PetscCall(PetscSpinlockLock(&def->lock)); 391 /* Record stage info */ 392 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 393 PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass)); 394 PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo)); 395 classInfo->creations++; 396 /* Record the creation action */ 397 if (def->petsc_logActions) { 398 Action new_action; 399 400 PetscCall(PetscTime(&new_action.time)); 401 new_action.time -= petsc_BaseTime; 402 new_action.action = PETSC_LOG_ACTION_CREATE; 403 new_action.event = -1; 404 new_action.classid = obj->classid; 405 new_action.id1 = obj->id; 406 new_action.id2 = -1; 407 new_action.id3 = -1; 408 new_action.flops = petsc_TotalFlops; 409 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 410 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 411 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 412 } 413 /* We don't just use obj->id to count all objects that are created 414 because PetscLogHandlers are objects and PetscLogObjectDestroy() will not 415 be called for them: the number of objects created and destroyed as counted 416 here and below would have an imbalance */ 417 def->petsc_numObjectsCreated++; 418 /* Record the object */ 419 if (def->petsc_logObjects) { 420 Object new_object; 421 PetscInt objid; 422 423 new_object.parent = -1; 424 new_object.obj = obj; 425 new_object.mem = 0; 426 427 PetscCall(PetscMemzero(new_object.name, sizeof(new_object.name))); 428 PetscCall(PetscMemzero(new_object.info, sizeof(new_object.info))); 429 PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1"); 430 PetscCall(PetscIntCast(obj->id, &objid)); 431 PetscCall(PetscLogObjectArrayResize(def->petsc_objects, objid)); 432 PetscCall(PetscLogObjectArraySet(def->petsc_objects, objid - 1, new_object)); 433 } 434 PetscCall(PetscSpinlockUnlock(&def->lock)); 435 PetscFunctionReturn(PETSC_SUCCESS); 436 } 437 438 static PetscErrorCode PetscLogHandlerObjectDestroy_Default(PetscLogHandler h, PetscObject obj) 439 { 440 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 441 PetscLogState state; 442 PetscLogStage stage; 443 PetscClassPerf *classInfo; 444 int oclass = 0; 445 446 PetscFunctionBegin; 447 PetscCall(PetscLogHandlerGetState(h, &state)); 448 /* Record stage info */ 449 PetscCall(PetscSpinlockLock(&def->lock)); 450 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 451 if (stage >= 0) { 452 /* stage < 0 can happen if the log summary is output before some things are destroyed */ 453 PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass)); 454 PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo)); 455 classInfo->destructions++; 456 } 457 /* Cannot Credit all ancestors with your memory because they may have already been destroyed*/ 458 def->petsc_numObjectsDestroyed++; 459 /* Dynamically enlarge logging structures */ 460 /* Record the destruction action */ 461 if (def->petsc_logActions) { 462 Action new_action; 463 464 PetscCall(PetscTime(&new_action.time)); 465 new_action.time -= petsc_BaseTime; 466 new_action.event = -1; 467 new_action.action = PETSC_LOG_ACTION_DESTROY; 468 new_action.classid = obj->classid; 469 new_action.id1 = obj->id; 470 new_action.id2 = -1; 471 new_action.id3 = -1; 472 new_action.flops = petsc_TotalFlops; 473 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 474 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 475 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 476 } 477 if (def->petsc_logObjects) { 478 Object *obj_entry = NULL; 479 PetscInt objid; 480 481 PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1"); 482 PetscCall(PetscIntCast(obj->id - 1, &objid)); 483 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, objid, &obj_entry)); 484 if (obj->name) PetscCall(PetscStrncpy(obj_entry->name, obj->name, 64)); 485 obj_entry->obj = NULL; 486 } 487 PetscCall(PetscSpinlockUnlock(&def->lock)); 488 PetscFunctionReturn(PETSC_SUCCESS); 489 } 490 491 static PetscErrorCode PetscLogHandlerEventSync_Default(PetscLogHandler h, PetscLogEvent event, MPI_Comm comm) 492 { 493 PetscLogState state; 494 PetscLogEventInfo event_info; 495 PetscEventPerfInfo *event_perf_info; 496 int stage; 497 PetscLogDouble time = 0.0; 498 499 PetscFunctionBegin; 500 if (!PetscLogSyncOn || comm == MPI_COMM_NULL) PetscFunctionReturn(PETSC_SUCCESS); 501 PetscCall(PetscLogHandlerGetState(h, &state)); 502 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 503 if (!event_info.collective) PetscFunctionReturn(PETSC_SUCCESS); 504 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 505 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 506 if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 507 508 PetscCall(PetscTimeSubtract(&time)); 509 PetscCallMPI(MPI_Barrier(comm)); 510 PetscCall(PetscTimeAdd(&time)); 511 event_perf_info->syncTime += time; 512 PetscFunctionReturn(PETSC_SUCCESS); 513 } 514 515 static PetscErrorCode PetscLogGetStageEventPerfInfo_threaded(PetscLogHandler_Default def, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **eventInfo) 516 { 517 PetscEventPerfInfo *leventInfo = NULL; 518 PetscHashIJKKey key; 519 520 PetscFunctionBegin; 521 #if PetscDefined(HAVE_THREADSAFETY) 522 key.i = PetscLogGetTid(); 523 #else 524 key.i = 0; 525 #endif 526 key.j = stage; 527 key.k = event; 528 PetscCall(PetscSpinlockLock(&def->lock)); 529 PetscCall(PetscHMapEventGet(def->eventInfoMap_th, key, &leventInfo)); 530 if (!leventInfo) { 531 PetscCall(PetscNew(&leventInfo)); 532 leventInfo->id = event; 533 PetscCall(PetscHMapEventSet(def->eventInfoMap_th, key, leventInfo)); 534 } 535 PetscCall(PetscSpinlockUnlock(&def->lock)); 536 *eventInfo = leventInfo; 537 PetscFunctionReturn(PETSC_SUCCESS); 538 } 539 540 static PetscErrorCode PetscLogHandlerEventBegin_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 541 { 542 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 543 PetscEventPerfInfo *event_perf_info = NULL; 544 PetscLogEventInfo event_info; 545 PetscLogDouble time; 546 PetscLogState state; 547 PetscLogStage stage; 548 549 PetscFunctionBegin; 550 PetscCall(PetscLogHandlerGetState(h, &state)); 551 if (PetscDefined(USE_DEBUG)) { 552 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 553 if (PetscUnlikely(o1)) PetscValidHeader(o1, 3); 554 if (PetscUnlikely(o2)) PetscValidHeader(o2, 4); 555 if (PetscUnlikely(o3)) PetscValidHeader(o3, 5); 556 if (PetscUnlikely(o4)) PetscValidHeader(o4, 6); 557 if (event_info.collective && o1) { 558 PetscInt64 b1[2], b2[2]; 559 560 b1[0] = -o1->cidx; 561 b1[1] = o1->cidx; 562 PetscCallMPI(MPIU_Allreduce(b1, b2, 2, MPIU_INT64, MPI_MAX, PetscObjectComm(o1))); 563 PetscCheck(-b2[0] == b2[1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Collective event %s not called collectively %" PetscInt64_FMT " != %" PetscInt64_FMT, event_info.name, -b2[0], b2[1]); 564 } 565 } 566 /* Synchronization */ 567 PetscCall(PetscLogHandlerEventSync_Default(h, event, PetscObjectComm(o1))); 568 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 569 if (def->pause_depth > 0) stage = 0; // in pause-mode, all events run on the main stage 570 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) { 571 PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info)); 572 if (event_perf_info->depth == 0) PetscCall(PetscEventPerfInfoInit(event_perf_info)); 573 } else { 574 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 575 } 576 PetscCheck(event_perf_info->depth >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to begin a paused event, this is not allowed"); 577 event_perf_info->depth++; 578 /* Check for double counting */ 579 if (event_perf_info->depth > 1) PetscFunctionReturn(PETSC_SUCCESS); 580 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 581 /* Log the performance info */ 582 event_perf_info->count++; 583 PetscCall(PetscTime(&time)); 584 PetscCall(PetscEventPerfInfoTic(event_perf_info, time, PetscLogMemory, event)); 585 if (def->petsc_logActions) { 586 PetscLogDouble curTime; 587 Action new_action; 588 589 PetscCall(PetscTime(&curTime)); 590 new_action.time = curTime - petsc_BaseTime; 591 new_action.action = PETSC_LOG_ACTION_BEGIN; 592 new_action.event = event; 593 new_action.classid = event_info.classid; 594 new_action.id1 = o1 ? o1->id : -1; 595 new_action.id2 = o2 ? o2->id : -1; 596 new_action.id3 = o3 ? o3->id : -1; 597 new_action.flops = petsc_TotalFlops; 598 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 599 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 600 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 601 } 602 PetscFunctionReturn(PETSC_SUCCESS); 603 } 604 605 static PetscErrorCode PetscLogHandlerEventEnd_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 606 { 607 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 608 PetscEventPerfInfo *event_perf_info = NULL; 609 PetscLogDouble time; 610 PetscLogState state; 611 int stage; 612 PetscLogEventInfo event_info; 613 614 PetscFunctionBegin; 615 PetscCall(PetscLogHandlerGetState(h, &state)); 616 if (PetscDefined(USE_DEBUG)) { 617 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 618 if (PetscUnlikely(o1)) PetscValidHeader(o1, 3); 619 if (PetscUnlikely(o2)) PetscValidHeader(o2, 4); 620 if (PetscUnlikely(o3)) PetscValidHeader(o3, 5); 621 if (PetscUnlikely(o4)) PetscValidHeader(o4, 6); 622 if (event_info.collective && o1) { 623 PetscInt64 b1[2], b2[2]; 624 625 b1[0] = -o1->cidx; 626 b1[1] = o1->cidx; 627 PetscCallMPI(MPIU_Allreduce(b1, b2, 2, MPIU_INT64, MPI_MAX, PetscObjectComm(o1))); 628 PetscCheck(-b2[0] == b2[1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Collective event %s not called collectively %" PetscInt64_FMT " != %" PetscInt64_FMT, event_info.name, -b2[0], b2[1]); 629 } 630 } 631 if (def->petsc_logActions) { 632 PetscLogDouble curTime; 633 Action new_action; 634 635 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 636 PetscCall(PetscTime(&curTime)); 637 new_action.time = curTime - petsc_BaseTime; 638 new_action.action = PETSC_LOG_ACTION_END; 639 new_action.event = event; 640 new_action.classid = event_info.classid; 641 new_action.id1 = o1 ? o1->id : -1; 642 new_action.id2 = o2 ? o2->id : -2; 643 new_action.id3 = o3 ? o3->id : -3; 644 new_action.flops = petsc_TotalFlops; 645 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 646 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 647 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 648 } 649 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 650 if (def->pause_depth > 0) stage = 0; // all events run on the main stage in pause-mode 651 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) { 652 PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info)); 653 } else { 654 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 655 } 656 PetscCheck(event_perf_info->depth > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to end paused event, not allowed"); 657 event_perf_info->depth--; 658 /* Check for double counting */ 659 if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 660 else PetscCheck(event_perf_info->depth == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Logging event had unbalanced begin/end pairs"); 661 662 /* Log performance info */ 663 PetscCall(PetscTime(&time)); 664 PetscCall(PetscEventPerfInfoToc(event_perf_info, time, PetscLogMemory, event)); 665 if (PetscDefined(HAVE_THREADSAFETY) || def->use_threadsafe) { 666 PetscEventPerfInfo *event_perf_info_global; 667 PetscCall(PetscSpinlockLock(&def->lock)); 668 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info_global)); 669 PetscCall(PetscEventPerfInfoAdd_Internal(event_perf_info, event_perf_info_global)); 670 PetscCall(PetscSpinlockUnlock(&def->lock)); 671 } 672 PetscFunctionReturn(PETSC_SUCCESS); 673 } 674 675 static PetscErrorCode PetscLogHandlerEventDeactivatePush_Default(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event) 676 { 677 PetscEventPerfInfo *event_perf_info; 678 679 PetscFunctionBegin; 680 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 681 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 682 event_perf_info->depth++; 683 PetscFunctionReturn(PETSC_SUCCESS); 684 } 685 686 static PetscErrorCode PetscLogHandlerEventDeactivatePop_Default(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event) 687 { 688 PetscEventPerfInfo *event_perf_info; 689 690 PetscFunctionBegin; 691 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 692 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(h, stage, event, &event_perf_info)); 693 event_perf_info->depth--; 694 PetscFunctionReturn(PETSC_SUCCESS); 695 } 696 697 static PetscErrorCode PetscLogHandlerEventsPause_Default(PetscLogHandler h) 698 { 699 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 700 PetscLogDouble time; 701 PetscInt num_stages; 702 703 PetscFunctionBegin; 704 if (def->pause_depth++ > 0) PetscFunctionReturn(PETSC_SUCCESS); 705 PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL)); 706 PetscCall(PetscTime(&time)); 707 /* Pause stages in reverse of the order they were pushed */ 708 for (PetscInt stage = num_stages - 1; stage >= 0; stage--) { 709 PetscStagePerf *stage_info = NULL; 710 PetscInt num_events; 711 712 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 713 PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL)); 714 /* Pause events in reverse of the order they were pushed */ 715 for (PetscInt event = num_events - 1; event >= 0; event--) { 716 PetscEventPerfInfo *event_info = NULL; 717 PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info)); 718 if (event_info->depth > 0) { 719 event_info->depth *= -1; 720 PetscCall(PetscEventPerfInfoPause(event_info, time, PetscLogMemory, event)); 721 } 722 } 723 if (stage > 0 && stage_info->perfInfo.depth > 0) { 724 stage_info->perfInfo.depth *= -1; 725 PetscCall(PetscEventPerfInfoPause(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2))); 726 } 727 } 728 PetscFunctionReturn(PETSC_SUCCESS); 729 } 730 731 static PetscErrorCode PetscLogHandlerEventsResume_Default(PetscLogHandler h) 732 { 733 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 734 PetscLogDouble time; 735 PetscInt num_stages; 736 737 PetscFunctionBegin; 738 if (--def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 739 PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL)); 740 PetscCall(PetscTime(&time)); 741 /* Unpause stages in the same order they were pushed */ 742 for (PetscInt stage = 0; stage < num_stages; stage++) { 743 PetscStagePerf *stage_info = NULL; 744 PetscInt num_events; 745 746 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 747 PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL)); 748 /* Unpause events in the same order they were pushed */ 749 for (PetscInt event = 0; event < num_events; event++) { 750 PetscEventPerfInfo *event_info = NULL; 751 PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info)); 752 if (event_info->depth < 0) { 753 event_info->depth *= -1; 754 PetscCall(PetscEventPerfInfoResume(event_info, time, PetscLogMemory, event)); 755 } 756 } 757 if (stage > 0 && stage_info->perfInfo.depth < 0) { 758 stage_info->perfInfo.depth *= -1; 759 PetscCall(PetscEventPerfInfoResume(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2))); 760 } 761 } 762 PetscFunctionReturn(PETSC_SUCCESS); 763 } 764 765 static PetscErrorCode PetscLogHandlerStagePush_Default(PetscLogHandler h, PetscLogStage new_stage) 766 { 767 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 768 PetscLogDouble time; 769 PetscLogState state; 770 PetscLogStage current_stage; 771 PetscStagePerf *new_stage_info; 772 773 PetscFunctionBegin; 774 if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 775 PetscCall(PetscLogHandlerGetState(h, &state)); 776 current_stage = state->current_stage; 777 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, new_stage, &new_stage_info)); 778 PetscCall(PetscTime(&time)); 779 780 /* Record flops/time of previous stage */ 781 if (current_stage >= 0) { 782 if (PetscBTLookup(state->active, current_stage)) { 783 PetscStagePerf *current_stage_info; 784 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, ¤t_stage_info)); 785 PetscCall(PetscEventPerfInfoToc(¤t_stage_info->perfInfo, time, PetscLogMemory, -(current_stage + 2))); 786 } 787 } 788 new_stage_info->used = PETSC_TRUE; 789 new_stage_info->perfInfo.count++; 790 new_stage_info->perfInfo.depth++; 791 /* Subtract current quantities so that we obtain the difference when we pop */ 792 if (PetscBTLookup(state->active, new_stage)) PetscCall(PetscEventPerfInfoTic(&new_stage_info->perfInfo, time, PetscLogMemory, -(new_stage + 2))); 793 PetscFunctionReturn(PETSC_SUCCESS); 794 } 795 796 static PetscErrorCode PetscLogHandlerStagePop_Default(PetscLogHandler h, PetscLogStage old_stage) 797 { 798 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 799 PetscLogStage current_stage; 800 PetscStagePerf *old_stage_info; 801 PetscLogState state; 802 PetscLogDouble time; 803 804 PetscFunctionBegin; 805 if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 806 PetscCall(PetscLogHandlerGetState(h, &state)); 807 current_stage = state->current_stage; 808 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, old_stage, &old_stage_info)); 809 PetscCall(PetscTime(&time)); 810 old_stage_info->perfInfo.depth--; 811 if (PetscBTLookup(state->active, old_stage)) PetscCall(PetscEventPerfInfoToc(&old_stage_info->perfInfo, time, PetscLogMemory, -(old_stage + 2))); 812 if (current_stage >= 0) { 813 if (PetscBTLookup(state->active, current_stage)) { 814 PetscStagePerf *current_stage_info; 815 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, ¤t_stage_info)); 816 PetscCall(PetscEventPerfInfoTic(¤t_stage_info->perfInfo, time, PetscLogMemory, -(current_stage + 2))); 817 } 818 } 819 PetscFunctionReturn(PETSC_SUCCESS); 820 } 821 822 static PetscErrorCode PetscLogHandlerStageSetVisible_Default(PetscLogHandler h, PetscLogStage stage, PetscBool is_visible) 823 { 824 PetscStagePerf *stage_info; 825 826 PetscFunctionBegin; 827 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 828 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info)); 829 stage_info->perfInfo.visible = is_visible; 830 PetscFunctionReturn(PETSC_SUCCESS); 831 } 832 833 static PetscErrorCode PetscLogHandlerStageGetVisible_Default(PetscLogHandler h, PetscLogStage stage, PetscBool *is_visible) 834 { 835 PetscStagePerf *stage_info; 836 837 PetscFunctionBegin; 838 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 839 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info)); 840 *is_visible = stage_info->perfInfo.visible; 841 PetscFunctionReturn(PETSC_SUCCESS); 842 } 843 844 static PetscErrorCode PetscLogHandlerSetLogActions_Default(PetscLogHandler handler, PetscBool flag) 845 { 846 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 847 848 PetscFunctionBegin; 849 def->petsc_logActions = flag; 850 PetscFunctionReturn(PETSC_SUCCESS); 851 } 852 853 static PetscErrorCode PetscLogHandlerSetLogObjects_Default(PetscLogHandler handler, PetscBool flag) 854 { 855 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 856 857 PetscFunctionBegin; 858 def->petsc_logObjects = flag; 859 PetscFunctionReturn(PETSC_SUCCESS); 860 } 861 862 static PetscErrorCode PetscLogHandlerLogObjectState_Default(PetscLogHandler handler, PetscObject obj, const char format[], va_list Argp) 863 { 864 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 865 size_t fullLength; 866 867 PetscFunctionBegin; 868 if (def->petsc_logObjects) { 869 Object *obj_entry = NULL; 870 PetscInt objid; 871 872 PetscCall(PetscIntCast(obj->id - 1, &objid)); 873 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, objid, &obj_entry)); 874 PetscCall(PetscVSNPrintf(obj_entry->info, 64, format, &fullLength, Argp)); 875 } 876 PetscFunctionReturn(PETSC_SUCCESS); 877 } 878 879 static PetscErrorCode PetscLogHandlerGetNumObjects_Default(PetscLogHandler handler, PetscInt *num_objects) 880 { 881 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 882 883 PetscFunctionBegin; 884 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, num_objects, NULL)); 885 PetscFunctionReturn(PETSC_SUCCESS); 886 } 887 888 static PetscErrorCode PetscLogHandlerDump_Default(PetscLogHandler handler, const char sname[]) 889 { 890 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 891 FILE *fd; 892 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 893 PetscLogDouble flops, _TotalTime; 894 PetscMPIInt rank; 895 int curStage; 896 PetscLogState state; 897 PetscInt num_events; 898 PetscLogEvent event; 899 900 PetscFunctionBegin; 901 /* Calculate the total elapsed time */ 902 PetscCall(PetscTime(&_TotalTime)); 903 _TotalTime -= petsc_BaseTime; 904 /* Open log file */ 905 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)handler), &rank)); 906 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank)); 907 PetscCall(PetscFixFilename(file, fname)); 908 PetscCall(PetscFOpen(PETSC_COMM_SELF, fname, "w", &fd)); 909 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 910 /* Output totals */ 911 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 912 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Clock Resolution %g\n", 0.0)); 913 /* Output actions */ 914 if (def->petsc_logActions) { 915 PetscInt num_actions; 916 PetscCall(PetscLogActionArrayGetSize(def->petsc_actions, &num_actions, NULL)); 917 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Actions accomplished %" PetscInt_FMT "\n", num_actions)); 918 for (int a = 0; a < num_actions; a++) { 919 Action *action; 920 921 PetscCall(PetscLogActionArrayGetRef(def->petsc_actions, a, &action)); 922 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%g %d %d %d %" PetscInt64_FMT " %" PetscInt64_FMT " %" PetscInt64_FMT " %g %g %g\n", action->time, action->action, action->event, action->classid, action->id1, action->id2, action->id3, action->flops, 923 action->mem, action->maxmem)); 924 } 925 } 926 /* Output objects */ 927 if (def->petsc_logObjects) { 928 PetscInt num_objects; 929 930 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 931 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Objects created %d destroyed %d\n", def->petsc_numObjectsCreated, def->petsc_numObjectsDestroyed)); 932 for (int o = 0; o < num_objects; o++) { 933 Object *object = NULL; 934 935 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, o, &object)); 936 if (object->parent != -1) continue; // object with this id wasn't logged, probably a PetscLogHandler 937 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Parent ID: %d Memory: %d\n", object->parent, (int)object->mem)); 938 if (!object->name[0]) { 939 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Name\n")); 940 } else { 941 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Name: %s\n", object->name)); 942 } 943 if (!object->info[0]) { 944 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Info\n")); 945 } else { 946 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Info: %s\n", object->info)); 947 } 948 } 949 } 950 /* Output events */ 951 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Event log:\n")); 952 PetscCall(PetscLogHandlerGetState(handler, &state)); 953 PetscCall(PetscLogStateGetNumEvents(state, &num_events)); 954 PetscCall(PetscLogStateGetCurrentStage(state, &curStage)); 955 for (event = 0; event < num_events; event++) { 956 PetscEventPerfInfo *event_info; 957 958 PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, curStage, event, &event_info)); 959 if (event_info->time != 0.0) flops = event_info->flops / event_info->time; 960 else flops = 0.0; 961 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%d %16d %16g %16g %16g\n", event, event_info->count, event_info->flops, event_info->time, flops)); 962 } 963 PetscCall(PetscFClose(PETSC_COMM_SELF, fd)); 964 PetscFunctionReturn(PETSC_SUCCESS); 965 } 966 967 /* 968 PetscLogView_Detailed - Each process prints the times for its own events 969 970 */ 971 static PetscErrorCode PetscLogHandlerView_Default_Detailed(PetscLogHandler handler, PetscViewer viewer) 972 { 973 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 974 PetscLogDouble locTotalTime, numRed, maxMem; 975 PetscInt numStages, numEvents; 976 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 977 PetscMPIInt rank, size; 978 PetscLogGlobalNames global_stages, global_events; 979 PetscLogState state; 980 PetscEventPerfInfo zero_info; 981 982 PetscFunctionBegin; 983 PetscCall(PetscLogHandlerGetState(handler, &state)); 984 PetscCallMPI(MPI_Comm_size(comm, &size)); 985 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 986 /* Must preserve reduction count before we go on */ 987 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 988 /* Get the total elapsed time */ 989 PetscCall(PetscTime(&locTotalTime)); 990 locTotalTime -= petsc_BaseTime; 991 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 992 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 993 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 994 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 995 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 996 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 997 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 998 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 999 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 1000 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 1001 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 1002 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 1003 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 1004 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 1005 for (PetscInt stage = 0; stage < numStages; stage++) { 1006 PetscInt stage_id; 1007 const char *stage_name; 1008 1009 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1010 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1011 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stage_name)); 1012 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stage_name)); 1013 for (PetscInt event = 0; event < numEvents; event++) { 1014 PetscEventPerfInfo *eventInfo = &zero_info; 1015 PetscBool is_zero = PETSC_FALSE; 1016 PetscInt event_id; 1017 const char *event_name; 1018 1019 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1020 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1021 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &eventInfo)); 1022 is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE; 1023 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPI_C_BOOL, MPI_LAND, comm)); 1024 if (!is_zero) PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stage_name, event_name)); 1025 } 1026 } 1027 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1028 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1029 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 1030 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct)); 1031 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len)); 1032 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1033 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1034 { 1035 PetscInt num_objects; 1036 1037 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 1038 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %" PetscInt_FMT "\n", rank, num_objects)); 1039 } 1040 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1041 PetscCall(PetscViewerFlush(viewer)); 1042 for (PetscInt stage = 0; stage < numStages; stage++) { 1043 PetscEventPerfInfo *stage_perf_info = &zero_info; 1044 PetscInt stage_id; 1045 const char *stage_name; 1046 1047 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1048 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1049 if (stage_id >= 0) { 1050 PetscStagePerf *stage_info; 1051 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info)); 1052 stage_perf_info = &stage_info->perfInfo; 1053 } 1054 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stage_name, rank, stage_perf_info->time, 1055 stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops)); 1056 for (PetscInt event = 0; event < numEvents; event++) { 1057 PetscEventPerfInfo *eventInfo = &zero_info; 1058 PetscBool is_zero = PETSC_FALSE; 1059 PetscInt event_id; 1060 const char *event_name; 1061 1062 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1063 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1064 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &eventInfo)); 1065 is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE; 1066 PetscCall(PetscArraycmp(eventInfo, &zero_info, 1, &is_zero)); 1067 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPI_C_BOOL, MPI_LAND, comm)); 1068 if (!is_zero) { 1069 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", stage_name, event_name, rank, 1070 eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops)); 1071 if (eventInfo->dof[0] >= 0.) { 1072 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1073 for (PetscInt d = 0; d < 8; ++d) { 1074 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1075 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1076 } 1077 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1078 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1079 for (PetscInt e = 0; e < 8; ++e) { 1080 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1081 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1082 } 1083 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1084 } 1085 } 1086 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1087 } 1088 } 1089 PetscCall(PetscViewerFlush(viewer)); 1090 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1091 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1092 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1093 PetscFunctionReturn(PETSC_SUCCESS); 1094 } 1095 1096 /* 1097 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1098 */ 1099 static PetscErrorCode PetscLogHandlerView_Default_CSV(PetscLogHandler handler, PetscViewer viewer) 1100 { 1101 PetscLogDouble locTotalTime, maxMem; 1102 PetscInt numStages, numEvents, stage, event; 1103 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1104 PetscMPIInt rank, size; 1105 PetscLogGlobalNames global_stages, global_events; 1106 PetscLogState state; 1107 PetscEventPerfInfo zero_info; 1108 1109 PetscFunctionBegin; 1110 PetscCall(PetscLogHandlerGetState(handler, &state)); 1111 PetscCallMPI(MPI_Comm_size(comm, &size)); 1112 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1113 /* Must preserve reduction count before we go on */ 1114 /* Get the total elapsed time */ 1115 PetscCall(PetscTime(&locTotalTime)); 1116 locTotalTime -= petsc_BaseTime; 1117 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1118 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 1119 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 1120 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 1121 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 1122 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 1123 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1124 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1125 PetscCall(PetscViewerFlush(viewer)); 1126 for (stage = 0; stage < numStages; stage++) { 1127 PetscEventPerfInfo *stage_perf_info; 1128 PetscInt stage_id; 1129 const char *stage_name; 1130 1131 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1132 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1133 stage_perf_info = &zero_info; 1134 if (stage_id >= 0) { 1135 PetscStagePerf *stage_info; 1136 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info)); 1137 stage_perf_info = &stage_info->perfInfo; 1138 } 1139 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stage_name, rank, stage_perf_info->time, stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops)); 1140 for (event = 0; event < numEvents; event++) { 1141 PetscEventPerfInfo *eventInfo = &zero_info; 1142 PetscBool is_zero = PETSC_FALSE; 1143 PetscInt event_id; 1144 const char *event_name; 1145 1146 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1147 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1148 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &eventInfo)); 1149 PetscCall(PetscArraycmp(eventInfo, &zero_info, 1, &is_zero)); 1150 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPI_C_BOOL, MPI_LAND, comm)); 1151 if (!is_zero) { 1152 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stage_name, event_name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops)); 1153 if (eventInfo->dof[0] >= 0.) { 1154 for (PetscInt d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1155 for (PetscInt e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1156 } 1157 } 1158 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1159 } 1160 } 1161 PetscCall(PetscViewerFlush(viewer)); 1162 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1163 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1164 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1165 PetscFunctionReturn(PETSC_SUCCESS); 1166 } 1167 1168 static PetscErrorCode PetscLogViewWarnSync(PetscViewer viewer) 1169 { 1170 PetscFunctionBegin; 1171 if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS); 1172 PetscCall(PetscViewerASCIIPrintf(viewer, "\n\n")); 1173 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n")); 1174 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1175 PetscCall(PetscViewerASCIIPrintf(viewer, " # WARNING!!! #\n")); 1176 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1177 PetscCall(PetscViewerASCIIPrintf(viewer, " # This program was run with logging synchronization. #\n")); 1178 PetscCall(PetscViewerASCIIPrintf(viewer, " # This option provides more meaningful imbalance #\n")); 1179 PetscCall(PetscViewerASCIIPrintf(viewer, " # figures at the expense of slowing things down and #\n")); 1180 PetscCall(PetscViewerASCIIPrintf(viewer, " # providing a distorted view of the overall runtime. #\n")); 1181 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1182 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n\n\n")); 1183 PetscFunctionReturn(PETSC_SUCCESS); 1184 } 1185 1186 static PetscErrorCode PetscLogViewWarnDebugging(PetscViewer viewer) 1187 { 1188 PetscFunctionBegin; 1189 if (PetscDefined(USE_DEBUG)) { 1190 PetscCall(PetscViewerASCIIPrintf(viewer, "\n\n")); 1191 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n")); 1192 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1193 PetscCall(PetscViewerASCIIPrintf(viewer, " # WARNING!!! #\n")); 1194 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1195 PetscCall(PetscViewerASCIIPrintf(viewer, " # This code was compiled with a debugging option. #\n")); 1196 PetscCall(PetscViewerASCIIPrintf(viewer, " # To get timing results run ./configure #\n")); 1197 PetscCall(PetscViewerASCIIPrintf(viewer, " # using --with-debugging=no, the performance will #\n")); 1198 PetscCall(PetscViewerASCIIPrintf(viewer, " # be generally two or three times faster. #\n")); 1199 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1200 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n\n\n")); 1201 } 1202 PetscFunctionReturn(PETSC_SUCCESS); 1203 } 1204 1205 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(PetscViewer viewer) 1206 { 1207 #if defined(PETSC_HAVE_DEVICE) 1208 PetscMPIInt size; 1209 PetscBool deviceInitialized = PETSC_FALSE; 1210 1211 PetscFunctionBegin; 1212 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)viewer), &size)); 1213 for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) { 1214 const PetscDeviceType dtype = PetscDeviceTypeCast(i); 1215 if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */ 1216 deviceInitialized = PETSC_TRUE; 1217 break; 1218 } 1219 } 1220 /* the last condition says PETSc is configured with device but it is a pure CPU run, so don't print misleading warnings */ 1221 if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS); 1222 PetscCall(PetscViewerASCIIPrintf(viewer, "\n\n")); 1223 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n")); 1224 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1225 PetscCall(PetscViewerASCIIPrintf(viewer, " # WARNING!!! #\n")); 1226 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1227 PetscCall(PetscViewerASCIIPrintf(viewer, " # This code was compiled with GPU support and you've #\n")); 1228 PetscCall(PetscViewerASCIIPrintf(viewer, " # created PETSc/GPU objects, but you intentionally #\n")); 1229 PetscCall(PetscViewerASCIIPrintf(viewer, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1230 PetscCall(PetscViewerASCIIPrintf(viewer, " # additional data between the GPU and CPU. To obtain #\n")); 1231 PetscCall(PetscViewerASCIIPrintf(viewer, " # meaningful timing results on multi-rank runs, use #\n")); 1232 PetscCall(PetscViewerASCIIPrintf(viewer, " # GPU-aware MPI instead. #\n")); 1233 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1234 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n\n\n")); 1235 PetscFunctionReturn(PETSC_SUCCESS); 1236 #else 1237 return PETSC_SUCCESS; 1238 #endif 1239 } 1240 1241 static PetscErrorCode PetscLogViewWarnGpuTime(PetscViewer viewer) 1242 { 1243 #if defined(PETSC_HAVE_DEVICE) && !defined(PETSC_HAVE_KOKKOS_WITHOUT_GPU) 1244 1245 PetscFunctionBegin; 1246 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS); 1247 PetscCall(PetscViewerASCIIPrintf(viewer, "\n\n")); 1248 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n")); 1249 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1250 PetscCall(PetscViewerASCIIPrintf(viewer, " # WARNING!!! #\n")); 1251 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1252 PetscCall(PetscViewerASCIIPrintf(viewer, " # This code was run with -log_view_gpu_time #\n")); 1253 PetscCall(PetscViewerASCIIPrintf(viewer, " # This provides accurate timing within the GPU kernels #\n")); 1254 PetscCall(PetscViewerASCIIPrintf(viewer, " # but can slow down the entire computation by a #\n")); 1255 PetscCall(PetscViewerASCIIPrintf(viewer, " # measurable amount. For fastest runs we recommend #\n")); 1256 PetscCall(PetscViewerASCIIPrintf(viewer, " # not using this option. #\n")); 1257 PetscCall(PetscViewerASCIIPrintf(viewer, " # #\n")); 1258 PetscCall(PetscViewerASCIIPrintf(viewer, " ##########################################################\n\n\n")); 1259 PetscFunctionReturn(PETSC_SUCCESS); 1260 #else 1261 return PETSC_SUCCESS; 1262 #endif 1263 } 1264 1265 PETSC_INTERN int PetscGlobalArgc; 1266 PETSC_INTERN char **PetscGlobalArgs; 1267 1268 static PetscErrorCode PetscLogHandlerView_Default_Info(PetscLogHandler handler, PetscViewer viewer) 1269 { 1270 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 1271 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1272 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1273 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1274 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1275 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1276 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1277 PetscLogDouble min, max, tot, ratio, avg, x, y; 1278 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1279 #if defined(PETSC_HAVE_DEVICE) 1280 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1281 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops, gjoules, geff; 1282 #endif 1283 PetscMPIInt minC, maxC; 1284 PetscMPIInt size, rank; 1285 PetscBool *localStageUsed, *stageUsed; 1286 PetscBool *localStageVisible, *stageVisible; 1287 PetscInt numStages, numEvents; 1288 int stage, oclass; 1289 PetscLogEvent event; 1290 char version[256]; 1291 MPI_Comm comm; 1292 #if defined(PETSC_HAVE_DEVICE) && !defined(PETSC_HAVE_KOKKOS_WITHOUT_GPU) 1293 PetscInt64 nas = 0x7FF0000000000002; 1294 #endif 1295 PetscLogGlobalNames global_stages, global_events; 1296 PetscEventPerfInfo zero_info; 1297 PetscLogState state; 1298 1299 PetscFunctionBegin; 1300 PetscCall(PetscLogHandlerGetState(handler, &state)); 1301 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1302 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1303 PetscCallMPI(MPI_Comm_size(comm, &size)); 1304 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1305 /* Get the total elapsed time */ 1306 PetscCall(PetscTime(&locTotalTime)); 1307 locTotalTime -= petsc_BaseTime; 1308 1309 PetscCall(PetscViewerASCIIPrintf(viewer, "****************************************************************************************************************************************************************\n")); 1310 PetscCall(PetscViewerASCIIPrintf(viewer, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1311 PetscCall(PetscViewerASCIIPrintf(viewer, "****************************************************************************************************************************************************************\n")); 1312 PetscCall(PetscViewerASCIIPrintf(viewer, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1313 PetscCall(PetscLogViewWarnSync(viewer)); 1314 PetscCall(PetscLogViewWarnDebugging(viewer)); 1315 PetscCall(PetscLogViewWarnNoGpuAwareMpi(viewer)); 1316 PetscCall(PetscLogViewWarnGpuTime(viewer)); 1317 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1318 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1319 PetscCall(PetscGetUserName(username, sizeof(username))); 1320 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1321 PetscCall(PetscGetDate(date, sizeof(date))); 1322 PetscCall(PetscGetVersion(version, sizeof(version))); 1323 1324 #if defined(PETSC_HAVE_CUPM) 1325 const char *cupm = PetscDefined(HAVE_CUDA) ? "CUDA" : "HIP"; 1326 if (PetscDeviceCUPMRuntimeArch) 1327 PetscCall(PetscViewerASCIIPrintf(viewer, "%s on a %s named %s with %d process%s and %s architecture %d, by %s on %s\n", pname, arch, hostname, size, size > 1 ? "es" : "", cupm, PetscDeviceCUPMRuntimeArch, username, date)); 1328 else 1329 #endif 1330 PetscCall(PetscViewerASCIIPrintf(viewer, "%s on a %s named %s with %d process%s, by %s on %s\n", pname, arch, hostname, size, size > 1 ? "es" : "", username, date)); 1331 1332 #if defined(PETSC_HAVE_OPENMP) 1333 PetscCall(PetscViewerASCIIPrintf(viewer, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1334 #endif 1335 PetscCall(PetscViewerASCIIPrintf(viewer, "Using %s\n", version)); 1336 1337 /* Must preserve reduction count before we go on */ 1338 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1339 1340 /* Calculate summary information */ 1341 PetscCall(PetscViewerASCIIPrintf(viewer, "\n Max Max/Min Avg Total\n")); 1342 /* Time */ 1343 PetscCallMPI(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1344 PetscCallMPI(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1345 PetscCallMPI(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1346 avg = tot / ((PetscLogDouble)size); 1347 if (min != 0.0) ratio = max / min; 1348 else ratio = 0.0; 1349 PetscCall(PetscViewerASCIIPrintf(viewer, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1350 TotalTime = tot; 1351 /* Objects */ 1352 { 1353 PetscInt num_objects; 1354 1355 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 1356 avg = (PetscLogDouble)num_objects; 1357 } 1358 PetscCallMPI(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1359 PetscCallMPI(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1360 PetscCallMPI(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1361 avg = tot / ((PetscLogDouble)size); 1362 if (min != 0.0) ratio = max / min; 1363 else ratio = 0.0; 1364 PetscCall(PetscViewerASCIIPrintf(viewer, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1365 /* Flops */ 1366 PetscCallMPI(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1367 PetscCallMPI(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1368 PetscCallMPI(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1369 avg = tot / ((PetscLogDouble)size); 1370 if (min != 0.0) ratio = max / min; 1371 else ratio = 0.0; 1372 PetscCall(PetscViewerASCIIPrintf(viewer, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1373 TotalFlops = tot; 1374 /* Flops/sec -- Must talk to Barry here */ 1375 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1376 else flops = 0.0; 1377 PetscCallMPI(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1378 PetscCallMPI(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1379 PetscCallMPI(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1380 avg = tot / ((PetscLogDouble)size); 1381 if (min != 0.0) ratio = max / min; 1382 else ratio = 0.0; 1383 PetscCall(PetscViewerASCIIPrintf(viewer, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1384 /* Memory */ 1385 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1386 if (mem > 0.0) { 1387 PetscCallMPI(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1388 PetscCallMPI(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1389 PetscCallMPI(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1390 avg = tot / ((PetscLogDouble)size); 1391 if (min != 0.0) ratio = max / min; 1392 else ratio = 0.0; 1393 PetscCall(PetscViewerASCIIPrintf(viewer, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1394 } 1395 /* Messages */ 1396 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1397 PetscCallMPI(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1398 PetscCallMPI(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1399 PetscCallMPI(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1400 avg = tot / ((PetscLogDouble)size); 1401 if (min != 0.0) ratio = max / min; 1402 else ratio = 0.0; 1403 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1404 numMessages = tot; 1405 /* Message Lengths */ 1406 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1407 PetscCallMPI(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1408 PetscCallMPI(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1409 PetscCallMPI(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1410 if (numMessages != 0) avg = tot / numMessages; 1411 else avg = 0.0; 1412 if (min != 0.0) ratio = max / min; 1413 else ratio = 0.0; 1414 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1415 messageLength = tot; 1416 /* Reductions */ 1417 PetscCallMPI(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1418 PetscCallMPI(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1419 PetscCallMPI(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1420 if (min != 0.0) ratio = max / min; 1421 else ratio = 0.0; 1422 PetscCall(PetscViewerASCIIPrintf(viewer, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1423 numReductions = red; /* wrong because uses count from process zero */ 1424 PetscCall(PetscViewerASCIIPrintf(viewer, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1425 PetscCall(PetscViewerASCIIPrintf(viewer, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1426 PetscCall(PetscViewerASCIIPrintf(viewer, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1427 1428 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 1429 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 1430 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 1431 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 1432 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 1433 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1434 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1435 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1436 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1437 if (numStages > 0) { 1438 for (stage = 0; stage < numStages; stage++) { 1439 PetscInt stage_id; 1440 1441 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1442 if (stage_id >= 0) { 1443 PetscStagePerf *stage_info; 1444 1445 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 1446 localStageUsed[stage] = stage_info->used; 1447 localStageVisible[stage] = stage_info->perfInfo.visible; 1448 } else { 1449 localStageUsed[stage] = PETSC_FALSE; 1450 localStageVisible[stage] = PETSC_TRUE; 1451 } 1452 } 1453 PetscCallMPI(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPI_C_BOOL, MPI_LOR, comm)); 1454 PetscCallMPI(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPI_C_BOOL, MPI_LAND, comm)); 1455 for (stage = 0; stage < numStages; stage++) { 1456 if (stageUsed[stage] && stageVisible[stage]) { 1457 PetscCall(PetscViewerASCIIPrintf(viewer, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1458 PetscCall(PetscViewerASCIIPrintf(viewer, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1459 break; 1460 } 1461 } 1462 for (stage = 0; stage < numStages; stage++) { 1463 PetscInt stage_id; 1464 PetscEventPerfInfo *stage_info; 1465 const char *stage_name; 1466 1467 if (!(stageUsed[stage] && stageVisible[stage])) continue; 1468 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1469 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1470 stage_info = &zero_info; 1471 if (localStageUsed[stage]) { 1472 PetscStagePerf *stage_perf_info; 1473 1474 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_perf_info)); 1475 stage_info = &stage_perf_info->perfInfo; 1476 } 1477 PetscCallMPI(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1478 PetscCallMPI(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1479 PetscCallMPI(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1480 PetscCallMPI(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1481 PetscCallMPI(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1482 mess *= 0.5; 1483 messLen *= 0.5; 1484 red /= size; 1485 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1486 else fracTime = 0.0; 1487 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1488 else fracFlops = 0.0; 1489 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1490 if (numMessages != 0.0) fracMessages = mess / numMessages; 1491 else fracMessages = 0.0; 1492 if (mess != 0.0) avgMessLen = messLen / mess; 1493 else avgMessLen = 0.0; 1494 if (messageLength != 0.0) fracLength = messLen / messageLength; 1495 else fracLength = 0.0; 1496 if (numReductions != 0.0) fracReductions = red / numReductions; 1497 else fracReductions = 0.0; 1498 PetscCall(PetscViewerASCIIPrintf(viewer, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, stage_name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1499 } 1500 } 1501 1502 PetscCall(PetscViewerASCIIPrintf(viewer, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1503 PetscCall(PetscViewerASCIIPrintf(viewer, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1504 PetscCall(PetscViewerASCIIPrintf(viewer, "Phase summary info:\n")); 1505 PetscCall(PetscViewerASCIIPrintf(viewer, " Count: number of times phase was executed\n")); 1506 PetscCall(PetscViewerASCIIPrintf(viewer, " Time and Flop: Max - maximum over all processes\n")); 1507 PetscCall(PetscViewerASCIIPrintf(viewer, " Ratio - ratio of maximum to minimum over all processes\n")); 1508 PetscCall(PetscViewerASCIIPrintf(viewer, " Mess: number of messages sent\n")); 1509 PetscCall(PetscViewerASCIIPrintf(viewer, " AvgLen: average message length (bytes)\n")); 1510 PetscCall(PetscViewerASCIIPrintf(viewer, " Reduct: number of global reductions\n")); 1511 PetscCall(PetscViewerASCIIPrintf(viewer, " Global: entire computation\n")); 1512 PetscCall(PetscViewerASCIIPrintf(viewer, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1513 PetscCall(PetscViewerASCIIPrintf(viewer, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1514 PetscCall(PetscViewerASCIIPrintf(viewer, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1515 PetscCall(PetscViewerASCIIPrintf(viewer, " %%R - percent reductions in this phase\n")); 1516 PetscCall(PetscViewerASCIIPrintf(viewer, " Total Mflop/s: 1e-6 * (sum of flop over all processes)/(max time over all processes)\n")); 1517 if (PetscLogMemory) { 1518 PetscCall(PetscViewerASCIIPrintf(viewer, " Memory usage is summed over all MPI processes, it is given in mega-bytes\n")); 1519 PetscCall(PetscViewerASCIIPrintf(viewer, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1520 PetscCall(PetscViewerASCIIPrintf(viewer, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1521 PetscCall(PetscViewerASCIIPrintf(viewer, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1522 PetscCall(PetscViewerASCIIPrintf(viewer, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1523 } 1524 #if defined(PETSC_HAVE_DEVICE) 1525 PetscCall(PetscViewerASCIIPrintf(viewer, " GPU Mflop/s: 1e-6 * (sum of flop on GPU over all processes)/(max GPU time over all processes)\n")); 1526 PetscCall(PetscViewerASCIIPrintf(viewer, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1527 PetscCall(PetscViewerASCIIPrintf(viewer, " CpuToGpu Size (Mbytes): 1e-6 * (total size of CPU to GPU copies per processor)\n")); 1528 PetscCall(PetscViewerASCIIPrintf(viewer, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1529 PetscCall(PetscViewerASCIIPrintf(viewer, " GpuToCpu Size (Mbytes): 1e-6 * (total size of GPU to CPU copies per processor)\n")); 1530 PetscCall(PetscViewerASCIIPrintf(viewer, " GPU %%F: percent flops on GPU in this event\n")); 1531 #endif 1532 PetscCall(PetscViewerASCIIPrintf(viewer, "------------------------------------------------------------------------------------------------------------------------\n")); 1533 1534 PetscCall(PetscLogViewWarnDebugging(viewer)); 1535 1536 /* Report events */ 1537 PetscCall(PetscViewerASCIIPrintf(viewer, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1538 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, " Malloc EMalloc MMalloc RMI")); 1539 #if defined(PETSC_HAVE_DEVICE) 1540 PetscCall(PetscViewerASCIIPrintf(viewer, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1541 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCall(PetscViewerASCIIPrintf(viewer, " GPU GPU")); 1542 #endif 1543 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1544 PetscCall(PetscViewerASCIIPrintf(viewer, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1545 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, " Mbytes Mbytes Mbytes Mbytes")); 1546 #if defined(PETSC_HAVE_DEVICE) 1547 PetscCall(PetscViewerASCIIPrintf(viewer, " Mflop/s Count Size Count Size %%F")); 1548 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCall(PetscViewerASCIIPrintf(viewer, " Joule Mflop/J")); 1549 #endif 1550 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1551 PetscCall(PetscViewerASCIIPrintf(viewer, "------------------------------------------------------------------------------------------------------------------------")); 1552 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, "-----------------------------")); 1553 #if defined(PETSC_HAVE_DEVICE) 1554 PetscCall(PetscViewerASCIIPrintf(viewer, "---------------------------------------")); 1555 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCall(PetscViewerASCIIPrintf(viewer, "-------------------")); 1556 #endif 1557 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1558 1559 #if defined(PETSC_HAVE_DEVICE) 1560 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1561 PetscCall(PetscLogStateGetEventFromName(state, "TaoSolve", &TAO_Solve)); 1562 PetscCall(PetscLogStateGetEventFromName(state, "TSStep", &TS_Step)); 1563 PetscCall(PetscLogStateGetEventFromName(state, "SNESSolve", &SNES_Solve)); 1564 PetscCall(PetscLogStateGetEventFromName(state, "KSPSolve", &KSP_Solve)); 1565 #endif 1566 1567 for (stage = 0; stage < numStages; stage++) { 1568 PetscInt stage_id; 1569 PetscEventPerfInfo *stage_info; 1570 const char *stage_name; 1571 1572 if (!(stageVisible[stage] && stageUsed[stage])) continue; 1573 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1574 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1575 PetscCall(PetscViewerASCIIPrintf(viewer, "\n--- Event Stage %d: %s\n\n", stage, stage_name)); 1576 stage_info = &zero_info; 1577 if (localStageUsed[stage]) { 1578 PetscStagePerf *stage_perf_info; 1579 1580 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_perf_info)); 1581 stage_info = &stage_perf_info->perfInfo; 1582 } 1583 PetscCallMPI(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1584 PetscCallMPI(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1585 PetscCallMPI(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1586 PetscCallMPI(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1587 PetscCallMPI(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1588 mess *= 0.5; 1589 messLen *= 0.5; 1590 red /= size; 1591 1592 for (event = 0; event < numEvents; event++) { 1593 PetscInt event_id; 1594 PetscEventPerfInfo *event_info = &zero_info; 1595 PetscBool is_zero = PETSC_FALSE; 1596 const char *event_name; 1597 1598 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1599 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1600 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerGetEventPerfInfo_Default(handler, stage_id, event_id, &event_info)); 1601 PetscCall(PetscArraycmp(event_info, &zero_info, 1, &is_zero)); 1602 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPI_C_BOOL, MPI_LAND, comm)); 1603 if (!is_zero) { 1604 flopr = event_info->flops; 1605 PetscCallMPI(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1606 PetscCallMPI(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1607 PetscCallMPI(MPIU_Allreduce(&event_info->flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1608 PetscCallMPI(MPIU_Allreduce(&event_info->time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1609 PetscCallMPI(MPIU_Allreduce(&event_info->time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1610 PetscCallMPI(MPIU_Allreduce(&event_info->time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1611 PetscCallMPI(MPIU_Allreduce(&event_info->numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1612 PetscCallMPI(MPIU_Allreduce(&event_info->messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1613 PetscCallMPI(MPIU_Allreduce(&event_info->numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1614 PetscCallMPI(MPIU_Allreduce(&event_info->count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1615 PetscCallMPI(MPIU_Allreduce(&event_info->count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1616 if (PetscLogMemory) { 1617 PetscCallMPI(MPIU_Allreduce(&event_info->memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1618 PetscCallMPI(MPIU_Allreduce(&event_info->mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1619 PetscCallMPI(MPIU_Allreduce(&event_info->mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1620 PetscCallMPI(MPIU_Allreduce(&event_info->mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1621 } 1622 #if defined(PETSC_HAVE_DEVICE) 1623 PetscCallMPI(MPIU_Allreduce(&event_info->CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1624 PetscCallMPI(MPIU_Allreduce(&event_info->GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1625 PetscCallMPI(MPIU_Allreduce(&event_info->CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1626 PetscCallMPI(MPIU_Allreduce(&event_info->GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1627 PetscCallMPI(MPIU_Allreduce(&event_info->GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1628 PetscCallMPI(MPIU_Allreduce(&event_info->GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1629 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCallMPI(MPIU_Allreduce(&event_info->GpuEnergy, &gjoules, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1630 #endif 1631 if (mint < 0.0) { 1632 PetscCall(PetscViewerASCIIPrintf(viewer, "WARNING!!! Minimum time %g over all processes for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, event_name)); 1633 mint = 0; 1634 } 1635 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processes for %s is negative! Not possible!", minf, event_name); 1636 #if defined(PETSC_HAVE_DEVICE) && !defined(PETSC_HAVE_KOKKOS_WITHOUT_GPU) 1637 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1638 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1639 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 1640 if (event_id != SNES_Solve && event_id != KSP_Solve && event_id != TS_Step && event_id != TAO_Solve) { 1641 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 1642 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 1643 } 1644 } 1645 #endif 1646 totm *= 0.5; 1647 totml *= 0.5; 1648 totr /= size; 1649 1650 if (maxC != 0) { 1651 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 1652 else ratC = 0.0; 1653 if (mint != 0.0) ratt = maxt / mint; 1654 else ratt = 0.0; 1655 if (minf != 0.0) ratf = maxf / minf; 1656 else ratf = 0.0; 1657 if (TotalTime != 0.0) fracTime = tott / TotalTime; 1658 else fracTime = 0.0; 1659 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 1660 else fracFlops = 0.0; 1661 if (stageTime != 0.0) fracStageTime = tott / stageTime; 1662 else fracStageTime = 0.0; 1663 if (flops != 0.0) fracStageFlops = totf / flops; 1664 else fracStageFlops = 0.0; 1665 if (numMessages != 0.0) fracMess = totm / numMessages; 1666 else fracMess = 0.0; 1667 if (messageLength != 0.0) fracMessLen = totml / messageLength; 1668 else fracMessLen = 0.0; 1669 if (numReductions != 0.0) fracRed = totr / numReductions; 1670 else fracRed = 0.0; 1671 if (mess != 0.0) fracStageMess = totm / mess; 1672 else fracStageMess = 0.0; 1673 if (messLen != 0.0) fracStageMessLen = totml / messLen; 1674 else fracStageMessLen = 0.0; 1675 if (red != 0.0) fracStageRed = totr / red; 1676 else fracStageRed = 0.0; 1677 if (totm != 0.0) totml /= totm; 1678 else totml = 0.0; 1679 if (maxt != 0.0) flopr = totf / maxt; 1680 else flopr = 0.0; 1681 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) { 1682 if (PetscIsNanReal(maxt)) 1683 PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f n/a n/a %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages n/a", event_name, maxC, ratC, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed)); 1684 else 1685 PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 1686 } else { 1687 if (PetscIsNanReal(maxt)) { // when maxt, ratt, flopr are NaN (i.e., run with GPUs but without -log_view_gpu_time), replace the confusing "nan" with "n/a" 1688 PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f n/a n/a %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f n/a", event_name, maxC, ratC, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed)); 1689 } else { 1690 PetscCall(PetscViewerASCIIPrintf(viewer, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 1691 } 1692 } 1693 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 1694 #if defined(PETSC_HAVE_DEVICE) 1695 if (totf != 0.0) fracgflops = gflops / totf; 1696 else fracgflops = 0.0; 1697 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 1698 else gflopr = 0.0; 1699 if (PetscIsNanReal(gflopr)) { 1700 PetscCall(PetscViewerASCIIPrintf(viewer, " n/a %4.0f %3.2e %4.0f %3.2e %2.0f", cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 1701 } else { 1702 PetscCall(PetscViewerASCIIPrintf(viewer, " %5.0f %4.0f %3.2e %4.0f %3.2e %2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 1703 } 1704 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) { 1705 if (gjoules != 0.0) geff = gflops / gjoules; 1706 else geff = 0.0; 1707 PetscCall(PetscViewerASCIIPrintf(viewer, " %4.2e %4.2e", gjoules, geff)); 1708 } 1709 #endif 1710 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1711 } 1712 } 1713 } 1714 } 1715 1716 /* Memory usage and object creation */ 1717 PetscCall(PetscViewerASCIIPrintf(viewer, "------------------------------------------------------------------------------------------------------------------------")); 1718 if (PetscLogMemory) PetscCall(PetscViewerASCIIPrintf(viewer, "-----------------------------")); 1719 #if defined(PETSC_HAVE_DEVICE) 1720 PetscCall(PetscViewerASCIIPrintf(viewer, "---------------------------------------")); 1721 if (PetscLogGpuEnergyFlag || PetscLogGpuEnergyMeterFlag) PetscCall(PetscViewerASCIIPrintf(viewer, "-------------------")); 1722 #endif 1723 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1724 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1725 1726 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1727 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1728 stats for stages local to processor sets. 1729 */ 1730 /* We should figure out the longest object name here (now 20 characters) */ 1731 PetscCall(PetscViewerASCIIPrintf(viewer, "Object Type Creations Destructions. Reports information only for process 0.\n")); 1732 for (stage = 0; stage < numStages; stage++) { 1733 const char *stage_name; 1734 1735 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1736 PetscCall(PetscViewerASCIIPrintf(viewer, "\n--- Event Stage %d: %s\n\n", stage, stage_name)); 1737 if (localStageUsed[stage]) { 1738 PetscInt num_classes; 1739 1740 PetscCall(PetscLogStateGetNumClasses(state, &num_classes)); 1741 for (oclass = 0; oclass < num_classes; oclass++) { 1742 PetscClassPerf *class_perf_info; 1743 1744 PetscCall(PetscLogHandlerDefaultGetClassPerf(handler, stage, oclass, &class_perf_info)); 1745 if (class_perf_info->creations > 0 || class_perf_info->destructions > 0) { 1746 PetscLogClassInfo class_reg_info; 1747 PetscBool flg = PETSC_FALSE; 1748 1749 PetscCall(PetscLogStateClassGetInfo(state, oclass, &class_reg_info)); 1750 if (stage == 0 && oclass == num_classes - 1) { 1751 if (PetscGlobalArgc == 0 && PetscGlobalArgs == NULL) { 1752 PetscCall(PetscStrcmp(class_reg_info.name, "Viewer", &flg)); 1753 if (flg && class_perf_info->creations == PetscLogNumViewersCreated && class_perf_info->destructions == PetscLogNumViewersDestroyed) continue; 1754 } 1755 } 1756 PetscCall(PetscViewerASCIIPrintf(viewer, "%20s %5d %5d\n", class_reg_info.name, class_perf_info->creations, class_perf_info->destructions)); 1757 } 1758 } 1759 } 1760 } 1761 1762 PetscCall(PetscFree(localStageUsed)); 1763 PetscCall(PetscFree(stageUsed)); 1764 PetscCall(PetscFree(localStageVisible)); 1765 PetscCall(PetscFree(stageVisible)); 1766 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1767 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1768 1769 /* Information unrelated to this particular run */ 1770 PetscCall(PetscViewerASCIIPrintf(viewer, "========================================================================================================================\n")); 1771 PetscCall(PetscTime(&y)); 1772 PetscCall(PetscTime(&x)); 1773 PetscCall(PetscTime(&y)); 1774 PetscCall(PetscTime(&y)); 1775 PetscCall(PetscTime(&y)); 1776 PetscCall(PetscTime(&y)); 1777 PetscCall(PetscTime(&y)); 1778 PetscCall(PetscTime(&y)); 1779 PetscCall(PetscTime(&y)); 1780 PetscCall(PetscTime(&y)); 1781 PetscCall(PetscTime(&y)); 1782 PetscCall(PetscTime(&y)); 1783 PetscCall(PetscViewerASCIIPrintf(viewer, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 1784 /* MPI information */ 1785 if (size > 1) { 1786 MPI_Status status; 1787 PetscMPIInt tag; 1788 MPI_Comm newcomm; 1789 1790 PetscCallMPI(MPI_Barrier(comm)); 1791 PetscCall(PetscTime(&x)); 1792 PetscCallMPI(MPI_Barrier(comm)); 1793 PetscCallMPI(MPI_Barrier(comm)); 1794 PetscCallMPI(MPI_Barrier(comm)); 1795 PetscCallMPI(MPI_Barrier(comm)); 1796 PetscCallMPI(MPI_Barrier(comm)); 1797 PetscCall(PetscTime(&y)); 1798 PetscCall(PetscViewerASCIIPrintf(viewer, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 1799 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 1800 PetscCallMPI(MPI_Barrier(comm)); 1801 if (rank) { 1802 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 1803 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 1804 } else { 1805 PetscCall(PetscTime(&x)); 1806 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 1807 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 1808 PetscCall(PetscTime(&y)); 1809 PetscCall(PetscViewerASCIIPrintf(viewer, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 1810 } 1811 PetscCall(PetscCommDestroy(&newcomm)); 1812 } 1813 PetscCall(PetscOptionsView(NULL, viewer)); 1814 1815 /* Machine and compile information */ 1816 if (PetscDefined(USE_FORTRAN_KERNELS)) { 1817 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with FORTRAN kernels\n")); 1818 } else { 1819 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled without FORTRAN kernels\n")); 1820 } 1821 if (PetscDefined(USE_64BIT_INDICES)) { 1822 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with 64-bit PetscInt\n")); 1823 } else if (PetscDefined(USE___FLOAT128)) { 1824 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with 32-bit PetscInt\n")); 1825 } 1826 if (PetscDefined(USE_REAL_SINGLE)) { 1827 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with single precision PetscScalar and PetscReal\n")); 1828 } else if (PetscDefined(USE___FLOAT128)) { 1829 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 1830 } 1831 if (PetscDefined(USE_REAL_MAT_SINGLE)) { 1832 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with single precision matrices\n")); 1833 } else { 1834 PetscCall(PetscViewerASCIIPrintf(viewer, "Compiled with full precision matrices (default)\n")); 1835 } 1836 PetscCall(PetscViewerASCIIPrintf(viewer, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 1837 1838 PetscCall(PetscViewerASCIIPrintf(viewer, "Configure options: %s", petscconfigureoptions)); 1839 PetscCall(PetscViewerASCIIPrintf(viewer, "%s", petscmachineinfo)); 1840 PetscCall(PetscViewerASCIIPrintf(viewer, "%s", petsccompilerinfo)); 1841 PetscCall(PetscViewerASCIIPrintf(viewer, "%s", petsccompilerflagsinfo)); 1842 PetscCall(PetscViewerASCIIPrintf(viewer, "%s", petsclinkerinfo)); 1843 1844 /* Cleanup */ 1845 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1846 PetscCall(PetscLogViewWarnNoGpuAwareMpi(viewer)); 1847 PetscCall(PetscLogViewWarnDebugging(viewer)); 1848 PetscCall(PetscFPTrapPop()); 1849 PetscFunctionReturn(PETSC_SUCCESS); 1850 } 1851 1852 static PetscErrorCode PetscLogHandlerView_Default(PetscLogHandler handler, PetscViewer viewer) 1853 { 1854 PetscViewerFormat format; 1855 1856 PetscFunctionBegin; 1857 PetscCall(PetscViewerGetFormat(viewer, &format)); 1858 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 1859 PetscCall(PetscLogHandlerView_Default_Info(handler, viewer)); 1860 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1861 PetscCall(PetscLogHandlerView_Default_Detailed(handler, viewer)); 1862 } else if (format == PETSC_VIEWER_ASCII_CSV) { 1863 PetscCall(PetscLogHandlerView_Default_CSV(handler, viewer)); 1864 } 1865 PetscFunctionReturn(PETSC_SUCCESS); 1866 } 1867 1868 /*MC 1869 PETSCLOGHANDLERDEFAULT - PETSCLOGHANDLERDEFAULT = "default" - A `PetscLogHandler` that collects data for PETSc 1870 default profiling log viewers (`PetscLogView()` and `PetscLogDump()`). A log handler of this type is 1871 created and started (`PetscLogHandlerStart()`) by `PetscLogDefaultBegin()`. 1872 1873 Options Database Keys: 1874 + -log_include_actions - include a growing list of actions (event beginnings and endings, object creations and destructions) in `PetscLogDump()` (`PetscLogActions()`). 1875 - -log_include_objects - include a growing list of object creations and destructions in `PetscLogDump()` (`PetscLogObjects()`). 1876 1877 Level: developer 1878 1879 .seealso: [](ch_profiling), `PetscLogHandler` 1880 M*/ 1881 1882 PETSC_INTERN PetscErrorCode PetscLogHandlerCreate_Default(PetscLogHandler handler) 1883 { 1884 PetscFunctionBegin; 1885 PetscCall(PetscLogHandlerContextCreate_Default((PetscLogHandler_Default *)&handler->data)); 1886 handler->ops->destroy = PetscLogHandlerDestroy_Default; 1887 handler->ops->eventbegin = PetscLogHandlerEventBegin_Default; 1888 handler->ops->eventend = PetscLogHandlerEventEnd_Default; 1889 handler->ops->eventsync = PetscLogHandlerEventSync_Default; 1890 handler->ops->objectcreate = PetscLogHandlerObjectCreate_Default; 1891 handler->ops->objectdestroy = PetscLogHandlerObjectDestroy_Default; 1892 handler->ops->stagepush = PetscLogHandlerStagePush_Default; 1893 handler->ops->stagepop = PetscLogHandlerStagePop_Default; 1894 handler->ops->view = PetscLogHandlerView_Default; 1895 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerGetEventPerfInfo_C", PetscLogHandlerGetEventPerfInfo_Default)); 1896 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerGetStagePerfInfo_C", PetscLogHandlerGetStagePerfInfo_Default)); 1897 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerSetLogActions_C", PetscLogHandlerSetLogActions_Default)); 1898 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerSetLogObjects_C", PetscLogHandlerSetLogObjects_Default)); 1899 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerLogObjectState_C", PetscLogHandlerLogObjectState_Default)); 1900 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerGetNumObjects_C", PetscLogHandlerGetNumObjects_Default)); 1901 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventDeactivatePush_C", PetscLogHandlerEventDeactivatePush_Default)); 1902 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventDeactivatePop_C", PetscLogHandlerEventDeactivatePop_Default)); 1903 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventsPause_C", PetscLogHandlerEventsPause_Default)); 1904 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerEventsResume_C", PetscLogHandlerEventsResume_Default)); 1905 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerDump_C", PetscLogHandlerDump_Default)); 1906 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerStageSetVisible_C", PetscLogHandlerStageSetVisible_Default)); 1907 PetscCall(PetscObjectComposeFunction((PetscObject)handler, "PetscLogHandlerStageGetVisible_C", PetscLogHandlerStageGetVisible_Default)); 1908 PetscFunctionReturn(PETSC_SUCCESS); 1909 } 1910