1 #include <petscviewer.h> 2 #include <petscdevice.h> 3 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 4 #include <petsc/private/loghandlerimpl.h> 5 #include <petsc/private/deviceimpl.h> 6 #include <petscconfiginfo.h> 7 #include <petscmachineinfo.h> 8 #include "logdefault.h" 9 10 static PetscErrorCode PetscEventPerfInfoInit(PetscEventPerfInfo *eventInfo) 11 { 12 PetscFunctionBegin; 13 PetscCall(PetscMemzero(eventInfo, sizeof(*eventInfo))); 14 eventInfo->visible = PETSC_TRUE; 15 eventInfo->id = -1; 16 eventInfo->dof[0] = -1.0; 17 eventInfo->dof[1] = -1.0; 18 eventInfo->dof[2] = -1.0; 19 eventInfo->dof[3] = -1.0; 20 eventInfo->dof[4] = -1.0; 21 eventInfo->dof[5] = -1.0; 22 eventInfo->dof[6] = -1.0; 23 eventInfo->dof[7] = -1.0; 24 eventInfo->errors[0] = -1.0; 25 eventInfo->errors[1] = -1.0; 26 eventInfo->errors[2] = -1.0; 27 eventInfo->errors[3] = -1.0; 28 eventInfo->errors[4] = -1.0; 29 eventInfo->errors[5] = -1.0; 30 eventInfo->errors[6] = -1.0; 31 eventInfo->errors[7] = -1.0; 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 static PetscErrorCode PetscEventPerfInfoTic_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool resume) 36 { 37 PetscFunctionBegin; 38 if (resume) { 39 eventInfo->timeTmp -= time; 40 eventInfo->flopsTmp -= petsc_TotalFlops_th; 41 } else { 42 eventInfo->timeTmp = -time; 43 eventInfo->flopsTmp = -petsc_TotalFlops_th; 44 } 45 eventInfo->numMessages -= petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th; 46 eventInfo->messageLength -= petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len_th + petsc_send_len_th; 47 eventInfo->numReductions -= petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th; 48 #if defined(PETSC_HAVE_DEVICE) 49 eventInfo->CpuToGpuCount -= petsc_ctog_ct_th; 50 eventInfo->GpuToCpuCount -= petsc_gtoc_ct_th; 51 eventInfo->CpuToGpuSize -= petsc_ctog_sz_th; 52 eventInfo->GpuToCpuSize -= petsc_gtoc_sz_th; 53 eventInfo->GpuFlops -= petsc_gflops_th; 54 eventInfo->GpuTime -= petsc_gtime; 55 #endif 56 if (logMemory) { 57 PetscLogDouble usage; 58 PetscCall(PetscMemoryGetCurrentUsage(&usage)); 59 eventInfo->memIncrease -= usage; 60 PetscCall(PetscMallocGetCurrentUsage(&usage)); 61 eventInfo->mallocSpace -= usage; 62 PetscCall(PetscMallocGetMaximumUsage(&usage)); 63 eventInfo->mallocIncrease -= usage; 64 PetscCall(PetscMallocPushMaximumUsage(event)); 65 } 66 PetscFunctionReturn(PETSC_SUCCESS); 67 } 68 69 static PetscErrorCode PetscEventPerfInfoTic(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 70 { 71 PetscFunctionBegin; 72 PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_FALSE)); 73 PetscFunctionReturn(PETSC_SUCCESS); 74 } 75 76 static PetscErrorCode PetscEventPerfInfoResume(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 77 { 78 PetscFunctionBegin; 79 PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_TRUE)); 80 PetscFunctionReturn(PETSC_SUCCESS); 81 } 82 83 static PetscErrorCode PetscEventPerfInfoToc_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool pause) 84 { 85 PetscFunctionBegin; 86 eventInfo->timeTmp += time; 87 eventInfo->flopsTmp += petsc_TotalFlops_th; 88 if (!pause) { 89 eventInfo->time += eventInfo->timeTmp; 90 eventInfo->time2 += eventInfo->timeTmp * eventInfo->timeTmp; 91 eventInfo->flops += eventInfo->flopsTmp; 92 eventInfo->flops2 += eventInfo->flopsTmp * eventInfo->flopsTmp; 93 } 94 eventInfo->numMessages += petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th; 95 eventInfo->messageLength += petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len + petsc_send_len_th; 96 eventInfo->numReductions += petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th; 97 #if defined(PETSC_HAVE_DEVICE) 98 eventInfo->CpuToGpuCount += petsc_ctog_ct_th; 99 eventInfo->GpuToCpuCount += petsc_gtoc_ct_th; 100 eventInfo->CpuToGpuSize += petsc_ctog_sz_th; 101 eventInfo->GpuToCpuSize += petsc_gtoc_sz_th; 102 eventInfo->GpuFlops += petsc_gflops_th; 103 eventInfo->GpuTime += petsc_gtime; 104 #endif 105 if (logMemory) { 106 PetscLogDouble usage, musage; 107 PetscCall(PetscMemoryGetCurrentUsage(&usage)); /* the comments below match the column labels printed in PetscLogView_Default() */ 108 eventInfo->memIncrease += usage; /* RMI */ 109 PetscCall(PetscMallocGetCurrentUsage(&usage)); 110 eventInfo->mallocSpace += usage; /* Malloc */ 111 PetscCall(PetscMallocPopMaximumUsage((int)event, &musage)); 112 eventInfo->mallocIncreaseEvent = PetscMax(musage - usage, eventInfo->mallocIncreaseEvent); /* EMalloc */ 113 PetscCall(PetscMallocGetMaximumUsage(&usage)); 114 eventInfo->mallocIncrease += usage; /* MMalloc */ 115 } 116 PetscFunctionReturn(PETSC_SUCCESS); 117 } 118 119 static PetscErrorCode PetscEventPerfInfoToc(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 120 { 121 PetscFunctionBegin; 122 PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_FALSE)); 123 PetscFunctionReturn(PETSC_SUCCESS); 124 } 125 126 static PetscErrorCode PetscEventPerfInfoPause(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 127 { 128 PetscFunctionBegin; 129 PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_FALSE)); 130 PetscFunctionReturn(PETSC_SUCCESS); 131 } 132 133 static PetscErrorCode PetscEventPerfInfoAdd_Internal(const PetscEventPerfInfo *eventInfo, PetscEventPerfInfo *outInfo) 134 { 135 PetscFunctionBegin; 136 outInfo->count += eventInfo->count; 137 outInfo->time += eventInfo->time; 138 outInfo->time2 += eventInfo->time2; 139 outInfo->flops += eventInfo->flops; 140 outInfo->flops2 += eventInfo->flops2; 141 outInfo->numMessages += eventInfo->numMessages; 142 outInfo->messageLength += eventInfo->messageLength; 143 outInfo->numReductions += eventInfo->numReductions; 144 #if defined(PETSC_HAVE_DEVICE) 145 outInfo->CpuToGpuCount += eventInfo->CpuToGpuCount; 146 outInfo->GpuToCpuCount += eventInfo->GpuToCpuCount; 147 outInfo->CpuToGpuSize += eventInfo->CpuToGpuSize; 148 outInfo->GpuToCpuSize += eventInfo->GpuToCpuSize; 149 outInfo->GpuFlops += eventInfo->GpuFlops; 150 outInfo->GpuTime += eventInfo->GpuTime; 151 #endif 152 outInfo->memIncrease += eventInfo->memIncrease; 153 outInfo->mallocSpace += eventInfo->mallocSpace; 154 outInfo->mallocIncreaseEvent += eventInfo->mallocIncreaseEvent; 155 outInfo->mallocIncrease += eventInfo->mallocIncrease; 156 PetscFunctionReturn(PETSC_SUCCESS); 157 } 158 159 PETSC_LOG_RESIZABLE_ARRAY(EventPerfArray, PetscEventPerfInfo, PetscLogEvent, PetscEventPerfInfoInit, NULL, NULL) 160 161 /* --- PetscClassPerf --- */ 162 163 typedef struct { 164 int creations; /* The number of objects of this class created */ 165 int destructions; /* The number of objects of this class destroyed */ 166 PetscLogDouble mem; /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */ 167 PetscLogDouble descMem; /* The total memory allocated by descendents of these objects; this is completely wrong and should possibly be removed */ 168 } PetscClassPerf; 169 170 static PetscErrorCode PetscClassPerfInit(PetscClassPerf *classInfo) 171 { 172 PetscFunctionBegin; 173 PetscCall(PetscMemzero(classInfo, sizeof(*classInfo))); 174 PetscFunctionReturn(PETSC_SUCCESS); 175 } 176 177 PETSC_LOG_RESIZABLE_ARRAY(ClassPerfArray, PetscClassPerf, PetscLogClass, PetscClassPerfInit, NULL, NULL) 178 179 /* --- PetscStagePerf --- */ 180 181 typedef struct _PetscStagePerf { 182 PetscBool used; /* The stage was pushed on this processor */ 183 PetscEventPerfInfo perfInfo; /* The stage performance information */ 184 PetscLogEventPerfArray eventLog; /* The event information for this stage */ 185 PetscLogClassPerfArray classLog; /* The class information for this stage */ 186 } PetscStagePerf; 187 188 static PetscErrorCode PetscStageInfoInit(PetscStagePerf *stageInfo) 189 { 190 PetscFunctionBegin; 191 PetscCall(PetscMemzero(stageInfo, sizeof(*stageInfo))); 192 PetscCall(PetscLogEventPerfArrayCreate(128, &(stageInfo->eventLog))); 193 PetscCall(PetscLogClassPerfArrayCreate(128, &(stageInfo->classLog))); 194 PetscCall(PetscEventPerfInfoInit(&stageInfo->perfInfo)); 195 PetscFunctionReturn(PETSC_SUCCESS); 196 } 197 198 static PetscErrorCode PetscStageInfoReset(PetscStagePerf *stageInfo) 199 { 200 PetscFunctionBegin; 201 PetscCall(PetscLogEventPerfArrayDestroy(&(stageInfo->eventLog))); 202 PetscCall(PetscLogClassPerfArrayDestroy(&(stageInfo->classLog))); 203 PetscFunctionReturn(PETSC_SUCCESS); 204 } 205 206 PETSC_LOG_RESIZABLE_ARRAY(StageInfoArray, PetscStagePerf, PetscLogStage, PetscStageInfoInit, PetscStageInfoReset, NULL) 207 208 /* --- Action --- */ 209 210 /* The structure for action logging */ 211 typedef enum { 212 PETSC_LOG_ACTION_CREATE, 213 PETSC_LOG_ACTION_DESTROY, 214 PETSC_LOG_ACTION_BEGIN, 215 PETSC_LOG_ACTION_END, 216 } PetscLogActionType; 217 218 PETSC_LOG_RESIZABLE_ARRAY(ActionArray, Action, PetscLogEvent, NULL, NULL, NULL) 219 220 /* --- Object --- */ 221 222 PETSC_LOG_RESIZABLE_ARRAY(ObjectArray, Object, PetscObject, NULL, NULL, NULL) 223 224 /* Map from (threadid,stage,event) to perfInfo data struct */ 225 #include <petsc/private/hashmapijk.h> 226 227 #if !PetscDefined(HAVE_THREADSAFETY) 228 // PetscHMapEvent is only declared in logimpl.h if PetscDefined(HAVE_THREADSAFETY): define it otherwise so that we 229 // can just always have it in the struct 230 PETSC_HASH_MAP(HMapEvent, PetscHashIJKKey, PetscEventPerfInfo *, PetscHashIJKKeyHash, PetscHashIJKKeyEqual, NULL) 231 #endif 232 233 typedef struct _n_PetscLogHandler_Default *PetscLogHandler_Default; 234 struct _n_PetscLogHandler_Default { 235 PetscLogStageInfoArray stages; 236 PetscSpinlock lock; 237 PetscLogActionArray petsc_actions; 238 PetscLogObjectArray petsc_objects; 239 PetscBool petsc_logActions; 240 PetscBool petsc_logObjects; 241 int petsc_numObjectsCreated; 242 int petsc_numObjectsDestroyed; 243 PetscHMapEvent eventInfoMap_th; 244 int pause_depth; 245 }; 246 247 /* --- PetscLogHandler_Default --- */ 248 249 static PetscErrorCode PetscLogHandlerContextCreate_Default(PetscLogHandler_Default *def_p) 250 { 251 PetscLogHandler_Default def; 252 253 PetscFunctionBegin; 254 PetscCall(PetscNew(def_p)); 255 def = *def_p; 256 PetscCall(PetscLogStageInfoArrayCreate(8, &(def->stages))); 257 PetscCall(PetscLogActionArrayCreate(64, &(def->petsc_actions))); 258 PetscCall(PetscLogObjectArrayCreate(64, &(def->petsc_objects))); 259 260 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_actions", &def->petsc_logActions, NULL)); 261 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_objects", &def->petsc_logObjects, NULL)); 262 if (PetscDefined(HAVE_THREADSAFETY)) { PetscCall(PetscHMapEventCreate(&def->eventInfoMap_th)); } 263 PetscFunctionReturn(PETSC_SUCCESS); 264 } 265 266 static PetscErrorCode PetscLogHandlerDestroy_Default(PetscLogHandler h) 267 { 268 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 269 270 PetscFunctionBegin; 271 PetscCall(PetscLogStageInfoArrayDestroy(&def->stages)); 272 PetscCall(PetscLogActionArrayDestroy(&def->petsc_actions)); 273 PetscCall(PetscLogObjectArrayDestroy(&def->petsc_objects)); 274 if (def->eventInfoMap_th) { 275 PetscEventPerfInfo **array; 276 PetscInt n, off = 0; 277 278 PetscCall(PetscHMapEventGetSize(def->eventInfoMap_th, &n)); 279 PetscCall(PetscMalloc1(n, &array)); 280 PetscCall(PetscHMapEventGetVals(def->eventInfoMap_th, &off, array)); 281 for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i])); 282 PetscCall(PetscFree(array)); 283 PetscCall(PetscHMapEventDestroy(&def->eventInfoMap_th)); 284 } 285 PetscCall(PetscFree(def)); 286 PetscFunctionReturn(PETSC_SUCCESS); 287 } 288 289 static PetscErrorCode PetscLogHandlerDefaultGetStageInfo(PetscLogHandler handler, PetscLogStage stage, PetscStagePerf **stage_info_p) 290 { 291 PetscStagePerf *stage_info = NULL; 292 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 293 294 PetscFunctionBegin; 295 PetscCall(PetscLogStageInfoArrayResize(def->stages, stage + 1)); 296 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 297 *stage_info_p = stage_info; 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultGetEventPerfInfo(PetscLogHandler handler, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **event_info_p) 302 { 303 PetscEventPerfInfo *event_info = NULL; 304 PetscStagePerf *stage_info = NULL; 305 PetscLogEventPerfArray event_log; 306 307 PetscFunctionBegin; 308 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(handler->state, &stage)); 309 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 310 event_log = stage_info->eventLog; 311 PetscCall(PetscLogEventPerfArrayResize(event_log, event + 1)); 312 PetscCall(PetscLogEventPerfArrayGetRef(event_log, event, &event_info)); 313 event_info->id = event; 314 *event_info_p = event_info; 315 PetscFunctionReturn(PETSC_SUCCESS); 316 } 317 318 static PetscErrorCode PetscLogHandlerDefaultGetClassPerf(PetscLogHandler handler, PetscLogStage stage, PetscLogClass clss, PetscClassPerf **class_info) 319 { 320 PetscLogClassPerfArray class_log; 321 PetscStagePerf *stage_info; 322 323 PetscFunctionBegin; 324 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 325 class_log = stage_info->classLog; 326 PetscCall(PetscLogClassPerfArrayResize(class_log, clss + 1)); 327 PetscCall(PetscLogClassPerfArrayGetRef(class_log, clss, class_info)); 328 PetscFunctionReturn(PETSC_SUCCESS); 329 } 330 331 static PetscErrorCode PetscLogHandlerObjectCreate_Default(PetscLogHandler h, PetscObject obj) 332 { 333 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 334 PetscLogState state; 335 PetscLogStage stage; 336 PetscClassPerf *classInfo; 337 int oclass = 0; 338 339 PetscFunctionBegin; 340 PetscCall(PetscLogHandlerGetState(h, &state)); 341 PetscCall(PetscSpinlockLock(&def->lock)); 342 /* Record stage info */ 343 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 344 PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass)); 345 PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo)); 346 classInfo->creations++; 347 /* Record the creation action */ 348 if (def->petsc_logActions) { 349 Action new_action; 350 351 PetscCall(PetscTime(&new_action.time)); 352 new_action.time -= petsc_BaseTime; 353 new_action.action = PETSC_LOG_ACTION_CREATE; 354 new_action.event = -1; 355 new_action.classid = obj->classid; 356 new_action.id1 = obj->id; 357 new_action.id2 = -1; 358 new_action.id3 = -1; 359 new_action.flops = petsc_TotalFlops; 360 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 361 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 362 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 363 } 364 /* We don't just use obj->id to count all objects that are created 365 because PetscLogHandlers are objects and PetscLogObjectDestroy() will not 366 be called for them: the number of objects created and destroyed as counted 367 here and below would have an imbalance */ 368 def->petsc_numObjectsCreated++; 369 /* Record the object */ 370 if (def->petsc_logObjects) { 371 Object new_object; 372 373 new_object.parent = -1; 374 new_object.obj = obj; 375 new_object.mem = 0; 376 377 PetscCall(PetscMemzero(new_object.name, sizeof(new_object.name))); 378 PetscCall(PetscMemzero(new_object.info, sizeof(new_object.info))); 379 PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1"); 380 PetscCall(PetscLogObjectArrayResize(def->petsc_objects, obj->id)); 381 PetscCall(PetscLogObjectArraySet(def->petsc_objects, obj->id - 1, new_object)); 382 } 383 PetscCall(PetscSpinlockUnlock(&def->lock)); 384 PetscFunctionReturn(PETSC_SUCCESS); 385 } 386 387 static PetscErrorCode PetscLogHandlerObjectDestroy_Default(PetscLogHandler h, PetscObject obj) 388 { 389 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 390 PetscLogState state; 391 PetscLogStage stage; 392 PetscClassPerf *classInfo; 393 int oclass = 0; 394 395 PetscFunctionBegin; 396 PetscCall(PetscLogHandlerGetState(h, &state)); 397 /* Record stage info */ 398 PetscCall(PetscSpinlockLock(&def->lock)); 399 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 400 if (stage >= 0) { 401 /* stage < 0 can happen if the log summary is output before some things are destroyed */ 402 PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass)); 403 PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo)); 404 classInfo->destructions++; 405 } 406 /* Cannot Credit all ancestors with your memory because they may have already been destroyed*/ 407 def->petsc_numObjectsDestroyed++; 408 /* Dynamically enlarge logging structures */ 409 /* Record the destruction action */ 410 if (def->petsc_logActions) { 411 Action new_action; 412 413 PetscCall(PetscTime(&new_action.time)); 414 new_action.time -= petsc_BaseTime; 415 new_action.event = -1; 416 new_action.action = PETSC_LOG_ACTION_DESTROY; 417 new_action.classid = obj->classid; 418 new_action.id1 = obj->id; 419 new_action.id2 = -1; 420 new_action.id3 = -1; 421 new_action.flops = petsc_TotalFlops; 422 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 423 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 424 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 425 } 426 if (def->petsc_logObjects) { 427 Object *obj_entry = NULL; 428 429 PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1"); 430 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, obj->id - 1, &obj_entry)); 431 if (obj->name) PetscCall(PetscStrncpy(obj_entry->name, obj->name, 64)); 432 obj_entry->obj = NULL; 433 } 434 PetscCall(PetscSpinlockUnlock(&def->lock)); 435 PetscFunctionReturn(PETSC_SUCCESS); 436 } 437 438 static PetscErrorCode PetscLogHandlerEventSync_Default(PetscLogHandler h, PetscLogEvent event, MPI_Comm comm) 439 { 440 PetscLogState state; 441 PetscLogEventInfo event_info; 442 PetscEventPerfInfo *event_perf_info; 443 int stage; 444 PetscLogDouble time = 0.0; 445 446 PetscFunctionBegin; 447 if (!(PetscLogSyncOn) || comm == MPI_COMM_NULL) PetscFunctionReturn(PETSC_SUCCESS); 448 PetscCall(PetscLogHandlerGetState(h, &state)); 449 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 450 if (!event_info.collective) PetscFunctionReturn(PETSC_SUCCESS); 451 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 452 PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info)); 453 if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 454 455 PetscCall(PetscTimeSubtract(&time)); 456 PetscCallMPI(MPI_Barrier(comm)); 457 PetscCall(PetscTimeAdd(&time)); 458 event_perf_info->syncTime += time; 459 PetscFunctionReturn(PETSC_SUCCESS); 460 } 461 462 static PetscErrorCode PetscLogGetStageEventPerfInfo_threaded(PetscLogHandler_Default def, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **eventInfo) 463 { 464 PetscEventPerfInfo *leventInfo = NULL; 465 PetscHashIJKKey key; 466 467 PetscFunctionBegin; 468 469 #if PetscDefined(HAVE_THREADSAFETY) 470 key.i = PetscLogGetTid(); 471 #else 472 key.i = 0; 473 #endif 474 key.j = stage; 475 key.k = event; 476 PetscCall(PetscSpinlockLock(&def->lock)); 477 PetscCall(PetscHMapEventGet(def->eventInfoMap_th, key, &leventInfo)); 478 if (!leventInfo) { 479 PetscCall(PetscNew(&leventInfo)); 480 leventInfo->id = event; 481 PetscCall(PetscHMapEventSet(def->eventInfoMap_th, key, leventInfo)); 482 } 483 PetscCall(PetscSpinlockUnlock(&def->lock)); 484 *eventInfo = leventInfo; 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 #if defined(PETSC_HAVE_CUDA) 489 #include <nvToolsExt.h> 490 #endif 491 static PetscErrorCode PetscLogHandlerEventBegin_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 492 { 493 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 494 PetscEventPerfInfo *event_perf_info = NULL; 495 PetscLogEventInfo event_info; 496 PetscLogDouble time; 497 PetscLogState state; 498 PetscLogStage stage; 499 500 PetscFunctionBegin; 501 PetscCall(PetscLogHandlerGetState(h, &state)); 502 /* Synchronization */ 503 PetscCall(PetscLogHandlerEventSync_Default(h, event, PetscObjectComm(o1))); 504 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 505 if (def->pause_depth > 0) stage = 0; // in pause-mode, all events run on the main stage 506 if (PetscDefined(HAVE_THREADSAFETY)) { 507 PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info)); 508 if (event_perf_info->depth == 0) { PetscCall(PetscEventPerfInfoInit(event_perf_info)); } 509 } else { 510 PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info)); 511 } 512 PetscCheck(event_perf_info->depth >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to begin a paused event, this is not allowed"); 513 event_perf_info->depth++; 514 /* Check for double counting */ 515 if (event_perf_info->depth > 1) PetscFunctionReturn(PETSC_SUCCESS); 516 #if defined(PETSC_HAVE_CUDA) 517 if (PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { 518 PetscLogEventInfo event_reg_info; 519 520 PetscCall(PetscLogStateEventGetInfo(state, event, &event_reg_info)); 521 nvtxRangePushA(event_reg_info.name); 522 } 523 #endif 524 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 525 /* Log the performance info */ 526 event_perf_info->count++; 527 PetscCall(PetscTime(&time)); 528 PetscCall(PetscEventPerfInfoTic(event_perf_info, time, PetscLogMemory, (int)event)); 529 if (def->petsc_logActions) { 530 PetscLogDouble curTime; 531 Action new_action; 532 533 PetscCall(PetscTime(&curTime)); 534 new_action.time = curTime - petsc_BaseTime; 535 new_action.action = PETSC_LOG_ACTION_BEGIN; 536 new_action.event = event; 537 new_action.classid = event_info.classid; 538 new_action.id1 = o1 ? o1->id : -1; 539 new_action.id2 = o2 ? o2->id : -1; 540 new_action.id3 = o3 ? o3->id : -1; 541 new_action.flops = petsc_TotalFlops; 542 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 543 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 544 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 545 } 546 PetscFunctionReturn(PETSC_SUCCESS); 547 } 548 549 static PetscErrorCode PetscLogHandlerEventEnd_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 550 { 551 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 552 PetscEventPerfInfo *event_perf_info = NULL; 553 PetscLogDouble time; 554 PetscLogState state; 555 int stage; 556 557 PetscFunctionBegin; 558 PetscCall(PetscLogHandlerGetState(h, &state)); 559 if (def->petsc_logActions) { 560 PetscLogEventInfo event_info; 561 PetscLogDouble curTime; 562 Action new_action; 563 564 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 565 PetscCall(PetscTime(&curTime)); 566 new_action.time = curTime - petsc_BaseTime; 567 new_action.action = PETSC_LOG_ACTION_END; 568 new_action.event = event; 569 new_action.classid = event_info.classid; 570 new_action.id1 = o1 ? o1->id : -1; 571 new_action.id2 = o2 ? o2->id : -2; 572 new_action.id3 = o3 ? o3->id : -3; 573 new_action.flops = petsc_TotalFlops; 574 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 575 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 576 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 577 } 578 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 579 if (def->pause_depth > 0) stage = 0; // all events run on the main stage in pause-mode 580 if (PetscDefined(HAVE_THREADSAFETY)) { 581 PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info)); 582 } else { 583 PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info)); 584 } 585 PetscCheck(event_perf_info->depth > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to end paused event, not allowed"); 586 event_perf_info->depth--; 587 /* Check for double counting */ 588 if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 589 else PetscCheck(event_perf_info->depth == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Logging event had unbalanced begin/end pairs"); 590 591 /* Log performance info */ 592 PetscCall(PetscTime(&time)); 593 PetscCall(PetscEventPerfInfoToc(event_perf_info, time, PetscLogMemory, (int)event)); 594 if (PetscDefined(HAVE_THREADSAFETY)) { 595 PetscEventPerfInfo *event_perf_info_global; 596 PetscCall(PetscSpinlockLock(&def->lock)); 597 PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info_global)); 598 PetscCall(PetscEventPerfInfoAdd_Internal(event_perf_info, event_perf_info_global)); 599 PetscCall(PetscSpinlockUnlock(&def->lock)); 600 } 601 #if defined(PETSC_HAVE_CUDA) 602 if (PetscDeviceInitialized(PETSC_DEVICE_CUDA)) nvtxRangePop(); 603 #endif 604 PetscFunctionReturn(PETSC_SUCCESS); 605 } 606 607 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultDeactivatePush(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event) 608 { 609 PetscEventPerfInfo *event_perf_info; 610 611 PetscFunctionBegin; 612 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 613 PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info)); 614 event_perf_info->depth++; 615 PetscFunctionReturn(PETSC_SUCCESS); 616 } 617 618 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultDeactivatePop(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event) 619 { 620 PetscEventPerfInfo *event_perf_info; 621 622 PetscFunctionBegin; 623 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 624 PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info)); 625 event_perf_info->depth--; 626 PetscFunctionReturn(PETSC_SUCCESS); 627 } 628 629 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultEventsPause(PetscLogHandler h) 630 { 631 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 632 PetscLogDouble time; 633 PetscInt num_stages; 634 635 PetscFunctionBegin; 636 if (def->pause_depth++ > 0) PetscFunctionReturn(PETSC_SUCCESS); 637 PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL)); 638 PetscCall(PetscTime(&time)); 639 for (PetscInt stage = 0; stage < num_stages; stage++) { 640 PetscStagePerf *stage_info = NULL; 641 PetscInt num_events; 642 643 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 644 PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL)); 645 for (PetscInt event = 0; event < num_events; event++) { 646 PetscEventPerfInfo *event_info = NULL; 647 PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info)); 648 if (event_info->depth > 0) { 649 event_info->depth *= -1; 650 PetscCall(PetscEventPerfInfoPause(event_info, time, PetscLogMemory, event)); 651 } 652 } 653 if (stage > 0 && stage_info->perfInfo.depth > 0) { 654 stage_info->perfInfo.depth *= -1; 655 PetscCall(PetscEventPerfInfoPause(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2))); 656 } 657 } 658 PetscFunctionReturn(PETSC_SUCCESS); 659 } 660 661 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultEventsResume(PetscLogHandler h) 662 { 663 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 664 PetscLogDouble time; 665 PetscInt num_stages; 666 667 PetscFunctionBegin; 668 if (--def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 669 PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL)); 670 PetscCall(PetscTime(&time)); 671 for (PetscInt stage = 0; stage < num_stages; stage++) { 672 PetscStagePerf *stage_info = NULL; 673 PetscInt num_events; 674 675 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 676 PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL)); 677 for (PetscInt event = 0; event < num_events; event++) { 678 PetscEventPerfInfo *event_info = NULL; 679 PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info)); 680 if (event_info->depth < 0) { 681 event_info->depth *= -1; 682 PetscCall(PetscEventPerfInfoResume(event_info, time, PetscLogMemory, event)); 683 } 684 } 685 if (stage > 0 && stage_info->perfInfo.depth < 0) { 686 stage_info->perfInfo.depth *= -1; 687 PetscCall(PetscEventPerfInfoResume(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2))); 688 } 689 } 690 PetscFunctionReturn(PETSC_SUCCESS); 691 } 692 693 static PetscErrorCode PetscLogHandlerStagePush_Default(PetscLogHandler h, PetscLogStage new_stage) 694 { 695 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 696 PetscLogDouble time; 697 PetscLogState state; 698 PetscLogStage current_stage; 699 PetscStagePerf *new_stage_info; 700 701 PetscFunctionBegin; 702 if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 703 PetscCall(PetscLogHandlerGetState(h, &state)); 704 current_stage = state->current_stage; 705 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, new_stage, &new_stage_info)); 706 PetscCall(PetscTime(&time)); 707 708 /* Record flops/time of previous stage */ 709 if (current_stage >= 0) { 710 if (PetscBTLookup(state->active, current_stage)) { 711 PetscStagePerf *current_stage_info; 712 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, ¤t_stage_info)); 713 PetscCall(PetscEventPerfInfoToc(¤t_stage_info->perfInfo, time, PetscLogMemory, (int)-(current_stage + 2))); 714 } 715 } 716 new_stage_info->used = PETSC_TRUE; 717 new_stage_info->perfInfo.count++; 718 new_stage_info->perfInfo.depth++; 719 /* Subtract current quantities so that we obtain the difference when we pop */ 720 if (PetscBTLookup(state->active, new_stage)) PetscCall(PetscEventPerfInfoTic(&new_stage_info->perfInfo, time, PetscLogMemory, (int)-(new_stage + 2))); 721 PetscFunctionReturn(PETSC_SUCCESS); 722 } 723 724 static PetscErrorCode PetscLogHandlerStagePop_Default(PetscLogHandler h, PetscLogStage old_stage) 725 { 726 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 727 PetscLogStage current_stage; 728 PetscStagePerf *old_stage_info; 729 PetscLogState state; 730 PetscLogDouble time; 731 732 PetscFunctionBegin; 733 if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 734 PetscCall(PetscLogHandlerGetState(h, &state)); 735 current_stage = state->current_stage; 736 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, old_stage, &old_stage_info)); 737 PetscCall(PetscTime(&time)); 738 old_stage_info->perfInfo.depth--; 739 if (PetscBTLookup(state->active, old_stage)) { PetscCall(PetscEventPerfInfoToc(&old_stage_info->perfInfo, time, PetscLogMemory, (int)-(old_stage + 2))); } 740 if (current_stage >= 0) { 741 if (PetscBTLookup(state->active, current_stage)) { 742 PetscStagePerf *current_stage_info; 743 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, ¤t_stage_info)); 744 PetscCall(PetscEventPerfInfoTic(¤t_stage_info->perfInfo, time, PetscLogMemory, (int)-(current_stage + 2))); 745 } 746 } 747 PetscFunctionReturn(PETSC_SUCCESS); 748 } 749 750 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultStageSetVisible(PetscLogHandler h, PetscLogStage stage, PetscBool is_visible) 751 { 752 PetscStagePerf *stage_info; 753 754 PetscFunctionBegin; 755 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 756 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info)); 757 stage_info->perfInfo.visible = is_visible; 758 PetscFunctionReturn(PETSC_SUCCESS); 759 } 760 761 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultStageGetVisible(PetscLogHandler h, PetscLogStage stage, PetscBool *is_visible) 762 { 763 PetscStagePerf *stage_info; 764 765 PetscFunctionBegin; 766 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 767 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info)); 768 *is_visible = stage_info->perfInfo.visible; 769 PetscFunctionReturn(PETSC_SUCCESS); 770 } 771 772 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultSetLogActions(PetscLogHandler handler, PetscBool flag) 773 { 774 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 775 776 PetscFunctionBegin; 777 def->petsc_logActions = flag; 778 PetscFunctionReturn(PETSC_SUCCESS); 779 } 780 781 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultSetLogObjects(PetscLogHandler handler, PetscBool flag) 782 { 783 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 784 785 PetscFunctionBegin; 786 def->petsc_logObjects = flag; 787 PetscFunctionReturn(PETSC_SUCCESS); 788 } 789 790 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultLogObjectState(PetscLogHandler handler, PetscObject obj, const char format[], va_list Argp) 791 { 792 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 793 size_t fullLength; 794 795 PetscFunctionBegin; 796 if (def->petsc_logObjects) { 797 Object *obj_entry = NULL; 798 799 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, obj->id - 1, &obj_entry)); 800 PetscCall(PetscVSNPrintf(obj_entry->info, 64, format, &fullLength, Argp)); 801 } 802 PetscFunctionReturn(PETSC_SUCCESS); 803 } 804 805 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultGetNumObjects(PetscLogHandler handler, PetscInt *num_objects) 806 { 807 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 808 809 PetscFunctionBegin; 810 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, num_objects, NULL)); 811 PetscFunctionReturn(PETSC_SUCCESS); 812 } 813 814 PETSC_INTERN PetscErrorCode PetscLogHandlerDump_Default(PetscLogHandler handler, const char sname[]) 815 { 816 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 817 FILE *fd; 818 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 819 PetscLogDouble flops, _TotalTime; 820 PetscMPIInt rank; 821 int curStage; 822 PetscLogState state; 823 PetscInt num_events; 824 PetscLogEvent event; 825 826 PetscFunctionBegin; 827 /* Calculate the total elapsed time */ 828 PetscCall(PetscTime(&_TotalTime)); 829 _TotalTime -= petsc_BaseTime; 830 /* Open log file */ 831 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)handler), &rank)); 832 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank)); 833 PetscCall(PetscFixFilename(file, fname)); 834 PetscCall(PetscFOpen(PETSC_COMM_SELF, fname, "w", &fd)); 835 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 836 /* Output totals */ 837 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 838 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Clock Resolution %g\n", 0.0)); 839 /* Output actions */ 840 if (def->petsc_logActions) { 841 PetscInt num_actions; 842 PetscCall(PetscLogActionArrayGetSize(def->petsc_actions, &num_actions, NULL)); 843 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Actions accomplished %d\n", (int)num_actions)); 844 for (int a = 0; a < num_actions; a++) { 845 Action *action; 846 847 PetscCall(PetscLogActionArrayGetRef(def->petsc_actions, a, &action)); 848 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%g %d %d %d %d %d %d %g %g %g\n", action->time, action->action, (int)action->event, (int)action->classid, action->id1, action->id2, action->id3, action->flops, action->mem, action->maxmem)); 849 } 850 } 851 /* Output objects */ 852 if (def->petsc_logObjects) { 853 PetscInt num_objects; 854 855 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 856 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Objects created %d destroyed %d\n", def->petsc_numObjectsCreated, def->petsc_numObjectsDestroyed)); 857 for (int o = 0; o < num_objects; o++) { 858 Object *object = NULL; 859 860 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, o, &object)); 861 if (object->parent != -1) continue; // object with this id wasn't logged, probably a PetscLogHandler 862 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Parent ID: %d Memory: %d\n", object->parent, (int)object->mem)); 863 if (!object->name[0]) { 864 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Name\n")); 865 } else { 866 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Name: %s\n", object->name)); 867 } 868 if (!object->info[0]) { 869 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Info\n")); 870 } else { 871 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Info: %s\n", object->info)); 872 } 873 } 874 } 875 /* Output events */ 876 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Event log:\n")); 877 PetscCall(PetscLogHandlerGetState(handler, &state)); 878 PetscCall(PetscLogStateGetNumEvents(state, &num_events)); 879 PetscCall(PetscLogStateGetCurrentStage(state, &curStage)); 880 for (event = 0; event < num_events; event++) { 881 PetscEventPerfInfo *event_info; 882 883 PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, curStage, event, &event_info)); 884 if (event_info->time != 0.0) flops = event_info->flops / event_info->time; 885 else flops = 0.0; 886 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%d %16d %16g %16g %16g\n", event, event_info->count, event_info->flops, event_info->time, flops)); 887 } 888 PetscCall(PetscFClose(PETSC_COMM_SELF, fd)); 889 PetscFunctionReturn(PETSC_SUCCESS); 890 } 891 892 /* 893 PetscLogView_Detailed - Each process prints the times for its own events 894 895 */ 896 static PetscErrorCode PetscLogHandlerView_Default_Detailed(PetscLogHandler handler, PetscViewer viewer) 897 { 898 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 899 PetscLogDouble locTotalTime, numRed, maxMem; 900 PetscInt numStages, numEvents; 901 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 902 PetscMPIInt rank, size; 903 PetscLogGlobalNames global_stages, global_events; 904 PetscLogState state; 905 PetscEventPerfInfo zero_info; 906 907 PetscFunctionBegin; 908 PetscCall(PetscLogHandlerGetState(handler, &state)); 909 PetscCallMPI(MPI_Comm_size(comm, &size)); 910 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 911 /* Must preserve reduction count before we go on */ 912 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 913 /* Get the total elapsed time */ 914 PetscCall(PetscTime(&locTotalTime)); 915 locTotalTime -= petsc_BaseTime; 916 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 917 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 918 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 919 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 920 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 921 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 922 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 923 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 924 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 925 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 926 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 927 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 928 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 929 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 930 for (PetscInt stage = 0; stage < numStages; stage++) { 931 PetscInt stage_id; 932 const char *stage_name; 933 934 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 935 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 936 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stage_name)); 937 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stage_name)); 938 for (PetscInt event = 0; event < numEvents; event++) { 939 PetscEventPerfInfo *eventInfo = &zero_info; 940 PetscBool is_zero = PETSC_FALSE; 941 PetscInt event_id; 942 const char *event_name; 943 944 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 945 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 946 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &eventInfo)); 947 is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE; 948 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm)); 949 if (!is_zero) { PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stage_name, event_name)); } 950 } 951 } 952 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 953 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 954 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 955 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 956 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 957 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 958 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 959 { 960 PetscInt num_objects; 961 962 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 963 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, (int)num_objects)); 964 } 965 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 966 PetscCall(PetscViewerFlush(viewer)); 967 for (PetscInt stage = 0; stage < numStages; stage++) { 968 PetscEventPerfInfo *stage_perf_info = &zero_info; 969 PetscInt stage_id; 970 const char *stage_name; 971 972 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 973 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 974 if (stage_id >= 0) { 975 PetscStagePerf *stage_info; 976 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info)); 977 stage_perf_info = &stage_info->perfInfo; 978 } 979 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stage_name, rank, stage_perf_info->time, 980 stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops)); 981 for (PetscInt event = 0; event < numEvents; event++) { 982 PetscEventPerfInfo *eventInfo = &zero_info; 983 PetscBool is_zero = PETSC_FALSE; 984 PetscInt event_id; 985 const char *event_name; 986 987 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 988 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 989 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &eventInfo)); 990 is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE; 991 PetscCall(PetscMemcmp(eventInfo, &zero_info, sizeof(zero_info), &is_zero)); 992 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm)); 993 if (!is_zero) { 994 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", stage_name, event_name, rank, 995 eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops)); 996 if (eventInfo->dof[0] >= 0.) { 997 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 998 for (PetscInt d = 0; d < 8; ++d) { 999 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1000 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1001 } 1002 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1003 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1004 for (PetscInt e = 0; e < 8; ++e) { 1005 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1006 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1007 } 1008 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1009 } 1010 } 1011 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1012 } 1013 } 1014 PetscCall(PetscViewerFlush(viewer)); 1015 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1016 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1017 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1018 PetscFunctionReturn(PETSC_SUCCESS); 1019 } 1020 1021 /* 1022 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1023 */ 1024 static PetscErrorCode PetscLogHandlerView_Default_CSV(PetscLogHandler handler, PetscViewer viewer) 1025 { 1026 PetscLogDouble locTotalTime, maxMem; 1027 PetscInt numStages, numEvents, stage, event; 1028 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1029 PetscMPIInt rank, size; 1030 PetscLogGlobalNames global_stages, global_events; 1031 PetscLogState state; 1032 PetscEventPerfInfo zero_info; 1033 1034 PetscFunctionBegin; 1035 PetscCall(PetscLogHandlerGetState(handler, &state)); 1036 PetscCallMPI(MPI_Comm_size(comm, &size)); 1037 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1038 /* Must preserve reduction count before we go on */ 1039 /* Get the total elapsed time */ 1040 PetscCall(PetscTime(&locTotalTime)); 1041 locTotalTime -= petsc_BaseTime; 1042 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1043 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 1044 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 1045 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 1046 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 1047 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 1048 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1049 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1050 PetscCall(PetscViewerFlush(viewer)); 1051 for (stage = 0; stage < numStages; stage++) { 1052 PetscEventPerfInfo *stage_perf_info; 1053 PetscInt stage_id; 1054 const char *stage_name; 1055 1056 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1057 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1058 stage_perf_info = &zero_info; 1059 if (stage_id >= 0) { 1060 PetscStagePerf *stage_info; 1061 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info)); 1062 stage_perf_info = &stage_info->perfInfo; 1063 } 1064 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stage_name, rank, stage_perf_info->time, stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops)); 1065 for (event = 0; event < numEvents; event++) { 1066 PetscEventPerfInfo *eventInfo = &zero_info; 1067 PetscBool is_zero = PETSC_FALSE; 1068 PetscInt event_id; 1069 const char *event_name; 1070 1071 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1072 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1073 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &eventInfo)); 1074 PetscCall(PetscMemcmp(eventInfo, &zero_info, sizeof(zero_info), &is_zero)); 1075 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm)); 1076 if (!is_zero) { 1077 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stage_name, event_name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops)); 1078 if (eventInfo->dof[0] >= 0.) { 1079 for (PetscInt d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1080 for (PetscInt e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1081 } 1082 } 1083 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1084 } 1085 } 1086 PetscCall(PetscViewerFlush(viewer)); 1087 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1088 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1089 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1090 PetscFunctionReturn(PETSC_SUCCESS); 1091 } 1092 1093 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) 1094 { 1095 PetscFunctionBegin; 1096 if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS); 1097 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1098 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1099 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1100 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1101 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1102 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1103 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1104 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1105 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1106 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1107 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1108 PetscFunctionReturn(PETSC_SUCCESS); 1109 } 1110 1111 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) 1112 { 1113 PetscFunctionBegin; 1114 if (PetscDefined(USE_DEBUG)) { 1115 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1116 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1117 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1118 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1119 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1120 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1121 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1122 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1123 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1124 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1125 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1126 } 1127 PetscFunctionReturn(PETSC_SUCCESS); 1128 } 1129 1130 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) 1131 { 1132 #if defined(PETSC_HAVE_DEVICE) 1133 PetscMPIInt size; 1134 PetscBool deviceInitialized = PETSC_FALSE; 1135 1136 PetscFunctionBegin; 1137 PetscCallMPI(MPI_Comm_size(comm, &size)); 1138 for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) { 1139 const PetscDeviceType dtype = PetscDeviceTypeCast(i); 1140 if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */ 1141 deviceInitialized = PETSC_TRUE; 1142 break; 1143 } 1144 } 1145 /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */ 1146 if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS); 1147 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1148 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1149 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1150 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1151 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1152 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1153 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1154 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1155 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1156 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1157 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1158 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1159 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1160 PetscFunctionReturn(PETSC_SUCCESS); 1161 #else 1162 return PETSC_SUCCESS; 1163 #endif 1164 } 1165 1166 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) 1167 { 1168 #if defined(PETSC_HAVE_DEVICE) 1169 1170 PetscFunctionBegin; 1171 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS); 1172 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1173 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1174 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1175 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1176 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1177 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1178 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1179 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1180 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1181 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1182 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1183 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1184 PetscFunctionReturn(PETSC_SUCCESS); 1185 #else 1186 return PETSC_SUCCESS; 1187 #endif 1188 } 1189 1190 static PetscErrorCode PetscLogHandlerView_Default_Info(PetscLogHandler handler, PetscViewer viewer) 1191 { 1192 FILE *fd; 1193 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 1194 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1195 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1196 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1197 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1198 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1199 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1200 PetscLogDouble min, max, tot, ratio, avg, x, y; 1201 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1202 #if defined(PETSC_HAVE_DEVICE) 1203 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1204 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1205 #endif 1206 PetscMPIInt minC, maxC; 1207 PetscMPIInt size, rank; 1208 PetscBool *localStageUsed, *stageUsed; 1209 PetscBool *localStageVisible, *stageVisible; 1210 PetscInt numStages, numEvents; 1211 int stage, oclass; 1212 PetscLogEvent event; 1213 char version[256]; 1214 MPI_Comm comm; 1215 #if defined(PETSC_HAVE_DEVICE) 1216 PetscInt64 nas = 0x7FF0000000000002; 1217 #endif 1218 PetscLogGlobalNames global_stages, global_events; 1219 PetscEventPerfInfo zero_info; 1220 PetscLogState state; 1221 1222 PetscFunctionBegin; 1223 PetscCall(PetscLogHandlerGetState(handler, &state)); 1224 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1225 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1226 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1227 PetscCallMPI(MPI_Comm_size(comm, &size)); 1228 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1229 /* Get the total elapsed time */ 1230 PetscCall(PetscTime(&locTotalTime)); 1231 locTotalTime -= petsc_BaseTime; 1232 1233 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1234 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1235 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1236 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1237 PetscCall(PetscLogViewWarnSync(comm, fd)); 1238 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1239 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1240 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1241 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1242 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1243 PetscCall(PetscGetUserName(username, sizeof(username))); 1244 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1245 PetscCall(PetscGetDate(date, sizeof(date))); 1246 PetscCall(PetscGetVersion(version, sizeof(version))); 1247 if (size == 1) { 1248 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1249 } else { 1250 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1251 } 1252 #if defined(PETSC_HAVE_OPENMP) 1253 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1254 #endif 1255 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1256 1257 /* Must preserve reduction count before we go on */ 1258 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1259 1260 /* Calculate summary information */ 1261 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1262 /* Time */ 1263 PetscCall(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1264 PetscCall(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1265 PetscCall(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1266 avg = tot / ((PetscLogDouble)size); 1267 if (min != 0.0) ratio = max / min; 1268 else ratio = 0.0; 1269 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1270 TotalTime = tot; 1271 /* Objects */ 1272 { 1273 PetscInt num_objects; 1274 1275 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 1276 avg = (PetscLogDouble)num_objects; 1277 } 1278 PetscCall(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1279 PetscCall(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1280 PetscCall(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1281 avg = tot / ((PetscLogDouble)size); 1282 if (min != 0.0) ratio = max / min; 1283 else ratio = 0.0; 1284 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1285 /* Flops */ 1286 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1287 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1288 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1289 avg = tot / ((PetscLogDouble)size); 1290 if (min != 0.0) ratio = max / min; 1291 else ratio = 0.0; 1292 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1293 TotalFlops = tot; 1294 /* Flops/sec -- Must talk to Barry here */ 1295 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1296 else flops = 0.0; 1297 PetscCall(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1298 PetscCall(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1299 PetscCall(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1300 avg = tot / ((PetscLogDouble)size); 1301 if (min != 0.0) ratio = max / min; 1302 else ratio = 0.0; 1303 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1304 /* Memory */ 1305 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1306 if (mem > 0.0) { 1307 PetscCall(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1308 PetscCall(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1309 PetscCall(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1310 avg = tot / ((PetscLogDouble)size); 1311 if (min != 0.0) ratio = max / min; 1312 else ratio = 0.0; 1313 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1314 } 1315 /* Messages */ 1316 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1317 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1318 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1319 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1320 avg = tot / ((PetscLogDouble)size); 1321 if (min != 0.0) ratio = max / min; 1322 else ratio = 0.0; 1323 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1324 numMessages = tot; 1325 /* Message Lengths */ 1326 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1327 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1328 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1329 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1330 if (numMessages != 0) avg = tot / numMessages; 1331 else avg = 0.0; 1332 if (min != 0.0) ratio = max / min; 1333 else ratio = 0.0; 1334 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1335 messageLength = tot; 1336 /* Reductions */ 1337 PetscCall(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1338 PetscCall(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1339 PetscCall(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1340 if (min != 0.0) ratio = max / min; 1341 else ratio = 0.0; 1342 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1343 numReductions = red; /* wrong because uses count from process zero */ 1344 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1345 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1346 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1347 1348 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 1349 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 1350 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 1351 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 1352 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 1353 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1354 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1355 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1356 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1357 if (numStages > 0) { 1358 for (stage = 0; stage < numStages; stage++) { 1359 PetscInt stage_id; 1360 1361 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1362 if (stage_id >= 0) { 1363 PetscStagePerf *stage_info; 1364 1365 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 1366 localStageUsed[stage] = stage_info->used; 1367 localStageVisible[stage] = stage_info->perfInfo.visible; 1368 } else { 1369 localStageUsed[stage] = PETSC_FALSE; 1370 localStageVisible[stage] = PETSC_TRUE; 1371 } 1372 } 1373 PetscCall(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1374 PetscCall(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1375 for (stage = 0; stage < numStages; stage++) { 1376 if (stageUsed[stage] && stageVisible[stage]) { 1377 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1378 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1379 break; 1380 } 1381 } 1382 for (stage = 0; stage < numStages; stage++) { 1383 PetscInt stage_id; 1384 PetscEventPerfInfo *stage_info; 1385 const char *stage_name; 1386 1387 if (!(stageUsed[stage] && stageVisible[stage])) continue; 1388 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1389 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1390 stage_info = &zero_info; 1391 if (localStageUsed[stage]) { 1392 PetscStagePerf *stage_perf_info; 1393 1394 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_perf_info)); 1395 stage_info = &stage_perf_info->perfInfo; 1396 } 1397 PetscCall(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1398 PetscCall(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1399 PetscCall(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1400 PetscCall(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1401 PetscCall(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1402 mess *= 0.5; 1403 messLen *= 0.5; 1404 red /= size; 1405 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1406 else fracTime = 0.0; 1407 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1408 else fracFlops = 0.0; 1409 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1410 if (numMessages != 0.0) fracMessages = mess / numMessages; 1411 else fracMessages = 0.0; 1412 if (mess != 0.0) avgMessLen = messLen / mess; 1413 else avgMessLen = 0.0; 1414 if (messageLength != 0.0) fracLength = messLen / messageLength; 1415 else fracLength = 0.0; 1416 if (numReductions != 0.0) fracReductions = red / numReductions; 1417 else fracReductions = 0.0; 1418 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, stage_name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1419 } 1420 } 1421 1422 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1423 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1424 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1425 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1426 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1427 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1428 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1429 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1430 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1431 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1432 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1433 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1434 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1435 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1436 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1437 if (PetscLogMemory) { 1438 PetscCall(PetscFPrintf(comm, fd, " Memory usage is summed over all MPI processes, it is given in mega-bytes\n")); 1439 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1440 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1441 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1442 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1443 } 1444 #if defined(PETSC_HAVE_DEVICE) 1445 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1446 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1447 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1448 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1449 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1450 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1451 #endif 1452 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1453 1454 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1455 1456 /* Report events */ 1457 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1458 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1459 #if defined(PETSC_HAVE_DEVICE) 1460 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1461 #endif 1462 PetscCall(PetscFPrintf(comm, fd, "\n")); 1463 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1464 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1465 #if defined(PETSC_HAVE_DEVICE) 1466 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1467 #endif 1468 PetscCall(PetscFPrintf(comm, fd, "\n")); 1469 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1470 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1471 #if defined(PETSC_HAVE_DEVICE) 1472 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1473 #endif 1474 PetscCall(PetscFPrintf(comm, fd, "\n")); 1475 1476 #if defined(PETSC_HAVE_DEVICE) 1477 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1478 PetscCall(PetscLogStateGetEventFromName(state, "TAOSolve", &TAO_Solve)); 1479 PetscCall(PetscLogStateGetEventFromName(state, "TSStep", &TS_Step)); 1480 PetscCall(PetscLogStateGetEventFromName(state, "SNESSolve", &SNES_Solve)); 1481 PetscCall(PetscLogStateGetEventFromName(state, "KSPSolve", &KSP_Solve)); 1482 #endif 1483 1484 for (stage = 0; stage < numStages; stage++) { 1485 PetscInt stage_id; 1486 PetscEventPerfInfo *stage_info; 1487 const char *stage_name; 1488 1489 if (!(stageVisible[stage] && stageUsed[stage])) continue; 1490 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1491 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1492 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stage_name)); 1493 stage_info = &zero_info; 1494 if (localStageUsed[stage]) { 1495 PetscStagePerf *stage_perf_info; 1496 1497 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_perf_info)); 1498 stage_info = &stage_perf_info->perfInfo; 1499 } 1500 PetscCall(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1501 PetscCall(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1502 PetscCall(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1503 PetscCall(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1504 PetscCall(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1505 mess *= 0.5; 1506 messLen *= 0.5; 1507 red /= size; 1508 1509 for (event = 0; event < numEvents; event++) { 1510 PetscInt event_id; 1511 PetscEventPerfInfo *event_info = &zero_info; 1512 PetscBool is_zero = PETSC_FALSE; 1513 const char *event_name; 1514 1515 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1516 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1517 if (event_id >= 0 && stage_id >= 0) { PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &event_info)); } 1518 PetscCall(PetscMemcmp(event_info, &zero_info, sizeof(zero_info), &is_zero)); 1519 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm)); 1520 if (!is_zero) { 1521 flopr = event_info->flops; 1522 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1523 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1524 PetscCall(MPIU_Allreduce(&event_info->flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1525 PetscCall(MPIU_Allreduce(&event_info->time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1526 PetscCall(MPIU_Allreduce(&event_info->time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1527 PetscCall(MPIU_Allreduce(&event_info->time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1528 PetscCall(MPIU_Allreduce(&event_info->numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1529 PetscCall(MPIU_Allreduce(&event_info->messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1530 PetscCall(MPIU_Allreduce(&event_info->numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1531 PetscCall(MPIU_Allreduce(&event_info->count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1532 PetscCall(MPIU_Allreduce(&event_info->count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1533 if (PetscLogMemory) { 1534 PetscCall(MPIU_Allreduce(&event_info->memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1535 PetscCall(MPIU_Allreduce(&event_info->mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1536 PetscCall(MPIU_Allreduce(&event_info->mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1537 PetscCall(MPIU_Allreduce(&event_info->mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1538 } 1539 #if defined(PETSC_HAVE_DEVICE) 1540 PetscCall(MPIU_Allreduce(&event_info->CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1541 PetscCall(MPIU_Allreduce(&event_info->GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1542 PetscCall(MPIU_Allreduce(&event_info->CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1543 PetscCall(MPIU_Allreduce(&event_info->GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1544 PetscCall(MPIU_Allreduce(&event_info->GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1545 PetscCall(MPIU_Allreduce(&event_info->GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1546 #endif 1547 if (mint < 0.0) { 1548 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, event_name)); 1549 mint = 0; 1550 } 1551 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, event_name); 1552 #if defined(PETSC_HAVE_DEVICE) 1553 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1554 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1555 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 1556 if (event_id != SNES_Solve && event_id != KSP_Solve && event_id != TS_Step && event_id != TAO_Solve) { 1557 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 1558 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 1559 } 1560 } 1561 #endif 1562 totm *= 0.5; 1563 totml *= 0.5; 1564 totr /= size; 1565 1566 if (maxC != 0) { 1567 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 1568 else ratC = 0.0; 1569 if (mint != 0.0) ratt = maxt / mint; 1570 else ratt = 0.0; 1571 if (minf != 0.0) ratf = maxf / minf; 1572 else ratf = 0.0; 1573 if (TotalTime != 0.0) fracTime = tott / TotalTime; 1574 else fracTime = 0.0; 1575 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 1576 else fracFlops = 0.0; 1577 if (stageTime != 0.0) fracStageTime = tott / stageTime; 1578 else fracStageTime = 0.0; 1579 if (flops != 0.0) fracStageFlops = totf / flops; 1580 else fracStageFlops = 0.0; 1581 if (numMessages != 0.0) fracMess = totm / numMessages; 1582 else fracMess = 0.0; 1583 if (messageLength != 0.0) fracMessLen = totml / messageLength; 1584 else fracMessLen = 0.0; 1585 if (numReductions != 0.0) fracRed = totr / numReductions; 1586 else fracRed = 0.0; 1587 if (mess != 0.0) fracStageMess = totm / mess; 1588 else fracStageMess = 0.0; 1589 if (messLen != 0.0) fracStageMessLen = totml / messLen; 1590 else fracStageMessLen = 0.0; 1591 if (red != 0.0) fracStageRed = totr / red; 1592 else fracStageRed = 0.0; 1593 if (totm != 0.0) totml /= totm; 1594 else totml = 0.0; 1595 if (maxt != 0.0) flopr = totf / maxt; 1596 else flopr = 0.0; 1597 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) 1598 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 1599 else 1600 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 1601 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 1602 #if defined(PETSC_HAVE_DEVICE) 1603 if (totf != 0.0) fracgflops = gflops / totf; 1604 else fracgflops = 0.0; 1605 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 1606 else gflopr = 0.0; 1607 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 1608 #endif 1609 PetscCall(PetscFPrintf(comm, fd, "\n")); 1610 } 1611 } 1612 } 1613 } 1614 1615 /* Memory usage and object creation */ 1616 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1617 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1618 #if defined(PETSC_HAVE_DEVICE) 1619 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1620 #endif 1621 PetscCall(PetscFPrintf(comm, fd, "\n")); 1622 PetscCall(PetscFPrintf(comm, fd, "\n")); 1623 1624 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1625 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1626 stats for stages local to processor sets. 1627 */ 1628 /* We should figure out the longest object name here (now 20 characters) */ 1629 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 1630 for (stage = 0; stage < numStages; stage++) { 1631 const char *stage_name; 1632 1633 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1634 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stage_name)); 1635 if (localStageUsed[stage]) { 1636 PetscInt num_classes; 1637 1638 PetscCall(PetscLogStateGetNumClasses(state, &num_classes)); 1639 for (oclass = 0; oclass < num_classes; oclass++) { 1640 PetscClassPerf *class_perf_info; 1641 1642 PetscCall(PetscLogHandlerDefaultGetClassPerf(handler, stage, oclass, &class_perf_info)); 1643 if ((class_perf_info->creations > 0) || (class_perf_info->destructions > 0)) { 1644 PetscLogClassInfo class_reg_info; 1645 1646 PetscCall(PetscLogStateClassGetInfo(state, oclass, &class_reg_info)); 1647 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", class_reg_info.name, class_perf_info->creations, class_perf_info->destructions)); 1648 } 1649 } 1650 } 1651 } 1652 1653 PetscCall(PetscFree(localStageUsed)); 1654 PetscCall(PetscFree(stageUsed)); 1655 PetscCall(PetscFree(localStageVisible)); 1656 PetscCall(PetscFree(stageVisible)); 1657 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1658 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1659 1660 /* Information unrelated to this particular run */ 1661 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 1662 PetscCall(PetscTime(&y)); 1663 PetscCall(PetscTime(&x)); 1664 PetscCall(PetscTime(&y)); 1665 PetscCall(PetscTime(&y)); 1666 PetscCall(PetscTime(&y)); 1667 PetscCall(PetscTime(&y)); 1668 PetscCall(PetscTime(&y)); 1669 PetscCall(PetscTime(&y)); 1670 PetscCall(PetscTime(&y)); 1671 PetscCall(PetscTime(&y)); 1672 PetscCall(PetscTime(&y)); 1673 PetscCall(PetscTime(&y)); 1674 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 1675 /* MPI information */ 1676 if (size > 1) { 1677 MPI_Status status; 1678 PetscMPIInt tag; 1679 MPI_Comm newcomm; 1680 1681 PetscCallMPI(MPI_Barrier(comm)); 1682 PetscCall(PetscTime(&x)); 1683 PetscCallMPI(MPI_Barrier(comm)); 1684 PetscCallMPI(MPI_Barrier(comm)); 1685 PetscCallMPI(MPI_Barrier(comm)); 1686 PetscCallMPI(MPI_Barrier(comm)); 1687 PetscCallMPI(MPI_Barrier(comm)); 1688 PetscCall(PetscTime(&y)); 1689 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 1690 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 1691 PetscCallMPI(MPI_Barrier(comm)); 1692 if (rank) { 1693 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 1694 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 1695 } else { 1696 PetscCall(PetscTime(&x)); 1697 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 1698 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 1699 PetscCall(PetscTime(&y)); 1700 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 1701 } 1702 PetscCall(PetscCommDestroy(&newcomm)); 1703 } 1704 PetscCall(PetscOptionsView(NULL, viewer)); 1705 1706 /* Machine and compile information */ 1707 if (PetscDefined(USE_FORTRAN_KERNELS)) { 1708 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 1709 } else { 1710 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 1711 } 1712 if (PetscDefined(USE_64BIT_INDICES)) { 1713 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64-bit PetscInt\n")); 1714 } else if (PetscDefined(USE___FLOAT128)) { 1715 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32-bit PetscInt\n")); 1716 } 1717 if (PetscDefined(USE_REAL_SINGLE)) { 1718 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 1719 } else if (PetscDefined(USE___FLOAT128)) { 1720 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 1721 } 1722 if (PetscDefined(USE_REAL_MAT_SINGLE)) { 1723 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 1724 } else { 1725 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 1726 } 1727 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 1728 1729 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 1730 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 1731 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 1732 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 1733 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 1734 1735 /* Cleanup */ 1736 PetscCall(PetscFPrintf(comm, fd, "\n")); 1737 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1738 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1739 PetscCall(PetscFPTrapPop()); 1740 PetscFunctionReturn(PETSC_SUCCESS); 1741 } 1742 1743 static PetscErrorCode PetscLogHandlerView_Default(PetscLogHandler handler, PetscViewer viewer) 1744 { 1745 PetscViewerFormat format; 1746 PetscFunctionBegin; 1747 PetscCall(PetscViewerGetFormat(viewer, &format)); 1748 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 1749 PetscCall(PetscLogHandlerView_Default_Info(handler, viewer)); 1750 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1751 PetscCall(PetscLogHandlerView_Default_Detailed(handler, viewer)); 1752 } else if (format == PETSC_VIEWER_ASCII_CSV) { 1753 PetscCall(PetscLogHandlerView_Default_CSV(handler, viewer)); 1754 } 1755 PetscFunctionReturn(PETSC_SUCCESS); 1756 } 1757 1758 /*MC 1759 PETSC_LOG_HANDLER_DEFAULT - PETSC_LOG_DEFAULT = "default" - A `PetscLogHandler` that collects data for PETSc 1760 default profiling log viewers (`PetscLogView()` and `PetscLogDump()`). 1761 1762 Options Database Keys: 1763 + -log_include_actions - include a growing list of actions (event beginnings and endings, object creations and destructions) in `PetscLogDump()` (`PetscLogActions()`). 1764 - -log_include_objects - include a growing list of object creations and destructions in `PetscLogDump()` (`PetscLogObjects()`). 1765 1766 Level: developer 1767 1768 .seealso: [](ch_profiling), `PetscLogHandler` 1769 M*/ 1770 1771 PETSC_INTERN PetscErrorCode PetscLogHandlerCreate_Default(PetscLogHandler handler) 1772 { 1773 PetscFunctionBegin; 1774 PetscCall(PetscLogHandlerContextCreate_Default((PetscLogHandler_Default *)&handler->data)); 1775 handler->ops->destroy = PetscLogHandlerDestroy_Default; 1776 handler->ops->eventbegin = PetscLogHandlerEventBegin_Default; 1777 handler->ops->eventend = PetscLogHandlerEventEnd_Default; 1778 handler->ops->eventsync = PetscLogHandlerEventSync_Default; 1779 handler->ops->objectcreate = PetscLogHandlerObjectCreate_Default; 1780 handler->ops->objectdestroy = PetscLogHandlerObjectDestroy_Default; 1781 handler->ops->stagepush = PetscLogHandlerStagePush_Default; 1782 handler->ops->stagepop = PetscLogHandlerStagePop_Default; 1783 handler->ops->view = PetscLogHandlerView_Default; 1784 PetscFunctionReturn(PETSC_SUCCESS); 1785 } 1786