1 #include <petscviewer.h> 2 #include <petscdevice.h> 3 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 4 #include <petsc/private/loghandlerimpl.h> 5 #include <petsc/private/deviceimpl.h> 6 #include <petscconfiginfo.h> 7 #include <petscmachineinfo.h> 8 #include "logdefault.h" 9 10 #if !PetscDefined(USE_LOG) 11 // zeros for global counters 12 static PetscLogDouble petsc_TotalFlops = 0.0; 13 static PetscLogDouble petsc_irecv_ct = 0.0; 14 static PetscLogDouble petsc_isend_ct = 0.0; 15 static PetscLogDouble petsc_recv_ct = 0.0; 16 static PetscLogDouble petsc_send_ct = 0.0; 17 static PetscLogDouble petsc_irecv_len = 0.0; 18 static PetscLogDouble petsc_isend_len = 0.0; 19 static PetscLogDouble petsc_recv_len = 0.0; 20 static PetscLogDouble petsc_send_len = 0.0; 21 static PetscLogDouble petsc_allreduce_ct = 0.0; 22 static PetscLogDouble petsc_gather_ct = 0.0; 23 static PetscLogDouble petsc_scatter_ct = 0.0; 24 25 /* Thread local storage */ 26 static PetscLogDouble petsc_TotalFlops_th = 0.0; 27 static PetscLogDouble petsc_irecv_ct_th = 0.0; 28 static PetscLogDouble petsc_isend_ct_th = 0.0; 29 static PetscLogDouble petsc_recv_ct_th = 0.0; 30 static PetscLogDouble petsc_send_ct_th = 0.0; 31 static PetscLogDouble petsc_irecv_len_th = 0.0; 32 static PetscLogDouble petsc_isend_len_th = 0.0; 33 static PetscLogDouble petsc_recv_len_th = 0.0; 34 static PetscLogDouble petsc_send_len_th = 0.0; 35 static PetscLogDouble petsc_allreduce_ct_th = 0.0; 36 static PetscLogDouble petsc_gather_ct_th = 0.0; 37 static PetscLogDouble petsc_scatter_ct_th = 0.0; 38 39 #if PetscDefined(HAVE_DEVICE) 40 /* Global GPU counters */ 41 static PetscLogDouble petsc_gflops = 0.0; 42 static PetscLogDouble petsc_gtime = 0.0; 43 44 /* Thread local storage */ 45 static PetscLogDouble petsc_ctog_ct_th = 0.0; 46 static PetscLogDouble petsc_gtoc_ct_th = 0.0; 47 static PetscLogDouble petsc_ctog_sz_th = 0.0; 48 static PetscLogDouble petsc_gtoc_sz_th = 0.0; 49 static PetscLogDouble petsc_gflops_th = 0.0; 50 #endif 51 #endif 52 53 static PetscErrorCode PetscEventPerfInfoInit(PetscEventPerfInfo *eventInfo) 54 { 55 PetscFunctionBegin; 56 PetscCall(PetscMemzero(eventInfo, sizeof(*eventInfo))); 57 eventInfo->visible = PETSC_TRUE; 58 eventInfo->id = -1; 59 eventInfo->dof[0] = -1.0; 60 eventInfo->dof[1] = -1.0; 61 eventInfo->dof[2] = -1.0; 62 eventInfo->dof[3] = -1.0; 63 eventInfo->dof[4] = -1.0; 64 eventInfo->dof[5] = -1.0; 65 eventInfo->dof[6] = -1.0; 66 eventInfo->dof[7] = -1.0; 67 eventInfo->errors[0] = -1.0; 68 eventInfo->errors[1] = -1.0; 69 eventInfo->errors[2] = -1.0; 70 eventInfo->errors[3] = -1.0; 71 eventInfo->errors[4] = -1.0; 72 eventInfo->errors[5] = -1.0; 73 eventInfo->errors[6] = -1.0; 74 eventInfo->errors[7] = -1.0; 75 PetscFunctionReturn(PETSC_SUCCESS); 76 } 77 78 static PetscErrorCode PetscEventPerfInfoTic_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool resume) 79 { 80 PetscFunctionBegin; 81 if (resume) { 82 eventInfo->timeTmp -= time; 83 eventInfo->flopsTmp -= petsc_TotalFlops_th; 84 } else { 85 eventInfo->timeTmp = -time; 86 eventInfo->flopsTmp = -petsc_TotalFlops_th; 87 } 88 eventInfo->numMessages -= petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th; 89 eventInfo->messageLength -= petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len_th + petsc_send_len_th; 90 eventInfo->numReductions -= petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th; 91 #if defined(PETSC_HAVE_DEVICE) 92 eventInfo->CpuToGpuCount -= petsc_ctog_ct_th; 93 eventInfo->GpuToCpuCount -= petsc_gtoc_ct_th; 94 eventInfo->CpuToGpuSize -= petsc_ctog_sz_th; 95 eventInfo->GpuToCpuSize -= petsc_gtoc_sz_th; 96 eventInfo->GpuFlops -= petsc_gflops_th; 97 eventInfo->GpuTime -= petsc_gtime; 98 #endif 99 if (logMemory) { 100 PetscLogDouble usage; 101 PetscCall(PetscMemoryGetCurrentUsage(&usage)); 102 eventInfo->memIncrease -= usage; 103 PetscCall(PetscMallocGetCurrentUsage(&usage)); 104 eventInfo->mallocSpace -= usage; 105 PetscCall(PetscMallocGetMaximumUsage(&usage)); 106 eventInfo->mallocIncrease -= usage; 107 PetscCall(PetscMallocPushMaximumUsage(event)); 108 } 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 static PetscErrorCode PetscEventPerfInfoTic(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 113 { 114 PetscFunctionBegin; 115 PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_FALSE)); 116 PetscFunctionReturn(PETSC_SUCCESS); 117 } 118 119 static PetscErrorCode PetscEventPerfInfoResume(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 120 { 121 PetscFunctionBegin; 122 PetscCall(PetscEventPerfInfoTic_Internal(eventInfo, time, logMemory, event, PETSC_TRUE)); 123 PetscFunctionReturn(PETSC_SUCCESS); 124 } 125 126 static PetscErrorCode PetscEventPerfInfoToc_Internal(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event, PetscBool pause) 127 { 128 PetscFunctionBegin; 129 eventInfo->timeTmp += time; 130 eventInfo->flopsTmp += petsc_TotalFlops_th; 131 if (!pause) { 132 eventInfo->time += eventInfo->timeTmp; 133 eventInfo->time2 += eventInfo->timeTmp * eventInfo->timeTmp; 134 eventInfo->flops += eventInfo->flopsTmp; 135 eventInfo->flops2 += eventInfo->flopsTmp * eventInfo->flopsTmp; 136 } 137 eventInfo->numMessages += petsc_irecv_ct_th + petsc_isend_ct_th + petsc_recv_ct_th + petsc_send_ct_th; 138 eventInfo->messageLength += petsc_irecv_len_th + petsc_isend_len_th + petsc_recv_len + petsc_send_len_th; 139 eventInfo->numReductions += petsc_allreduce_ct_th + petsc_gather_ct_th + petsc_scatter_ct_th; 140 #if defined(PETSC_HAVE_DEVICE) 141 eventInfo->CpuToGpuCount += petsc_ctog_ct_th; 142 eventInfo->GpuToCpuCount += petsc_gtoc_ct_th; 143 eventInfo->CpuToGpuSize += petsc_ctog_sz_th; 144 eventInfo->GpuToCpuSize += petsc_gtoc_sz_th; 145 eventInfo->GpuFlops += petsc_gflops_th; 146 eventInfo->GpuTime += petsc_gtime; 147 #endif 148 if (logMemory) { 149 PetscLogDouble usage, musage; 150 PetscCall(PetscMemoryGetCurrentUsage(&usage)); /* the comments below match the column labels printed in PetscLogView_Default() */ 151 eventInfo->memIncrease += usage; /* RMI */ 152 PetscCall(PetscMallocGetCurrentUsage(&usage)); 153 eventInfo->mallocSpace += usage; /* Malloc */ 154 PetscCall(PetscMallocPopMaximumUsage((int)event, &musage)); 155 eventInfo->mallocIncreaseEvent = PetscMax(musage - usage, eventInfo->mallocIncreaseEvent); /* EMalloc */ 156 PetscCall(PetscMallocGetMaximumUsage(&usage)); 157 eventInfo->mallocIncrease += usage; /* MMalloc */ 158 } 159 PetscFunctionReturn(PETSC_SUCCESS); 160 } 161 162 static PetscErrorCode PetscEventPerfInfoToc(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 163 { 164 PetscFunctionBegin; 165 PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_FALSE)); 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 static PetscErrorCode PetscEventPerfInfoPause(PetscEventPerfInfo *eventInfo, PetscLogDouble time, PetscBool logMemory, int event) 170 { 171 PetscFunctionBegin; 172 PetscCall(PetscEventPerfInfoToc_Internal(eventInfo, time, logMemory, event, PETSC_FALSE)); 173 PetscFunctionReturn(PETSC_SUCCESS); 174 } 175 176 static PetscErrorCode PetscEventPerfInfoAdd_Internal(const PetscEventPerfInfo *eventInfo, PetscEventPerfInfo *outInfo) 177 { 178 PetscFunctionBegin; 179 outInfo->count += eventInfo->count; 180 outInfo->time += eventInfo->time; 181 outInfo->time2 += eventInfo->time2; 182 outInfo->flops += eventInfo->flops; 183 outInfo->flops2 += eventInfo->flops2; 184 outInfo->numMessages += eventInfo->numMessages; 185 outInfo->messageLength += eventInfo->messageLength; 186 outInfo->numReductions += eventInfo->numReductions; 187 #if defined(PETSC_HAVE_DEVICE) 188 outInfo->CpuToGpuCount += eventInfo->CpuToGpuCount; 189 outInfo->GpuToCpuCount += eventInfo->GpuToCpuCount; 190 outInfo->CpuToGpuSize += eventInfo->CpuToGpuSize; 191 outInfo->GpuToCpuSize += eventInfo->GpuToCpuSize; 192 outInfo->GpuFlops += eventInfo->GpuFlops; 193 outInfo->GpuTime += eventInfo->GpuTime; 194 #endif 195 outInfo->memIncrease += eventInfo->memIncrease; 196 outInfo->mallocSpace += eventInfo->mallocSpace; 197 outInfo->mallocIncreaseEvent += eventInfo->mallocIncreaseEvent; 198 outInfo->mallocIncrease += eventInfo->mallocIncrease; 199 PetscFunctionReturn(PETSC_SUCCESS); 200 } 201 202 PETSC_LOG_RESIZABLE_ARRAY(EventPerfArray, PetscEventPerfInfo, PetscLogEvent, PetscEventPerfInfoInit, NULL, NULL) 203 204 /* --- PetscClassPerf --- */ 205 206 typedef struct { 207 int creations; /* The number of objects of this class created */ 208 int destructions; /* The number of objects of this class destroyed */ 209 PetscLogDouble mem; /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */ 210 PetscLogDouble descMem; /* The total memory allocated by descendents of these objects; this is completely wrong and should possibly be removed */ 211 } PetscClassPerf; 212 213 static PetscErrorCode PetscClassPerfInit(PetscClassPerf *classInfo) 214 { 215 PetscFunctionBegin; 216 PetscCall(PetscMemzero(classInfo, sizeof(*classInfo))); 217 PetscFunctionReturn(PETSC_SUCCESS); 218 } 219 220 PETSC_LOG_RESIZABLE_ARRAY(ClassPerfArray, PetscClassPerf, PetscLogClass, PetscClassPerfInit, NULL, NULL) 221 222 /* --- PetscStagePerf --- */ 223 224 typedef struct _PetscStagePerf { 225 PetscBool used; /* The stage was pushed on this processor */ 226 PetscEventPerfInfo perfInfo; /* The stage performance information */ 227 PetscLogEventPerfArray eventLog; /* The event information for this stage */ 228 PetscLogClassPerfArray classLog; /* The class information for this stage */ 229 } PetscStagePerf; 230 231 static PetscErrorCode PetscStageInfoInit(PetscStagePerf *stageInfo) 232 { 233 PetscFunctionBegin; 234 PetscCall(PetscMemzero(stageInfo, sizeof(*stageInfo))); 235 PetscCall(PetscLogEventPerfArrayCreate(128, &(stageInfo->eventLog))); 236 PetscCall(PetscLogClassPerfArrayCreate(128, &(stageInfo->classLog))); 237 PetscCall(PetscEventPerfInfoInit(&stageInfo->perfInfo)); 238 PetscFunctionReturn(PETSC_SUCCESS); 239 } 240 241 static PetscErrorCode PetscStageInfoReset(PetscStagePerf *stageInfo) 242 { 243 PetscFunctionBegin; 244 PetscCall(PetscLogEventPerfArrayDestroy(&(stageInfo->eventLog))); 245 PetscCall(PetscLogClassPerfArrayDestroy(&(stageInfo->classLog))); 246 PetscFunctionReturn(PETSC_SUCCESS); 247 } 248 249 PETSC_LOG_RESIZABLE_ARRAY(StageInfoArray, PetscStagePerf, PetscLogStage, PetscStageInfoInit, PetscStageInfoReset, NULL) 250 251 /* --- Action --- */ 252 253 /* The structure for action logging */ 254 typedef enum { 255 PETSC_LOG_ACTION_CREATE, 256 PETSC_LOG_ACTION_DESTROY, 257 PETSC_LOG_ACTION_BEGIN, 258 PETSC_LOG_ACTION_END, 259 } PetscLogActionType; 260 261 PETSC_LOG_RESIZABLE_ARRAY(ActionArray, Action, PetscLogEvent, NULL, NULL, NULL) 262 263 /* --- Object --- */ 264 265 PETSC_LOG_RESIZABLE_ARRAY(ObjectArray, Object, PetscObject, NULL, NULL, NULL) 266 267 /* Map from (threadid,stage,event) to perfInfo data struct */ 268 #include <petsc/private/hashmapijk.h> 269 270 #if !PetscDefined(HAVE_THREADSAFETY) 271 // PetscHMapEvent is only declared in logimpl.h if PetscDefined(HAVE_THREADSAFETY): define it otherwise so that we 272 // can just always have it in the struct 273 PETSC_HASH_MAP(HMapEvent, PetscHashIJKKey, PetscEventPerfInfo *, PetscHashIJKKeyHash, PetscHashIJKKeyEqual, NULL) 274 #endif 275 276 typedef struct _n_PetscLogHandler_Default *PetscLogHandler_Default; 277 struct _n_PetscLogHandler_Default { 278 PetscLogStageInfoArray stages; 279 PetscSpinlock lock; 280 PetscLogActionArray petsc_actions; 281 PetscLogObjectArray petsc_objects; 282 PetscBool petsc_logActions; 283 PetscBool petsc_logObjects; 284 int petsc_numObjectsCreated; 285 int petsc_numObjectsDestroyed; 286 PetscHMapEvent eventInfoMap_th; 287 int pause_depth; 288 }; 289 290 /* --- PetscLogHandler_Default --- */ 291 292 static PetscErrorCode PetscLogHandlerContextCreate_Default(PetscLogHandler_Default *def_p) 293 { 294 PetscLogHandler_Default def; 295 296 PetscFunctionBegin; 297 PetscCall(PetscNew(def_p)); 298 def = *def_p; 299 PetscCall(PetscLogStageInfoArrayCreate(8, &(def->stages))); 300 PetscCall(PetscLogActionArrayCreate(64, &(def->petsc_actions))); 301 PetscCall(PetscLogObjectArrayCreate(64, &(def->petsc_objects))); 302 303 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_actions", &def->petsc_logActions, NULL)); 304 PetscCall(PetscOptionsGetBool(NULL, NULL, "-log_include_objects", &def->petsc_logObjects, NULL)); 305 if (PetscDefined(HAVE_THREADSAFETY)) { PetscCall(PetscHMapEventCreate(&def->eventInfoMap_th)); } 306 PetscFunctionReturn(PETSC_SUCCESS); 307 } 308 309 static PetscErrorCode PetscLogHandlerDestroy_Default(PetscLogHandler h) 310 { 311 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 312 313 PetscFunctionBegin; 314 PetscCall(PetscLogStageInfoArrayDestroy(&def->stages)); 315 PetscCall(PetscLogActionArrayDestroy(&def->petsc_actions)); 316 PetscCall(PetscLogObjectArrayDestroy(&def->petsc_objects)); 317 if (def->eventInfoMap_th) { 318 PetscEventPerfInfo **array; 319 PetscInt n, off = 0; 320 321 PetscCall(PetscHMapEventGetSize(def->eventInfoMap_th, &n)); 322 PetscCall(PetscMalloc1(n, &array)); 323 PetscCall(PetscHMapEventGetVals(def->eventInfoMap_th, &off, array)); 324 for (PetscInt i = 0; i < n; i++) PetscCall(PetscFree(array[i])); 325 PetscCall(PetscFree(array)); 326 PetscCall(PetscHMapEventDestroy(&def->eventInfoMap_th)); 327 } 328 PetscCall(PetscFree(def)); 329 PetscFunctionReturn(PETSC_SUCCESS); 330 } 331 332 static PetscErrorCode PetscLogHandlerDefaultGetStageInfo(PetscLogHandler handler, PetscLogStage stage, PetscStagePerf **stage_info_p) 333 { 334 PetscStagePerf *stage_info = NULL; 335 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 336 337 PetscFunctionBegin; 338 PetscCall(PetscLogStageInfoArrayResize(def->stages, stage + 1)); 339 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 340 *stage_info_p = stage_info; 341 PetscFunctionReturn(PETSC_SUCCESS); 342 } 343 344 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultGetEventPerfInfo(PetscLogHandler handler, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **event_info_p) 345 { 346 PetscEventPerfInfo *event_info = NULL; 347 PetscStagePerf *stage_info = NULL; 348 PetscLogEventPerfArray event_log; 349 350 PetscFunctionBegin; 351 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(handler->state, &stage)); 352 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 353 event_log = stage_info->eventLog; 354 PetscCall(PetscLogEventPerfArrayResize(event_log, event + 1)); 355 PetscCall(PetscLogEventPerfArrayGetRef(event_log, event, &event_info)); 356 event_info->id = event; 357 *event_info_p = event_info; 358 PetscFunctionReturn(PETSC_SUCCESS); 359 } 360 361 static PetscErrorCode PetscLogHandlerDefaultGetClassPerf(PetscLogHandler handler, PetscLogStage stage, PetscLogClass clss, PetscClassPerf **class_info) 362 { 363 PetscLogClassPerfArray class_log; 364 PetscStagePerf *stage_info; 365 366 PetscFunctionBegin; 367 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 368 class_log = stage_info->classLog; 369 PetscCall(PetscLogClassPerfArrayResize(class_log, clss + 1)); 370 PetscCall(PetscLogClassPerfArrayGetRef(class_log, clss, class_info)); 371 PetscFunctionReturn(PETSC_SUCCESS); 372 } 373 374 static PetscErrorCode PetscLogHandlerObjectCreate_Default(PetscLogHandler h, PetscObject obj) 375 { 376 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 377 PetscLogState state; 378 PetscLogStage stage; 379 PetscClassPerf *classInfo; 380 int oclass = 0; 381 382 PetscFunctionBegin; 383 PetscCall(PetscLogHandlerGetState(h, &state)); 384 PetscCall(PetscSpinlockLock(&def->lock)); 385 /* Record stage info */ 386 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 387 PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass)); 388 PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo)); 389 classInfo->creations++; 390 /* Record the creation action */ 391 if (def->petsc_logActions) { 392 Action new_action; 393 394 PetscCall(PetscTime(&new_action.time)); 395 new_action.time -= petsc_BaseTime; 396 new_action.action = PETSC_LOG_ACTION_CREATE; 397 new_action.event = -1; 398 new_action.classid = obj->classid; 399 new_action.id1 = obj->id; 400 new_action.id2 = -1; 401 new_action.id3 = -1; 402 new_action.flops = petsc_TotalFlops; 403 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 404 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 405 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 406 } 407 /* We don't just use obj->id to count all objects that are created 408 because PetscLogHandlers are objects and PetscLogObjectDestroy() will not 409 be called for them: the number of objects created and destroyed as counted 410 here and below would have an imbalance */ 411 def->petsc_numObjectsCreated++; 412 /* Record the object */ 413 if (def->petsc_logObjects) { 414 Object new_object; 415 416 new_object.parent = -1; 417 new_object.obj = obj; 418 new_object.mem = 0; 419 420 PetscCall(PetscMemzero(new_object.name, sizeof(new_object.name))); 421 PetscCall(PetscMemzero(new_object.info, sizeof(new_object.info))); 422 PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1"); 423 PetscCall(PetscLogObjectArrayResize(def->petsc_objects, obj->id)); 424 PetscCall(PetscLogObjectArraySet(def->petsc_objects, obj->id - 1, new_object)); 425 } 426 PetscCall(PetscSpinlockUnlock(&def->lock)); 427 PetscFunctionReturn(PETSC_SUCCESS); 428 } 429 430 static PetscErrorCode PetscLogHandlerObjectDestroy_Default(PetscLogHandler h, PetscObject obj) 431 { 432 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 433 PetscLogState state; 434 PetscLogStage stage; 435 PetscClassPerf *classInfo; 436 int oclass = 0; 437 438 PetscFunctionBegin; 439 PetscCall(PetscLogHandlerGetState(h, &state)); 440 /* Record stage info */ 441 PetscCall(PetscSpinlockLock(&def->lock)); 442 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 443 if (stage >= 0) { 444 /* stage < 0 can happen if the log summary is output before some things are destroyed */ 445 PetscCall(PetscLogStateGetClassFromClassId(state, obj->classid, &oclass)); 446 PetscCall(PetscLogHandlerDefaultGetClassPerf(h, stage, oclass, &classInfo)); 447 classInfo->destructions++; 448 } 449 /* Cannot Credit all ancestors with your memory because they may have already been destroyed*/ 450 def->petsc_numObjectsDestroyed++; 451 /* Dynamically enlarge logging structures */ 452 /* Record the destruction action */ 453 if (def->petsc_logActions) { 454 Action new_action; 455 456 PetscCall(PetscTime(&new_action.time)); 457 new_action.time -= petsc_BaseTime; 458 new_action.event = -1; 459 new_action.action = PETSC_LOG_ACTION_DESTROY; 460 new_action.classid = obj->classid; 461 new_action.id1 = obj->id; 462 new_action.id2 = -1; 463 new_action.id3 = -1; 464 new_action.flops = petsc_TotalFlops; 465 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 466 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 467 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 468 } 469 if (def->petsc_logObjects) { 470 Object *obj_entry = NULL; 471 472 PetscAssert(obj->id >= 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Object ids from PetscObjectNewId_Internal() start at 1"); 473 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, obj->id - 1, &obj_entry)); 474 if (obj->name) PetscCall(PetscStrncpy(obj_entry->name, obj->name, 64)); 475 obj_entry->obj = NULL; 476 } 477 PetscCall(PetscSpinlockUnlock(&def->lock)); 478 PetscFunctionReturn(PETSC_SUCCESS); 479 } 480 481 static PetscErrorCode PetscLogHandlerEventSync_Default(PetscLogHandler h, PetscLogEvent event, MPI_Comm comm) 482 { 483 PetscLogState state; 484 PetscLogEventInfo event_info; 485 PetscEventPerfInfo *event_perf_info; 486 int stage; 487 PetscLogDouble time = 0.0; 488 489 PetscFunctionBegin; 490 if (!(PetscLogSyncOn) || comm == MPI_COMM_NULL) PetscFunctionReturn(PETSC_SUCCESS); 491 PetscCall(PetscLogHandlerGetState(h, &state)); 492 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 493 if (!event_info.collective) PetscFunctionReturn(PETSC_SUCCESS); 494 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 495 PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info)); 496 if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 497 498 PetscCall(PetscTimeSubtract(&time)); 499 PetscCallMPI(MPI_Barrier(comm)); 500 PetscCall(PetscTimeAdd(&time)); 501 event_perf_info->syncTime += time; 502 PetscFunctionReturn(PETSC_SUCCESS); 503 } 504 505 static PetscErrorCode PetscLogGetStageEventPerfInfo_threaded(PetscLogHandler_Default def, PetscLogStage stage, PetscLogEvent event, PetscEventPerfInfo **eventInfo) 506 { 507 PetscEventPerfInfo *leventInfo = NULL; 508 PetscHashIJKKey key; 509 510 PetscFunctionBegin; 511 512 #if PetscDefined(HAVE_THREADSAFETY) 513 key.i = PetscLogGetTid(); 514 #else 515 key.i = 0; 516 #endif 517 key.j = stage; 518 key.k = event; 519 PetscCall(PetscSpinlockLock(&def->lock)); 520 PetscCall(PetscHMapEventGet(def->eventInfoMap_th, key, &leventInfo)); 521 if (!leventInfo) { 522 PetscCall(PetscNew(&leventInfo)); 523 leventInfo->id = event; 524 PetscCall(PetscHMapEventSet(def->eventInfoMap_th, key, leventInfo)); 525 } 526 PetscCall(PetscSpinlockUnlock(&def->lock)); 527 *eventInfo = leventInfo; 528 PetscFunctionReturn(PETSC_SUCCESS); 529 } 530 531 #if defined(PETSC_HAVE_CUDA) 532 #include <nvToolsExt.h> 533 #endif 534 static PetscErrorCode PetscLogHandlerEventBegin_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 535 { 536 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 537 PetscEventPerfInfo *event_perf_info = NULL; 538 PetscLogEventInfo event_info; 539 PetscLogDouble time; 540 PetscLogState state; 541 PetscLogStage stage; 542 543 PetscFunctionBegin; 544 PetscCall(PetscLogHandlerGetState(h, &state)); 545 /* Synchronization */ 546 PetscCall(PetscLogHandlerEventSync_Default(h, event, PetscObjectComm(o1))); 547 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 548 if (def->pause_depth > 0) stage = 0; // in pause-mode, all events run on the main stage 549 if (PetscDefined(HAVE_THREADSAFETY)) { 550 PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info)); 551 if (event_perf_info->depth == 0) { PetscCall(PetscEventPerfInfoInit(event_perf_info)); } 552 } else { 553 PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info)); 554 } 555 PetscCheck(event_perf_info->depth >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to begin a paused event, this is not allowed"); 556 event_perf_info->depth++; 557 /* Check for double counting */ 558 if (event_perf_info->depth > 1) PetscFunctionReturn(PETSC_SUCCESS); 559 #if defined(PETSC_HAVE_CUDA) 560 if (PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { 561 PetscLogEventInfo event_reg_info; 562 563 PetscCall(PetscLogStateEventGetInfo(state, event, &event_reg_info)); 564 nvtxRangePushA(event_reg_info.name); 565 } 566 #endif 567 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 568 /* Log the performance info */ 569 event_perf_info->count++; 570 PetscCall(PetscTime(&time)); 571 PetscCall(PetscEventPerfInfoTic(event_perf_info, time, PetscLogMemory, (int)event)); 572 if (def->petsc_logActions) { 573 PetscLogDouble curTime; 574 Action new_action; 575 576 PetscCall(PetscTime(&curTime)); 577 new_action.time = curTime - petsc_BaseTime; 578 new_action.action = PETSC_LOG_ACTION_BEGIN; 579 new_action.event = event; 580 new_action.classid = event_info.classid; 581 new_action.id1 = o1 ? o1->id : -1; 582 new_action.id2 = o2 ? o2->id : -1; 583 new_action.id3 = o3 ? o3->id : -1; 584 new_action.flops = petsc_TotalFlops; 585 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 586 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 587 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 588 } 589 PetscFunctionReturn(PETSC_SUCCESS); 590 } 591 592 static PetscErrorCode PetscLogHandlerEventEnd_Default(PetscLogHandler h, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 593 { 594 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 595 PetscEventPerfInfo *event_perf_info = NULL; 596 PetscLogDouble time; 597 PetscLogState state; 598 int stage; 599 600 PetscFunctionBegin; 601 PetscCall(PetscLogHandlerGetState(h, &state)); 602 if (def->petsc_logActions) { 603 PetscLogEventInfo event_info; 604 PetscLogDouble curTime; 605 Action new_action; 606 607 PetscCall(PetscLogStateEventGetInfo(state, event, &event_info)); 608 PetscCall(PetscTime(&curTime)); 609 new_action.time = curTime - petsc_BaseTime; 610 new_action.action = PETSC_LOG_ACTION_END; 611 new_action.event = event; 612 new_action.classid = event_info.classid; 613 new_action.id1 = o1 ? o1->id : -1; 614 new_action.id2 = o2 ? o2->id : -2; 615 new_action.id3 = o3 ? o3->id : -3; 616 new_action.flops = petsc_TotalFlops; 617 PetscCall(PetscMallocGetCurrentUsage(&new_action.mem)); 618 PetscCall(PetscMallocGetMaximumUsage(&new_action.maxmem)); 619 PetscCall(PetscLogActionArrayPush(def->petsc_actions, new_action)); 620 } 621 PetscCall(PetscLogStateGetCurrentStage(state, &stage)); 622 if (def->pause_depth > 0) stage = 0; // all events run on the main stage in pause-mode 623 if (PetscDefined(HAVE_THREADSAFETY)) { 624 PetscCall(PetscLogGetStageEventPerfInfo_threaded(def, stage, event, &event_perf_info)); 625 } else { 626 PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info)); 627 } 628 PetscCheck(event_perf_info->depth > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to end paused event, not allowed"); 629 event_perf_info->depth--; 630 /* Check for double counting */ 631 if (event_perf_info->depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 632 else PetscCheck(event_perf_info->depth == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Logging event had unbalanced begin/end pairs"); 633 634 /* Log performance info */ 635 PetscCall(PetscTime(&time)); 636 PetscCall(PetscEventPerfInfoToc(event_perf_info, time, PetscLogMemory, (int)event)); 637 if (PetscDefined(HAVE_THREADSAFETY)) { 638 PetscEventPerfInfo *event_perf_info_global; 639 PetscCall(PetscSpinlockLock(&def->lock)); 640 PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info_global)); 641 PetscCall(PetscEventPerfInfoAdd_Internal(event_perf_info, event_perf_info_global)); 642 PetscCall(PetscSpinlockUnlock(&def->lock)); 643 } 644 #if defined(PETSC_HAVE_CUDA) 645 if (PetscDeviceInitialized(PETSC_DEVICE_CUDA)) nvtxRangePop(); 646 #endif 647 PetscFunctionReturn(PETSC_SUCCESS); 648 } 649 650 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultDeactivatePush(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event) 651 { 652 PetscEventPerfInfo *event_perf_info; 653 654 PetscFunctionBegin; 655 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 656 PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info)); 657 event_perf_info->depth++; 658 PetscFunctionReturn(PETSC_SUCCESS); 659 } 660 661 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultDeactivatePop(PetscLogHandler h, PetscLogStage stage, PetscLogEvent event) 662 { 663 PetscEventPerfInfo *event_perf_info; 664 665 PetscFunctionBegin; 666 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 667 PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(h, stage, event, &event_perf_info)); 668 event_perf_info->depth--; 669 PetscFunctionReturn(PETSC_SUCCESS); 670 } 671 672 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultEventsPause(PetscLogHandler h) 673 { 674 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 675 PetscLogDouble time; 676 PetscInt num_stages; 677 678 PetscFunctionBegin; 679 if (def->pause_depth++ > 0) PetscFunctionReturn(PETSC_SUCCESS); 680 PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL)); 681 PetscCall(PetscTime(&time)); 682 for (PetscInt stage = 0; stage < num_stages; stage++) { 683 PetscStagePerf *stage_info = NULL; 684 PetscInt num_events; 685 686 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 687 PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL)); 688 for (PetscInt event = 0; event < num_events; event++) { 689 PetscEventPerfInfo *event_info = NULL; 690 PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info)); 691 if (event_info->depth > 0) { 692 event_info->depth *= -1; 693 PetscCall(PetscEventPerfInfoPause(event_info, time, PetscLogMemory, event)); 694 } 695 } 696 if (stage > 0 && stage_info->perfInfo.depth > 0) { 697 stage_info->perfInfo.depth *= -1; 698 PetscCall(PetscEventPerfInfoPause(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2))); 699 } 700 } 701 PetscFunctionReturn(PETSC_SUCCESS); 702 } 703 704 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultEventsResume(PetscLogHandler h) 705 { 706 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 707 PetscLogDouble time; 708 PetscInt num_stages; 709 710 PetscFunctionBegin; 711 if (--def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 712 PetscCall(PetscLogStageInfoArrayGetSize(def->stages, &num_stages, NULL)); 713 PetscCall(PetscTime(&time)); 714 for (PetscInt stage = 0; stage < num_stages; stage++) { 715 PetscStagePerf *stage_info = NULL; 716 PetscInt num_events; 717 718 PetscCall(PetscLogStageInfoArrayGetRef(def->stages, stage, &stage_info)); 719 PetscCall(PetscLogEventPerfArrayGetSize(stage_info->eventLog, &num_events, NULL)); 720 for (PetscInt event = 0; event < num_events; event++) { 721 PetscEventPerfInfo *event_info = NULL; 722 PetscCall(PetscLogEventPerfArrayGetRef(stage_info->eventLog, event, &event_info)); 723 if (event_info->depth < 0) { 724 event_info->depth *= -1; 725 PetscCall(PetscEventPerfInfoResume(event_info, time, PetscLogMemory, event)); 726 } 727 } 728 if (stage > 0 && stage_info->perfInfo.depth < 0) { 729 stage_info->perfInfo.depth *= -1; 730 PetscCall(PetscEventPerfInfoResume(&stage_info->perfInfo, time, PetscLogMemory, -(stage + 2))); 731 } 732 } 733 PetscFunctionReturn(PETSC_SUCCESS); 734 } 735 736 static PetscErrorCode PetscLogHandlerStagePush_Default(PetscLogHandler h, PetscLogStage new_stage) 737 { 738 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 739 PetscLogDouble time; 740 PetscLogState state; 741 PetscLogStage current_stage; 742 PetscStagePerf *new_stage_info; 743 744 PetscFunctionBegin; 745 if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 746 PetscCall(PetscLogHandlerGetState(h, &state)); 747 current_stage = state->current_stage; 748 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, new_stage, &new_stage_info)); 749 PetscCall(PetscTime(&time)); 750 751 /* Record flops/time of previous stage */ 752 if (current_stage >= 0) { 753 if (PetscBTLookup(state->active, current_stage)) { 754 PetscStagePerf *current_stage_info; 755 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, ¤t_stage_info)); 756 PetscCall(PetscEventPerfInfoToc(¤t_stage_info->perfInfo, time, PetscLogMemory, (int)-(current_stage + 2))); 757 } 758 } 759 new_stage_info->used = PETSC_TRUE; 760 new_stage_info->perfInfo.count++; 761 new_stage_info->perfInfo.depth++; 762 /* Subtract current quantities so that we obtain the difference when we pop */ 763 if (PetscBTLookup(state->active, new_stage)) PetscCall(PetscEventPerfInfoTic(&new_stage_info->perfInfo, time, PetscLogMemory, (int)-(new_stage + 2))); 764 PetscFunctionReturn(PETSC_SUCCESS); 765 } 766 767 static PetscErrorCode PetscLogHandlerStagePop_Default(PetscLogHandler h, PetscLogStage old_stage) 768 { 769 PetscLogHandler_Default def = (PetscLogHandler_Default)h->data; 770 PetscLogStage current_stage; 771 PetscStagePerf *old_stage_info; 772 PetscLogState state; 773 PetscLogDouble time; 774 775 PetscFunctionBegin; 776 if (def->pause_depth > 0) PetscFunctionReturn(PETSC_SUCCESS); 777 PetscCall(PetscLogHandlerGetState(h, &state)); 778 current_stage = state->current_stage; 779 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, old_stage, &old_stage_info)); 780 PetscCall(PetscTime(&time)); 781 old_stage_info->perfInfo.depth--; 782 if (PetscBTLookup(state->active, old_stage)) { PetscCall(PetscEventPerfInfoToc(&old_stage_info->perfInfo, time, PetscLogMemory, (int)-(old_stage + 2))); } 783 if (current_stage >= 0) { 784 if (PetscBTLookup(state->active, current_stage)) { 785 PetscStagePerf *current_stage_info; 786 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, current_stage, ¤t_stage_info)); 787 PetscCall(PetscEventPerfInfoTic(¤t_stage_info->perfInfo, time, PetscLogMemory, (int)-(current_stage + 2))); 788 } 789 } 790 PetscFunctionReturn(PETSC_SUCCESS); 791 } 792 793 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultStageSetVisible(PetscLogHandler h, PetscLogStage stage, PetscBool is_visible) 794 { 795 PetscStagePerf *stage_info; 796 797 PetscFunctionBegin; 798 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 799 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info)); 800 stage_info->perfInfo.visible = is_visible; 801 PetscFunctionReturn(PETSC_SUCCESS); 802 } 803 804 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultStageGetVisible(PetscLogHandler h, PetscLogStage stage, PetscBool *is_visible) 805 { 806 PetscStagePerf *stage_info; 807 808 PetscFunctionBegin; 809 if (stage < 0) PetscCall(PetscLogStateGetCurrentStage(h->state, &stage)); 810 PetscCall(PetscLogHandlerDefaultGetStageInfo(h, stage, &stage_info)); 811 *is_visible = stage_info->perfInfo.visible; 812 PetscFunctionReturn(PETSC_SUCCESS); 813 } 814 815 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultSetLogActions(PetscLogHandler handler, PetscBool flag) 816 { 817 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 818 819 PetscFunctionBegin; 820 def->petsc_logActions = flag; 821 PetscFunctionReturn(PETSC_SUCCESS); 822 } 823 824 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultSetLogObjects(PetscLogHandler handler, PetscBool flag) 825 { 826 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 827 828 PetscFunctionBegin; 829 def->petsc_logObjects = flag; 830 PetscFunctionReturn(PETSC_SUCCESS); 831 } 832 833 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultLogObjectState(PetscLogHandler handler, PetscObject obj, const char format[], va_list Argp) 834 { 835 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 836 size_t fullLength; 837 838 PetscFunctionBegin; 839 if (def->petsc_logObjects) { 840 Object *obj_entry = NULL; 841 842 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, obj->id - 1, &obj_entry)); 843 PetscCall(PetscVSNPrintf(obj_entry->info, 64, format, &fullLength, Argp)); 844 } 845 PetscFunctionReturn(PETSC_SUCCESS); 846 } 847 848 PETSC_INTERN PetscErrorCode PetscLogHandlerDefaultGetNumObjects(PetscLogHandler handler, PetscInt *num_objects) 849 { 850 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 851 852 PetscFunctionBegin; 853 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, num_objects, NULL)); 854 PetscFunctionReturn(PETSC_SUCCESS); 855 } 856 857 PETSC_INTERN PetscErrorCode PetscLogHandlerDump_Default(PetscLogHandler handler, const char sname[]) 858 { 859 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 860 FILE *fd; 861 char file[PETSC_MAX_PATH_LEN], fname[PETSC_MAX_PATH_LEN]; 862 PetscLogDouble flops, _TotalTime; 863 PetscMPIInt rank; 864 int curStage; 865 PetscLogState state; 866 PetscInt num_events; 867 PetscLogEvent event; 868 869 PetscFunctionBegin; 870 /* Calculate the total elapsed time */ 871 PetscCall(PetscTime(&_TotalTime)); 872 _TotalTime -= petsc_BaseTime; 873 /* Open log file */ 874 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)handler), &rank)); 875 PetscCall(PetscSNPrintf(file, PETSC_STATIC_ARRAY_LENGTH(file), "%s.%d", sname && sname[0] ? sname : "Log", rank)); 876 PetscCall(PetscFixFilename(file, fname)); 877 PetscCall(PetscFOpen(PETSC_COMM_SELF, fname, "w", &fd)); 878 PetscCheck(!(rank == 0) || !(!fd), PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Cannot open file: %s", fname); 879 /* Output totals */ 880 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Total Flop %14e %16.8e\n", petsc_TotalFlops, _TotalTime)); 881 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Clock Resolution %g\n", 0.0)); 882 /* Output actions */ 883 if (def->petsc_logActions) { 884 PetscInt num_actions; 885 PetscCall(PetscLogActionArrayGetSize(def->petsc_actions, &num_actions, NULL)); 886 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Actions accomplished %d\n", (int)num_actions)); 887 for (int a = 0; a < num_actions; a++) { 888 Action *action; 889 890 PetscCall(PetscLogActionArrayGetRef(def->petsc_actions, a, &action)); 891 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%g %d %d %d %d %d %d %g %g %g\n", action->time, action->action, (int)action->event, (int)action->classid, action->id1, action->id2, action->id3, action->flops, action->mem, action->maxmem)); 892 } 893 } 894 /* Output objects */ 895 if (def->petsc_logObjects) { 896 PetscInt num_objects; 897 898 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 899 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Objects created %d destroyed %d\n", def->petsc_numObjectsCreated, def->petsc_numObjectsDestroyed)); 900 for (int o = 0; o < num_objects; o++) { 901 Object *object = NULL; 902 903 PetscCall(PetscLogObjectArrayGetRef(def->petsc_objects, o, &object)); 904 if (object->parent != -1) continue; // object with this id wasn't logged, probably a PetscLogHandler 905 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Parent ID: %d Memory: %d\n", object->parent, (int)object->mem)); 906 if (!object->name[0]) { 907 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Name\n")); 908 } else { 909 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Name: %s\n", object->name)); 910 } 911 if (!object->info[0]) { 912 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "No Info\n")); 913 } else { 914 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Info: %s\n", object->info)); 915 } 916 } 917 } 918 /* Output events */ 919 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "Event log:\n")); 920 PetscCall(PetscLogHandlerGetState(handler, &state)); 921 PetscCall(PetscLogStateGetNumEvents(state, &num_events)); 922 PetscCall(PetscLogStateGetCurrentStage(state, &curStage)); 923 for (event = 0; event < num_events; event++) { 924 PetscEventPerfInfo *event_info; 925 926 PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, curStage, event, &event_info)); 927 if (event_info->time != 0.0) flops = event_info->flops / event_info->time; 928 else flops = 0.0; 929 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%d %16d %16g %16g %16g\n", event, event_info->count, event_info->flops, event_info->time, flops)); 930 } 931 PetscCall(PetscFClose(PETSC_COMM_SELF, fd)); 932 PetscFunctionReturn(PETSC_SUCCESS); 933 } 934 935 /* 936 PetscLogView_Detailed - Each process prints the times for its own events 937 938 */ 939 static PetscErrorCode PetscLogHandlerView_Default_Detailed(PetscLogHandler handler, PetscViewer viewer) 940 { 941 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 942 PetscLogDouble locTotalTime, numRed, maxMem; 943 PetscInt numStages, numEvents; 944 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 945 PetscMPIInt rank, size; 946 PetscLogGlobalNames global_stages, global_events; 947 PetscLogState state; 948 PetscEventPerfInfo zero_info; 949 950 PetscFunctionBegin; 951 PetscCall(PetscLogHandlerGetState(handler, &state)); 952 PetscCallMPI(MPI_Comm_size(comm, &size)); 953 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 954 /* Must preserve reduction count before we go on */ 955 numRed = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 956 /* Get the total elapsed time */ 957 PetscCall(PetscTime(&locTotalTime)); 958 locTotalTime -= petsc_BaseTime; 959 PetscCall(PetscViewerASCIIPrintf(viewer, "size = %d\n", size)); 960 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalTimes = {}\n")); 961 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessages = {}\n")); 962 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMessageLens = {}\n")); 963 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalReductions = {}\n")); 964 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalFlop = {}\n")); 965 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalObjects = {}\n")); 966 PetscCall(PetscViewerASCIIPrintf(viewer, "LocalMemory = {}\n")); 967 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 968 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 969 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 970 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 971 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 972 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages = {}\n")); 973 for (PetscInt stage = 0; stage < numStages; stage++) { 974 PetscInt stage_id; 975 const char *stage_name; 976 977 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 978 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 979 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"] = {}\n", stage_name)); 980 PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"summary\"] = {}\n", stage_name)); 981 for (PetscInt event = 0; event < numEvents; event++) { 982 PetscEventPerfInfo *eventInfo = &zero_info; 983 PetscBool is_zero = PETSC_FALSE; 984 PetscInt event_id; 985 const char *event_name; 986 987 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 988 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 989 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &eventInfo)); 990 is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE; 991 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm)); 992 if (!is_zero) { PetscCall(PetscViewerASCIIPrintf(viewer, "Stages[\"%s\"][\"%s\"] = {}\n", stage_name, event_name)); } 993 } 994 } 995 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 996 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 997 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalTimes[%d] = %g\n", rank, locTotalTime)); 998 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessages[%d] = %g\n", rank, (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct))); 999 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMessageLens[%d] = %g\n", rank, (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len))); 1000 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalReductions[%d] = %g\n", rank, numRed)); 1001 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalFlop[%d] = %g\n", rank, petsc_TotalFlops)); 1002 { 1003 PetscInt num_objects; 1004 1005 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 1006 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalObjects[%d] = %d\n", rank, (int)num_objects)); 1007 } 1008 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "LocalMemory[%d] = %g\n", rank, maxMem)); 1009 PetscCall(PetscViewerFlush(viewer)); 1010 for (PetscInt stage = 0; stage < numStages; stage++) { 1011 PetscEventPerfInfo *stage_perf_info = &zero_info; 1012 PetscInt stage_id; 1013 const char *stage_name; 1014 1015 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1016 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1017 if (stage_id >= 0) { 1018 PetscStagePerf *stage_info; 1019 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info)); 1020 stage_perf_info = &stage_info->perfInfo; 1021 } 1022 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"summary\"][%d] = {\"time\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g}\n", stage_name, rank, stage_perf_info->time, 1023 stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops)); 1024 for (PetscInt event = 0; event < numEvents; event++) { 1025 PetscEventPerfInfo *eventInfo = &zero_info; 1026 PetscBool is_zero = PETSC_FALSE; 1027 PetscInt event_id; 1028 const char *event_name; 1029 1030 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1031 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1032 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &eventInfo)); 1033 is_zero = eventInfo->count == 0 ? PETSC_TRUE : PETSC_FALSE; 1034 PetscCall(PetscMemcmp(eventInfo, &zero_info, sizeof(zero_info), &is_zero)); 1035 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm)); 1036 if (!is_zero) { 1037 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "Stages[\"%s\"][\"%s\"][%d] = {\"count\" : %d, \"time\" : %g, \"syncTime\" : %g, \"numMessages\" : %g, \"messageLength\" : %g, \"numReductions\" : %g, \"flop\" : %g", stage_name, event_name, rank, 1038 eventInfo->count, eventInfo->time, eventInfo->syncTime, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops)); 1039 if (eventInfo->dof[0] >= 0.) { 1040 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"dof\" : [")); 1041 for (PetscInt d = 0; d < 8; ++d) { 1042 if (d > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1043 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->dof[d])); 1044 } 1045 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1046 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", \"error\" : [")); 1047 for (PetscInt e = 0; e < 8; ++e) { 1048 if (e > 0) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ", ")); 1049 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%g", eventInfo->errors[e])); 1050 } 1051 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "]")); 1052 } 1053 } 1054 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "}\n")); 1055 } 1056 } 1057 PetscCall(PetscViewerFlush(viewer)); 1058 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1059 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1060 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1061 PetscFunctionReturn(PETSC_SUCCESS); 1062 } 1063 1064 /* 1065 PetscLogView_CSV - Each process prints the times for its own events in Comma-Separated Value Format 1066 */ 1067 static PetscErrorCode PetscLogHandlerView_Default_CSV(PetscLogHandler handler, PetscViewer viewer) 1068 { 1069 PetscLogDouble locTotalTime, maxMem; 1070 PetscInt numStages, numEvents, stage, event; 1071 MPI_Comm comm = PetscObjectComm((PetscObject)viewer); 1072 PetscMPIInt rank, size; 1073 PetscLogGlobalNames global_stages, global_events; 1074 PetscLogState state; 1075 PetscEventPerfInfo zero_info; 1076 1077 PetscFunctionBegin; 1078 PetscCall(PetscLogHandlerGetState(handler, &state)); 1079 PetscCallMPI(MPI_Comm_size(comm, &size)); 1080 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1081 /* Must preserve reduction count before we go on */ 1082 /* Get the total elapsed time */ 1083 PetscCall(PetscTime(&locTotalTime)); 1084 locTotalTime -= petsc_BaseTime; 1085 PetscCall(PetscMallocGetMaximumUsage(&maxMem)); 1086 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 1087 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 1088 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 1089 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 1090 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 1091 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1092 PetscCall(PetscViewerASCIIPrintf(viewer, "Stage Name,Event Name,Rank,Count,Time,Num Messages,Message Length,Num Reductions,FLOP,dof0,dof1,dof2,dof3,dof4,dof5,dof6,dof7,e0,e1,e2,e3,e4,e5,e6,e7,%d\n", size)); 1093 PetscCall(PetscViewerFlush(viewer)); 1094 for (stage = 0; stage < numStages; stage++) { 1095 PetscEventPerfInfo *stage_perf_info; 1096 PetscInt stage_id; 1097 const char *stage_name; 1098 1099 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1100 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1101 stage_perf_info = &zero_info; 1102 if (stage_id >= 0) { 1103 PetscStagePerf *stage_info; 1104 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_info)); 1105 stage_perf_info = &stage_info->perfInfo; 1106 } 1107 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,summary,%d,1,%g,%g,%g,%g,%g\n", stage_name, rank, stage_perf_info->time, stage_perf_info->numMessages, stage_perf_info->messageLength, stage_perf_info->numReductions, stage_perf_info->flops)); 1108 for (event = 0; event < numEvents; event++) { 1109 PetscEventPerfInfo *eventInfo = &zero_info; 1110 PetscBool is_zero = PETSC_FALSE; 1111 PetscInt event_id; 1112 const char *event_name; 1113 1114 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1115 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1116 if (event_id >= 0 && stage_id >= 0) PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &eventInfo)); 1117 PetscCall(PetscMemcmp(eventInfo, &zero_info, sizeof(zero_info), &is_zero)); 1118 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm)); 1119 if (!is_zero) { 1120 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s,%s,%d,%d,%g,%g,%g,%g,%g", stage_name, event_name, rank, eventInfo->count, eventInfo->time, eventInfo->numMessages, eventInfo->messageLength, eventInfo->numReductions, eventInfo->flops)); 1121 if (eventInfo->dof[0] >= 0.) { 1122 for (PetscInt d = 0; d < 8; ++d) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->dof[d])); 1123 for (PetscInt e = 0; e < 8; ++e) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, ",%g", eventInfo->errors[e])); 1124 } 1125 } 1126 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1127 } 1128 } 1129 PetscCall(PetscViewerFlush(viewer)); 1130 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1131 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1132 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1133 PetscFunctionReturn(PETSC_SUCCESS); 1134 } 1135 1136 static PetscErrorCode PetscLogViewWarnSync(MPI_Comm comm, FILE *fd) 1137 { 1138 PetscFunctionBegin; 1139 if (!PetscLogSyncOn) PetscFunctionReturn(PETSC_SUCCESS); 1140 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1141 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1142 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1143 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1144 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1145 PetscCall(PetscFPrintf(comm, fd, " # This program was run with logging synchronization. #\n")); 1146 PetscCall(PetscFPrintf(comm, fd, " # This option provides more meaningful imbalance #\n")); 1147 PetscCall(PetscFPrintf(comm, fd, " # figures at the expense of slowing things down and #\n")); 1148 PetscCall(PetscFPrintf(comm, fd, " # providing a distorted view of the overall runtime. #\n")); 1149 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1150 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1151 PetscFunctionReturn(PETSC_SUCCESS); 1152 } 1153 1154 static PetscErrorCode PetscLogViewWarnDebugging(MPI_Comm comm, FILE *fd) 1155 { 1156 PetscFunctionBegin; 1157 if (PetscDefined(USE_DEBUG)) { 1158 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1159 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1160 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1161 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1162 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1163 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with a debugging option. #\n")); 1164 PetscCall(PetscFPrintf(comm, fd, " # To get timing results run ./configure #\n")); 1165 PetscCall(PetscFPrintf(comm, fd, " # using --with-debugging=no, the performance will #\n")); 1166 PetscCall(PetscFPrintf(comm, fd, " # be generally two or three times faster. #\n")); 1167 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1168 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1169 } 1170 PetscFunctionReturn(PETSC_SUCCESS); 1171 } 1172 1173 static PetscErrorCode PetscLogViewWarnNoGpuAwareMpi(MPI_Comm comm, FILE *fd) 1174 { 1175 #if defined(PETSC_HAVE_DEVICE) 1176 PetscMPIInt size; 1177 PetscBool deviceInitialized = PETSC_FALSE; 1178 1179 PetscFunctionBegin; 1180 PetscCallMPI(MPI_Comm_size(comm, &size)); 1181 for (int i = PETSC_DEVICE_HOST + 1; i < PETSC_DEVICE_MAX; ++i) { 1182 const PetscDeviceType dtype = PetscDeviceTypeCast(i); 1183 if (PetscDeviceInitialized(dtype)) { /* a non-host device was initialized */ 1184 deviceInitialized = PETSC_TRUE; 1185 break; 1186 } 1187 } 1188 /* the last condition says petsc is configured with device but it is a pure CPU run, so don't print misleading warnings */ 1189 if (use_gpu_aware_mpi || size == 1 || !deviceInitialized) PetscFunctionReturn(PETSC_SUCCESS); 1190 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1191 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1192 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1193 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1194 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1195 PetscCall(PetscFPrintf(comm, fd, " # This code was compiled with GPU support and you've #\n")); 1196 PetscCall(PetscFPrintf(comm, fd, " # created PETSc/GPU objects, but you intentionally #\n")); 1197 PetscCall(PetscFPrintf(comm, fd, " # used -use_gpu_aware_mpi 0, requiring PETSc to copy #\n")); 1198 PetscCall(PetscFPrintf(comm, fd, " # additional data between the GPU and CPU. To obtain #\n")); 1199 PetscCall(PetscFPrintf(comm, fd, " # meaningful timing results on multi-rank runs, use #\n")); 1200 PetscCall(PetscFPrintf(comm, fd, " # GPU-aware MPI instead. #\n")); 1201 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1202 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1203 PetscFunctionReturn(PETSC_SUCCESS); 1204 #else 1205 return PETSC_SUCCESS; 1206 #endif 1207 } 1208 1209 static PetscErrorCode PetscLogViewWarnGpuTime(MPI_Comm comm, FILE *fd) 1210 { 1211 #if defined(PETSC_HAVE_DEVICE) 1212 1213 PetscFunctionBegin; 1214 if (!PetscLogGpuTimeFlag || petsc_gflops == 0) PetscFunctionReturn(PETSC_SUCCESS); 1215 PetscCall(PetscFPrintf(comm, fd, "\n\n")); 1216 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n")); 1217 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1218 PetscCall(PetscFPrintf(comm, fd, " # WARNING!!! #\n")); 1219 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1220 PetscCall(PetscFPrintf(comm, fd, " # This code was run with -log_view_gpu_time #\n")); 1221 PetscCall(PetscFPrintf(comm, fd, " # This provides accurate timing within the GPU kernels #\n")); 1222 PetscCall(PetscFPrintf(comm, fd, " # but can slow down the entire computation by a #\n")); 1223 PetscCall(PetscFPrintf(comm, fd, " # measurable amount. For fastest runs we recommend #\n")); 1224 PetscCall(PetscFPrintf(comm, fd, " # not using this option. #\n")); 1225 PetscCall(PetscFPrintf(comm, fd, " # #\n")); 1226 PetscCall(PetscFPrintf(comm, fd, " ##########################################################\n\n\n")); 1227 PetscFunctionReturn(PETSC_SUCCESS); 1228 #else 1229 return PETSC_SUCCESS; 1230 #endif 1231 } 1232 1233 static PetscErrorCode PetscLogHandlerView_Default_Info(PetscLogHandler handler, PetscViewer viewer) 1234 { 1235 FILE *fd; 1236 PetscLogHandler_Default def = (PetscLogHandler_Default)handler->data; 1237 char arch[128], hostname[128], username[128], pname[PETSC_MAX_PATH_LEN], date[128]; 1238 PetscLogDouble locTotalTime, TotalTime, TotalFlops; 1239 PetscLogDouble numMessages, messageLength, avgMessLen, numReductions; 1240 PetscLogDouble stageTime, flops, flopr, mem, mess, messLen, red; 1241 PetscLogDouble fracTime, fracFlops, fracMessages, fracLength, fracReductions, fracMess, fracMessLen, fracRed; 1242 PetscLogDouble fracStageTime, fracStageFlops, fracStageMess, fracStageMessLen, fracStageRed; 1243 PetscLogDouble min, max, tot, ratio, avg, x, y; 1244 PetscLogDouble minf, maxf, totf, ratf, mint, maxt, tott, ratt, ratC, totm, totml, totr, mal, malmax, emalmax; 1245 #if defined(PETSC_HAVE_DEVICE) 1246 PetscLogEvent KSP_Solve, SNES_Solve, TS_Step, TAO_Solve; /* These need to be fixed to be some events registered with certain objects */ 1247 PetscLogDouble cct, gct, csz, gsz, gmaxt, gflops, gflopr, fracgflops; 1248 #endif 1249 PetscMPIInt minC, maxC; 1250 PetscMPIInt size, rank; 1251 PetscBool *localStageUsed, *stageUsed; 1252 PetscBool *localStageVisible, *stageVisible; 1253 PetscInt numStages, numEvents; 1254 int stage, oclass; 1255 PetscLogEvent event; 1256 char version[256]; 1257 MPI_Comm comm; 1258 #if defined(PETSC_HAVE_DEVICE) 1259 PetscInt64 nas = 0x7FF0000000000002; 1260 #endif 1261 PetscLogGlobalNames global_stages, global_events; 1262 PetscEventPerfInfo zero_info; 1263 PetscLogState state; 1264 1265 PetscFunctionBegin; 1266 PetscCall(PetscLogHandlerGetState(handler, &state)); 1267 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 1268 PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm)); 1269 PetscCall(PetscViewerASCIIGetPointer(viewer, &fd)); 1270 PetscCallMPI(MPI_Comm_size(comm, &size)); 1271 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1272 /* Get the total elapsed time */ 1273 PetscCall(PetscTime(&locTotalTime)); 1274 locTotalTime -= petsc_BaseTime; 1275 1276 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1277 PetscCall(PetscFPrintf(comm, fd, "*** WIDEN YOUR WINDOW TO 160 CHARACTERS. Use 'enscript -r -fCourier9' to print this document ***\n")); 1278 PetscCall(PetscFPrintf(comm, fd, "****************************************************************************************************************************************************************\n")); 1279 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------ PETSc Performance Summary: ------------------------------------------------------------------\n\n")); 1280 PetscCall(PetscLogViewWarnSync(comm, fd)); 1281 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1282 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1283 PetscCall(PetscLogViewWarnGpuTime(comm, fd)); 1284 PetscCall(PetscGetArchType(arch, sizeof(arch))); 1285 PetscCall(PetscGetHostName(hostname, sizeof(hostname))); 1286 PetscCall(PetscGetUserName(username, sizeof(username))); 1287 PetscCall(PetscGetProgramName(pname, sizeof(pname))); 1288 PetscCall(PetscGetDate(date, sizeof(date))); 1289 PetscCall(PetscGetVersion(version, sizeof(version))); 1290 if (size == 1) { 1291 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processor, by %s %s\n", pname, arch, hostname, size, username, date)); 1292 } else { 1293 PetscCall(PetscFPrintf(comm, fd, "%s on a %s named %s with %d processors, by %s %s\n", pname, arch, hostname, size, username, date)); 1294 } 1295 #if defined(PETSC_HAVE_OPENMP) 1296 PetscCall(PetscFPrintf(comm, fd, "Using %" PetscInt_FMT " OpenMP threads\n", PetscNumOMPThreads)); 1297 #endif 1298 PetscCall(PetscFPrintf(comm, fd, "Using %s\n", version)); 1299 1300 /* Must preserve reduction count before we go on */ 1301 red = petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct; 1302 1303 /* Calculate summary information */ 1304 PetscCall(PetscFPrintf(comm, fd, "\n Max Max/Min Avg Total\n")); 1305 /* Time */ 1306 PetscCall(MPIU_Allreduce(&locTotalTime, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1307 PetscCall(MPIU_Allreduce(&locTotalTime, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1308 PetscCall(MPIU_Allreduce(&locTotalTime, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1309 avg = tot / ((PetscLogDouble)size); 1310 if (min != 0.0) ratio = max / min; 1311 else ratio = 0.0; 1312 PetscCall(PetscFPrintf(comm, fd, "Time (sec): %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1313 TotalTime = tot; 1314 /* Objects */ 1315 { 1316 PetscInt num_objects; 1317 1318 PetscCall(PetscLogObjectArrayGetSize(def->petsc_objects, &num_objects, NULL)); 1319 avg = (PetscLogDouble)num_objects; 1320 } 1321 PetscCall(MPIU_Allreduce(&avg, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1322 PetscCall(MPIU_Allreduce(&avg, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1323 PetscCall(MPIU_Allreduce(&avg, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1324 avg = tot / ((PetscLogDouble)size); 1325 if (min != 0.0) ratio = max / min; 1326 else ratio = 0.0; 1327 PetscCall(PetscFPrintf(comm, fd, "Objects: %5.3e %7.3f %5.3e\n", max, ratio, avg)); 1328 /* Flops */ 1329 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1330 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1331 PetscCall(MPIU_Allreduce(&petsc_TotalFlops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1332 avg = tot / ((PetscLogDouble)size); 1333 if (min != 0.0) ratio = max / min; 1334 else ratio = 0.0; 1335 PetscCall(PetscFPrintf(comm, fd, "Flops: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1336 TotalFlops = tot; 1337 /* Flops/sec -- Must talk to Barry here */ 1338 if (locTotalTime != 0.0) flops = petsc_TotalFlops / locTotalTime; 1339 else flops = 0.0; 1340 PetscCall(MPIU_Allreduce(&flops, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1341 PetscCall(MPIU_Allreduce(&flops, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1342 PetscCall(MPIU_Allreduce(&flops, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1343 avg = tot / ((PetscLogDouble)size); 1344 if (min != 0.0) ratio = max / min; 1345 else ratio = 0.0; 1346 PetscCall(PetscFPrintf(comm, fd, "Flops/sec: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1347 /* Memory */ 1348 PetscCall(PetscMallocGetMaximumUsage(&mem)); 1349 if (mem > 0.0) { 1350 PetscCall(MPIU_Allreduce(&mem, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1351 PetscCall(MPIU_Allreduce(&mem, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1352 PetscCall(MPIU_Allreduce(&mem, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1353 avg = tot / ((PetscLogDouble)size); 1354 if (min != 0.0) ratio = max / min; 1355 else ratio = 0.0; 1356 PetscCall(PetscFPrintf(comm, fd, "Memory (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1357 } 1358 /* Messages */ 1359 mess = 0.5 * (petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct); 1360 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1361 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1362 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1363 avg = tot / ((PetscLogDouble)size); 1364 if (min != 0.0) ratio = max / min; 1365 else ratio = 0.0; 1366 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Count: %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1367 numMessages = tot; 1368 /* Message Lengths */ 1369 mess = 0.5 * (petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len); 1370 PetscCall(MPIU_Allreduce(&mess, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1371 PetscCall(MPIU_Allreduce(&mess, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1372 PetscCall(MPIU_Allreduce(&mess, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1373 if (numMessages != 0) avg = tot / numMessages; 1374 else avg = 0.0; 1375 if (min != 0.0) ratio = max / min; 1376 else ratio = 0.0; 1377 PetscCall(PetscFPrintf(comm, fd, "MPI Msg Len (bytes): %5.3e %7.3f %5.3e %5.3e\n", max, ratio, avg, tot)); 1378 messageLength = tot; 1379 /* Reductions */ 1380 PetscCall(MPIU_Allreduce(&red, &min, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1381 PetscCall(MPIU_Allreduce(&red, &max, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1382 PetscCall(MPIU_Allreduce(&red, &tot, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1383 if (min != 0.0) ratio = max / min; 1384 else ratio = 0.0; 1385 PetscCall(PetscFPrintf(comm, fd, "MPI Reductions: %5.3e %7.3f\n", max, ratio)); 1386 numReductions = red; /* wrong because uses count from process zero */ 1387 PetscCall(PetscFPrintf(comm, fd, "\nFlop counting convention: 1 flop = 1 real number operation of type (multiply/divide/add/subtract)\n")); 1388 PetscCall(PetscFPrintf(comm, fd, " e.g., VecAXPY() for real vectors of length N --> 2N flops\n")); 1389 PetscCall(PetscFPrintf(comm, fd, " and VecAXPY() for complex vectors of length N --> 8N flops\n")); 1390 1391 PetscCall(PetscLogRegistryCreateGlobalStageNames(comm, state->registry, &global_stages)); 1392 PetscCall(PetscLogRegistryCreateGlobalEventNames(comm, state->registry, &global_events)); 1393 PetscCall(PetscLogGlobalNamesGetSize(global_stages, NULL, &numStages)); 1394 PetscCall(PetscLogGlobalNamesGetSize(global_events, NULL, &numEvents)); 1395 PetscCall(PetscMemzero(&zero_info, sizeof(zero_info))); 1396 PetscCall(PetscMalloc1(numStages, &localStageUsed)); 1397 PetscCall(PetscMalloc1(numStages, &stageUsed)); 1398 PetscCall(PetscMalloc1(numStages, &localStageVisible)); 1399 PetscCall(PetscMalloc1(numStages, &stageVisible)); 1400 if (numStages > 0) { 1401 for (stage = 0; stage < numStages; stage++) { 1402 PetscInt stage_id; 1403 1404 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1405 if (stage_id >= 0) { 1406 PetscStagePerf *stage_info; 1407 1408 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_info)); 1409 localStageUsed[stage] = stage_info->used; 1410 localStageVisible[stage] = stage_info->perfInfo.visible; 1411 } else { 1412 localStageUsed[stage] = PETSC_FALSE; 1413 localStageVisible[stage] = PETSC_TRUE; 1414 } 1415 } 1416 PetscCall(MPIU_Allreduce(localStageUsed, stageUsed, numStages, MPIU_BOOL, MPI_LOR, comm)); 1417 PetscCall(MPIU_Allreduce(localStageVisible, stageVisible, numStages, MPIU_BOOL, MPI_LAND, comm)); 1418 for (stage = 0; stage < numStages; stage++) { 1419 if (stageUsed[stage] && stageVisible[stage]) { 1420 PetscCall(PetscFPrintf(comm, fd, "\nSummary of Stages: ----- Time ------ ----- Flop ------ --- Messages --- -- Message Lengths -- -- Reductions --\n")); 1421 PetscCall(PetscFPrintf(comm, fd, " Avg %%Total Avg %%Total Count %%Total Avg %%Total Count %%Total\n")); 1422 break; 1423 } 1424 } 1425 for (stage = 0; stage < numStages; stage++) { 1426 PetscInt stage_id; 1427 PetscEventPerfInfo *stage_info; 1428 const char *stage_name; 1429 1430 if (!(stageUsed[stage] && stageVisible[stage])) continue; 1431 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1432 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1433 stage_info = &zero_info; 1434 if (localStageUsed[stage]) { 1435 PetscStagePerf *stage_perf_info; 1436 1437 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage, &stage_perf_info)); 1438 stage_info = &stage_perf_info->perfInfo; 1439 } 1440 PetscCall(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1441 PetscCall(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1442 PetscCall(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1443 PetscCall(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1444 PetscCall(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1445 mess *= 0.5; 1446 messLen *= 0.5; 1447 red /= size; 1448 if (TotalTime != 0.0) fracTime = stageTime / TotalTime; 1449 else fracTime = 0.0; 1450 if (TotalFlops != 0.0) fracFlops = flops / TotalFlops; 1451 else fracFlops = 0.0; 1452 /* Talk to Barry if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0; */ 1453 if (numMessages != 0.0) fracMessages = mess / numMessages; 1454 else fracMessages = 0.0; 1455 if (mess != 0.0) avgMessLen = messLen / mess; 1456 else avgMessLen = 0.0; 1457 if (messageLength != 0.0) fracLength = messLen / messageLength; 1458 else fracLength = 0.0; 1459 if (numReductions != 0.0) fracReductions = red / numReductions; 1460 else fracReductions = 0.0; 1461 PetscCall(PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%%\n", stage, stage_name, stageTime / size, 100.0 * fracTime, flops, 100.0 * fracFlops, mess, 100.0 * fracMessages, avgMessLen, 100.0 * fracLength, red, 100.0 * fracReductions)); 1462 } 1463 } 1464 1465 PetscCall(PetscFPrintf(comm, fd, "\n------------------------------------------------------------------------------------------------------------------------\n")); 1466 PetscCall(PetscFPrintf(comm, fd, "See the 'Profiling' chapter of the users' manual for details on interpreting output.\n")); 1467 PetscCall(PetscFPrintf(comm, fd, "Phase summary info:\n")); 1468 PetscCall(PetscFPrintf(comm, fd, " Count: number of times phase was executed\n")); 1469 PetscCall(PetscFPrintf(comm, fd, " Time and Flop: Max - maximum over all processors\n")); 1470 PetscCall(PetscFPrintf(comm, fd, " Ratio - ratio of maximum to minimum over all processors\n")); 1471 PetscCall(PetscFPrintf(comm, fd, " Mess: number of messages sent\n")); 1472 PetscCall(PetscFPrintf(comm, fd, " AvgLen: average message length (bytes)\n")); 1473 PetscCall(PetscFPrintf(comm, fd, " Reduct: number of global reductions\n")); 1474 PetscCall(PetscFPrintf(comm, fd, " Global: entire computation\n")); 1475 PetscCall(PetscFPrintf(comm, fd, " Stage: stages of a computation. Set stages with PetscLogStagePush() and PetscLogStagePop().\n")); 1476 PetscCall(PetscFPrintf(comm, fd, " %%T - percent time in this phase %%F - percent flop in this phase\n")); 1477 PetscCall(PetscFPrintf(comm, fd, " %%M - percent messages in this phase %%L - percent message lengths in this phase\n")); 1478 PetscCall(PetscFPrintf(comm, fd, " %%R - percent reductions in this phase\n")); 1479 PetscCall(PetscFPrintf(comm, fd, " Total Mflop/s: 10e-6 * (sum of flop over all processors)/(max time over all processors)\n")); 1480 if (PetscLogMemory) { 1481 PetscCall(PetscFPrintf(comm, fd, " Memory usage is summed over all MPI processes, it is given in mega-bytes\n")); 1482 PetscCall(PetscFPrintf(comm, fd, " Malloc Mbytes: Memory allocated and kept during event (sum over all calls to event). May be negative\n")); 1483 PetscCall(PetscFPrintf(comm, fd, " EMalloc Mbytes: extra memory allocated during event and then freed (maximum over all calls to events). Never negative\n")); 1484 PetscCall(PetscFPrintf(comm, fd, " MMalloc Mbytes: Increase in high water mark of allocated memory (sum over all calls to event). Never negative\n")); 1485 PetscCall(PetscFPrintf(comm, fd, " RMI Mbytes: Increase in resident memory (sum over all calls to event)\n")); 1486 } 1487 #if defined(PETSC_HAVE_DEVICE) 1488 PetscCall(PetscFPrintf(comm, fd, " GPU Mflop/s: 10e-6 * (sum of flop on GPU over all processors)/(max GPU time over all processors)\n")); 1489 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Count: total number of CPU to GPU copies per processor\n")); 1490 PetscCall(PetscFPrintf(comm, fd, " CpuToGpu Size (Mbytes): 10e-6 * (total size of CPU to GPU copies per processor)\n")); 1491 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Count: total number of GPU to CPU copies per processor\n")); 1492 PetscCall(PetscFPrintf(comm, fd, " GpuToCpu Size (Mbytes): 10e-6 * (total size of GPU to CPU copies per processor)\n")); 1493 PetscCall(PetscFPrintf(comm, fd, " GPU %%F: percent flops on GPU in this event\n")); 1494 #endif 1495 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------\n")); 1496 1497 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1498 1499 /* Report events */ 1500 PetscCall(PetscFPrintf(comm, fd, "Event Count Time (sec) Flop --- Global --- --- Stage ---- Total")); 1501 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Malloc EMalloc MMalloc RMI")); 1502 #if defined(PETSC_HAVE_DEVICE) 1503 PetscCall(PetscFPrintf(comm, fd, " GPU - CpuToGpu - - GpuToCpu - GPU")); 1504 #endif 1505 PetscCall(PetscFPrintf(comm, fd, "\n")); 1506 PetscCall(PetscFPrintf(comm, fd, " Max Ratio Max Ratio Max Ratio Mess AvgLen Reduct %%T %%F %%M %%L %%R %%T %%F %%M %%L %%R Mflop/s")); 1507 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " Mbytes Mbytes Mbytes Mbytes")); 1508 #if defined(PETSC_HAVE_DEVICE) 1509 PetscCall(PetscFPrintf(comm, fd, " Mflop/s Count Size Count Size %%F")); 1510 #endif 1511 PetscCall(PetscFPrintf(comm, fd, "\n")); 1512 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1513 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1514 #if defined(PETSC_HAVE_DEVICE) 1515 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1516 #endif 1517 PetscCall(PetscFPrintf(comm, fd, "\n")); 1518 1519 #if defined(PETSC_HAVE_DEVICE) 1520 /* this indirect way of accessing these values is needed when PETSc is build with multiple libraries since the symbols are not in libpetscsys */ 1521 PetscCall(PetscLogStateGetEventFromName(state, "TAOSolve", &TAO_Solve)); 1522 PetscCall(PetscLogStateGetEventFromName(state, "TSStep", &TS_Step)); 1523 PetscCall(PetscLogStateGetEventFromName(state, "SNESSolve", &SNES_Solve)); 1524 PetscCall(PetscLogStateGetEventFromName(state, "KSPSolve", &KSP_Solve)); 1525 #endif 1526 1527 for (stage = 0; stage < numStages; stage++) { 1528 PetscInt stage_id; 1529 PetscEventPerfInfo *stage_info; 1530 const char *stage_name; 1531 1532 if (!(stageVisible[stage] && stageUsed[stage])) continue; 1533 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_stages, stage, &stage_id)); 1534 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1535 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stage_name)); 1536 stage_info = &zero_info; 1537 if (localStageUsed[stage]) { 1538 PetscStagePerf *stage_perf_info; 1539 1540 PetscCall(PetscLogHandlerDefaultGetStageInfo(handler, stage_id, &stage_perf_info)); 1541 stage_info = &stage_perf_info->perfInfo; 1542 } 1543 PetscCall(MPIU_Allreduce(&stage_info->time, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1544 PetscCall(MPIU_Allreduce(&stage_info->flops, &flops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1545 PetscCall(MPIU_Allreduce(&stage_info->numMessages, &mess, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1546 PetscCall(MPIU_Allreduce(&stage_info->messageLength, &messLen, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1547 PetscCall(MPIU_Allreduce(&stage_info->numReductions, &red, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1548 mess *= 0.5; 1549 messLen *= 0.5; 1550 red /= size; 1551 1552 for (event = 0; event < numEvents; event++) { 1553 PetscInt event_id; 1554 PetscEventPerfInfo *event_info = &zero_info; 1555 PetscBool is_zero = PETSC_FALSE; 1556 const char *event_name; 1557 1558 PetscCall(PetscLogGlobalNamesGlobalGetLocal(global_events, event, &event_id)); 1559 PetscCall(PetscLogGlobalNamesGlobalGetName(global_events, event, &event_name)); 1560 if (event_id >= 0 && stage_id >= 0) { PetscCall(PetscLogHandlerDefaultGetEventPerfInfo(handler, stage_id, event_id, &event_info)); } 1561 PetscCall(PetscMemcmp(event_info, &zero_info, sizeof(zero_info), &is_zero)); 1562 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &is_zero, 1, MPIU_BOOL, MPI_LAND, comm)); 1563 if (!is_zero) { 1564 flopr = event_info->flops; 1565 PetscCall(MPIU_Allreduce(&flopr, &minf, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1566 PetscCall(MPIU_Allreduce(&flopr, &maxf, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1567 PetscCall(MPIU_Allreduce(&event_info->flops, &totf, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1568 PetscCall(MPIU_Allreduce(&event_info->time, &mint, 1, MPIU_PETSCLOGDOUBLE, MPI_MIN, comm)); 1569 PetscCall(MPIU_Allreduce(&event_info->time, &maxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1570 PetscCall(MPIU_Allreduce(&event_info->time, &tott, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1571 PetscCall(MPIU_Allreduce(&event_info->numMessages, &totm, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1572 PetscCall(MPIU_Allreduce(&event_info->messageLength, &totml, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1573 PetscCall(MPIU_Allreduce(&event_info->numReductions, &totr, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1574 PetscCall(MPIU_Allreduce(&event_info->count, &minC, 1, MPI_INT, MPI_MIN, comm)); 1575 PetscCall(MPIU_Allreduce(&event_info->count, &maxC, 1, MPI_INT, MPI_MAX, comm)); 1576 if (PetscLogMemory) { 1577 PetscCall(MPIU_Allreduce(&event_info->memIncrease, &mem, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1578 PetscCall(MPIU_Allreduce(&event_info->mallocSpace, &mal, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1579 PetscCall(MPIU_Allreduce(&event_info->mallocIncrease, &malmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1580 PetscCall(MPIU_Allreduce(&event_info->mallocIncreaseEvent, &emalmax, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1581 } 1582 #if defined(PETSC_HAVE_DEVICE) 1583 PetscCall(MPIU_Allreduce(&event_info->CpuToGpuCount, &cct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1584 PetscCall(MPIU_Allreduce(&event_info->GpuToCpuCount, &gct, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1585 PetscCall(MPIU_Allreduce(&event_info->CpuToGpuSize, &csz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1586 PetscCall(MPIU_Allreduce(&event_info->GpuToCpuSize, &gsz, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1587 PetscCall(MPIU_Allreduce(&event_info->GpuFlops, &gflops, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm)); 1588 PetscCall(MPIU_Allreduce(&event_info->GpuTime, &gmaxt, 1, MPIU_PETSCLOGDOUBLE, MPI_MAX, comm)); 1589 #endif 1590 if (mint < 0.0) { 1591 PetscCall(PetscFPrintf(comm, fd, "WARNING!!! Minimum time %g over all processors for %s is negative! This happens\n on some machines whose times cannot handle too rapid calls.!\n artificially changing minimum to zero.\n", mint, event_name)); 1592 mint = 0; 1593 } 1594 PetscCheck(minf >= 0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Minimum flop %g over all processors for %s is negative! Not possible!", minf, event_name); 1595 #if defined(PETSC_HAVE_DEVICE) 1596 /* Put NaN into the time for all events that may not be time accurately since they may happen asynchronously on the GPU */ 1597 if (!PetscLogGpuTimeFlag && petsc_gflops > 0) { 1598 memcpy(&gmaxt, &nas, sizeof(PetscLogDouble)); 1599 if (event_id != SNES_Solve && event_id != KSP_Solve && event_id != TS_Step && event_id != TAO_Solve) { 1600 memcpy(&mint, &nas, sizeof(PetscLogDouble)); 1601 memcpy(&maxt, &nas, sizeof(PetscLogDouble)); 1602 } 1603 } 1604 #endif 1605 totm *= 0.5; 1606 totml *= 0.5; 1607 totr /= size; 1608 1609 if (maxC != 0) { 1610 if (minC != 0) ratC = ((PetscLogDouble)maxC) / minC; 1611 else ratC = 0.0; 1612 if (mint != 0.0) ratt = maxt / mint; 1613 else ratt = 0.0; 1614 if (minf != 0.0) ratf = maxf / minf; 1615 else ratf = 0.0; 1616 if (TotalTime != 0.0) fracTime = tott / TotalTime; 1617 else fracTime = 0.0; 1618 if (TotalFlops != 0.0) fracFlops = totf / TotalFlops; 1619 else fracFlops = 0.0; 1620 if (stageTime != 0.0) fracStageTime = tott / stageTime; 1621 else fracStageTime = 0.0; 1622 if (flops != 0.0) fracStageFlops = totf / flops; 1623 else fracStageFlops = 0.0; 1624 if (numMessages != 0.0) fracMess = totm / numMessages; 1625 else fracMess = 0.0; 1626 if (messageLength != 0.0) fracMessLen = totml / messageLength; 1627 else fracMessLen = 0.0; 1628 if (numReductions != 0.0) fracRed = totr / numReductions; 1629 else fracRed = 0.0; 1630 if (mess != 0.0) fracStageMess = totm / mess; 1631 else fracStageMess = 0.0; 1632 if (messLen != 0.0) fracStageMessLen = totml / messLen; 1633 else fracStageMessLen = 0.0; 1634 if (red != 0.0) fracStageRed = totr / red; 1635 else fracStageRed = 0.0; 1636 if (totm != 0.0) totml /= totm; 1637 else totml = 0.0; 1638 if (maxt != 0.0) flopr = totf / maxt; 1639 else flopr = 0.0; 1640 if (fracStageTime > 1.0 || fracStageFlops > 1.0 || fracStageMess > 1.0 || fracStageMessLen > 1.0 || fracStageRed > 1.0) 1641 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f Multiple stages %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, PetscAbs(flopr) / 1.0e6)); 1642 else 1643 PetscCall(PetscFPrintf(comm, fd, "%-16s %7d %3.1f %5.4e %3.1f %3.2e %3.1f %2.1e %2.1e %2.1e %2.0f %2.0f %2.0f %2.0f %2.0f %3.0f %2.0f %2.0f %2.0f %2.0f %5.0f", event_name, maxC, ratC, maxt, ratt, maxf, ratf, totm, totml, totr, 100.0 * fracTime, 100.0 * fracFlops, 100.0 * fracMess, 100.0 * fracMessLen, 100.0 * fracRed, 100.0 * fracStageTime, 100.0 * fracStageFlops, 100.0 * fracStageMess, 100.0 * fracStageMessLen, 100.0 * fracStageRed, PetscAbs(flopr) / 1.0e6)); 1644 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, " %5.0f %5.0f %5.0f %5.0f", mal / 1.0e6, emalmax / 1.0e6, malmax / 1.0e6, mem / 1.0e6)); 1645 #if defined(PETSC_HAVE_DEVICE) 1646 if (totf != 0.0) fracgflops = gflops / totf; 1647 else fracgflops = 0.0; 1648 if (gmaxt != 0.0) gflopr = gflops / gmaxt; 1649 else gflopr = 0.0; 1650 PetscCall(PetscFPrintf(comm, fd, " %5.0f %4.0f %3.2e %4.0f %3.2e % 2.0f", PetscAbs(gflopr) / 1.0e6, cct / size, csz / (1.0e6 * size), gct / size, gsz / (1.0e6 * size), 100.0 * fracgflops)); 1651 #endif 1652 PetscCall(PetscFPrintf(comm, fd, "\n")); 1653 } 1654 } 1655 } 1656 } 1657 1658 /* Memory usage and object creation */ 1659 PetscCall(PetscFPrintf(comm, fd, "------------------------------------------------------------------------------------------------------------------------")); 1660 if (PetscLogMemory) PetscCall(PetscFPrintf(comm, fd, "-----------------------------")); 1661 #if defined(PETSC_HAVE_DEVICE) 1662 PetscCall(PetscFPrintf(comm, fd, "---------------------------------------")); 1663 #endif 1664 PetscCall(PetscFPrintf(comm, fd, "\n")); 1665 PetscCall(PetscFPrintf(comm, fd, "\n")); 1666 1667 /* Right now, only stages on the first processor are reported here, meaning only objects associated with 1668 the global communicator, or MPI_COMM_SELF for proc 1. We really should report global stats and then 1669 stats for stages local to processor sets. 1670 */ 1671 /* We should figure out the longest object name here (now 20 characters) */ 1672 PetscCall(PetscFPrintf(comm, fd, "Object Type Creations Destructions. Reports information only for process 0.\n")); 1673 for (stage = 0; stage < numStages; stage++) { 1674 const char *stage_name; 1675 1676 PetscCall(PetscLogGlobalNamesGlobalGetName(global_stages, stage, &stage_name)); 1677 PetscCall(PetscFPrintf(comm, fd, "\n--- Event Stage %d: %s\n\n", stage, stage_name)); 1678 if (localStageUsed[stage]) { 1679 PetscInt num_classes; 1680 1681 PetscCall(PetscLogStateGetNumClasses(state, &num_classes)); 1682 for (oclass = 0; oclass < num_classes; oclass++) { 1683 PetscClassPerf *class_perf_info; 1684 1685 PetscCall(PetscLogHandlerDefaultGetClassPerf(handler, stage, oclass, &class_perf_info)); 1686 if ((class_perf_info->creations > 0) || (class_perf_info->destructions > 0)) { 1687 PetscLogClassInfo class_reg_info; 1688 1689 PetscCall(PetscLogStateClassGetInfo(state, oclass, &class_reg_info)); 1690 PetscCall(PetscFPrintf(comm, fd, "%20s %5d %5d\n", class_reg_info.name, class_perf_info->creations, class_perf_info->destructions)); 1691 } 1692 } 1693 } 1694 } 1695 1696 PetscCall(PetscFree(localStageUsed)); 1697 PetscCall(PetscFree(stageUsed)); 1698 PetscCall(PetscFree(localStageVisible)); 1699 PetscCall(PetscFree(stageVisible)); 1700 PetscCall(PetscLogGlobalNamesDestroy(&global_stages)); 1701 PetscCall(PetscLogGlobalNamesDestroy(&global_events)); 1702 1703 /* Information unrelated to this particular run */ 1704 PetscCall(PetscFPrintf(comm, fd, "========================================================================================================================\n")); 1705 PetscCall(PetscTime(&y)); 1706 PetscCall(PetscTime(&x)); 1707 PetscCall(PetscTime(&y)); 1708 PetscCall(PetscTime(&y)); 1709 PetscCall(PetscTime(&y)); 1710 PetscCall(PetscTime(&y)); 1711 PetscCall(PetscTime(&y)); 1712 PetscCall(PetscTime(&y)); 1713 PetscCall(PetscTime(&y)); 1714 PetscCall(PetscTime(&y)); 1715 PetscCall(PetscTime(&y)); 1716 PetscCall(PetscTime(&y)); 1717 PetscCall(PetscFPrintf(comm, fd, "Average time to get PetscTime(): %g\n", (y - x) / 10.0)); 1718 /* MPI information */ 1719 if (size > 1) { 1720 MPI_Status status; 1721 PetscMPIInt tag; 1722 MPI_Comm newcomm; 1723 1724 PetscCallMPI(MPI_Barrier(comm)); 1725 PetscCall(PetscTime(&x)); 1726 PetscCallMPI(MPI_Barrier(comm)); 1727 PetscCallMPI(MPI_Barrier(comm)); 1728 PetscCallMPI(MPI_Barrier(comm)); 1729 PetscCallMPI(MPI_Barrier(comm)); 1730 PetscCallMPI(MPI_Barrier(comm)); 1731 PetscCall(PetscTime(&y)); 1732 PetscCall(PetscFPrintf(comm, fd, "Average time for MPI_Barrier(): %g\n", (y - x) / 5.0)); 1733 PetscCall(PetscCommDuplicate(comm, &newcomm, &tag)); 1734 PetscCallMPI(MPI_Barrier(comm)); 1735 if (rank) { 1736 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, rank - 1, tag, newcomm, &status)); 1737 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, (rank + 1) % size, tag, newcomm)); 1738 } else { 1739 PetscCall(PetscTime(&x)); 1740 PetscCallMPI(MPI_Send(NULL, 0, MPI_INT, 1, tag, newcomm)); 1741 PetscCallMPI(MPI_Recv(NULL, 0, MPI_INT, size - 1, tag, newcomm, &status)); 1742 PetscCall(PetscTime(&y)); 1743 PetscCall(PetscFPrintf(comm, fd, "Average time for zero size MPI_Send(): %g\n", (y - x) / size)); 1744 } 1745 PetscCall(PetscCommDestroy(&newcomm)); 1746 } 1747 PetscCall(PetscOptionsView(NULL, viewer)); 1748 1749 /* Machine and compile information */ 1750 if (PetscDefined(USE_FORTRAN_KERNELS)) { 1751 PetscCall(PetscFPrintf(comm, fd, "Compiled with FORTRAN kernels\n")); 1752 } else { 1753 PetscCall(PetscFPrintf(comm, fd, "Compiled without FORTRAN kernels\n")); 1754 } 1755 if (PetscDefined(USE_64BIT_INDICES)) { 1756 PetscCall(PetscFPrintf(comm, fd, "Compiled with 64-bit PetscInt\n")); 1757 } else if (PetscDefined(USE___FLOAT128)) { 1758 PetscCall(PetscFPrintf(comm, fd, "Compiled with 32-bit PetscInt\n")); 1759 } 1760 if (PetscDefined(USE_REAL_SINGLE)) { 1761 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision PetscScalar and PetscReal\n")); 1762 } else if (PetscDefined(USE___FLOAT128)) { 1763 PetscCall(PetscFPrintf(comm, fd, "Compiled with 128 bit precision PetscScalar and PetscReal\n")); 1764 } 1765 if (PetscDefined(USE_REAL_MAT_SINGLE)) { 1766 PetscCall(PetscFPrintf(comm, fd, "Compiled with single precision matrices\n")); 1767 } else { 1768 PetscCall(PetscFPrintf(comm, fd, "Compiled with full precision matrices (default)\n")); 1769 } 1770 PetscCall(PetscFPrintf(comm, fd, "sizeof(short) %d sizeof(int) %d sizeof(long) %d sizeof(void*) %d sizeof(PetscScalar) %d sizeof(PetscInt) %d\n", (int)sizeof(short), (int)sizeof(int), (int)sizeof(long), (int)sizeof(void *), (int)sizeof(PetscScalar), (int)sizeof(PetscInt))); 1771 1772 PetscCall(PetscFPrintf(comm, fd, "Configure options: %s", petscconfigureoptions)); 1773 PetscCall(PetscFPrintf(comm, fd, "%s", petscmachineinfo)); 1774 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerinfo)); 1775 PetscCall(PetscFPrintf(comm, fd, "%s", petsccompilerflagsinfo)); 1776 PetscCall(PetscFPrintf(comm, fd, "%s", petsclinkerinfo)); 1777 1778 /* Cleanup */ 1779 PetscCall(PetscFPrintf(comm, fd, "\n")); 1780 PetscCall(PetscLogViewWarnNoGpuAwareMpi(comm, fd)); 1781 PetscCall(PetscLogViewWarnDebugging(comm, fd)); 1782 PetscCall(PetscFPTrapPop()); 1783 PetscFunctionReturn(PETSC_SUCCESS); 1784 } 1785 1786 static PetscErrorCode PetscLogHandlerView_Default(PetscLogHandler handler, PetscViewer viewer) 1787 { 1788 PetscViewerFormat format; 1789 PetscFunctionBegin; 1790 PetscCall(PetscViewerGetFormat(viewer, &format)); 1791 if (format == PETSC_VIEWER_DEFAULT || format == PETSC_VIEWER_ASCII_INFO) { 1792 PetscCall(PetscLogHandlerView_Default_Info(handler, viewer)); 1793 } else if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1794 PetscCall(PetscLogHandlerView_Default_Detailed(handler, viewer)); 1795 } else if (format == PETSC_VIEWER_ASCII_CSV) { 1796 PetscCall(PetscLogHandlerView_Default_CSV(handler, viewer)); 1797 } 1798 PetscFunctionReturn(PETSC_SUCCESS); 1799 } 1800 1801 /*MC 1802 PETSC_LOG_HANDLER_DEFAULT - PETSC_LOG_DEFAULT = "default" - A `PetscLogHandler` that collects data for PETSc 1803 default profiling log viewers (`PetscLogView()` and `PetscLogDump()`). 1804 1805 Options Database Keys: 1806 + -log_include_actions - include a growing list of actions (event beginnings and endings, object creations and destructions) in `PetscLogDump()` (`PetscLogActions()`). 1807 - -log_include_objects - include a growing list of object creations and destructions in `PetscLogDump()` (`PetscLogObjects()`). 1808 1809 Level: developer 1810 1811 .seealso: [](ch_profiling), `PetscLogHandler` 1812 M*/ 1813 1814 PETSC_INTERN PetscErrorCode PetscLogHandlerCreate_Default(PetscLogHandler handler) 1815 { 1816 PetscFunctionBegin; 1817 PetscCall(PetscLogHandlerContextCreate_Default((PetscLogHandler_Default *)&handler->data)); 1818 handler->ops->destroy = PetscLogHandlerDestroy_Default; 1819 handler->ops->eventbegin = PetscLogHandlerEventBegin_Default; 1820 handler->ops->eventend = PetscLogHandlerEventEnd_Default; 1821 handler->ops->eventsync = PetscLogHandlerEventSync_Default; 1822 handler->ops->objectcreate = PetscLogHandlerObjectCreate_Default; 1823 handler->ops->objectdestroy = PetscLogHandlerObjectDestroy_Default; 1824 handler->ops->stagepush = PetscLogHandlerStagePush_Default; 1825 handler->ops->stagepop = PetscLogHandlerStagePop_Default; 1826 handler->ops->view = PetscLogHandlerView_Default; 1827 PetscFunctionReturn(PETSC_SUCCESS); 1828 } 1829