1 /* 2 Defines profile/logging in PETSc. 3 */ 4 #ifndef PETSCLOG_H 5 #define PETSCLOG_H 6 7 #include <petscsys.h> 8 #include <petsctime.h> 9 10 /* SUBMANSEC = Sys */ 11 12 /* General logging of information; different from event logging */ 13 PETSC_EXTERN PetscErrorCode PetscInfo_Private(const char[], PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(3, 4); 14 #if defined(PETSC_USE_INFO) 15 #define PetscInfo(A, ...) PetscInfo_Private(PETSC_FUNCTION_NAME, ((PetscObject)A), __VA_ARGS__) 16 #else 17 #define PetscInfo(A, ...) 0 18 #endif 19 20 #define PetscInfo1(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 21 #define PetscInfo2(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 22 #define PetscInfo3(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 23 #define PetscInfo4(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 24 #define PetscInfo5(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 25 #define PetscInfo6(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 26 #define PetscInfo7(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 27 #define PetscInfo8(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 28 #define PetscInfo9(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 29 30 /*E 31 PetscInfoCommFlag - Describes the method by which to filter `PetscInfo()` by communicator size 32 33 Used as an input for `PetscInfoSetFilterCommSelf()` 34 35 $ `PETSC_INFO_COMM_ALL` - Default uninitialized value. `PetscInfo()` will not filter based on 36 communicator size (i.e. will print for all communicators) 37 $ `PETSC_INFO_COMM_NO_SELF` - `PetscInfo()` will NOT print for communicators with size = 1 (i.e. *_COMM_SELF) 38 $ `PETSC_INFO_COMM_ONLY_SELF` - `PetscInfo()` will ONLY print for communicators with size = 1 39 40 Level: intermediate 41 42 .seealso: `PetscInfo()`, `PetscInfoSetFromOptions()`, `PetscInfoSetFilterCommSelf()` 43 E*/ 44 typedef enum { 45 PETSC_INFO_COMM_ALL = -1, 46 PETSC_INFO_COMM_NO_SELF = 0, 47 PETSC_INFO_COMM_ONLY_SELF = 1 48 } PetscInfoCommFlag; 49 50 PETSC_EXTERN const char *const PetscInfoCommFlags[]; 51 PETSC_EXTERN PetscErrorCode PetscInfoDeactivateClass(PetscClassId); 52 PETSC_EXTERN PetscErrorCode PetscInfoActivateClass(PetscClassId); 53 PETSC_EXTERN PetscErrorCode PetscInfoEnabled(PetscClassId, PetscBool *); 54 PETSC_EXTERN PetscErrorCode PetscInfoAllow(PetscBool); 55 PETSC_EXTERN PetscErrorCode PetscInfoSetFile(const char[], const char[]); 56 PETSC_EXTERN PetscErrorCode PetscInfoGetFile(char **, FILE **); 57 PETSC_EXTERN PetscErrorCode PetscInfoSetClasses(PetscBool, PetscInt, const char *const *); 58 PETSC_EXTERN PetscErrorCode PetscInfoGetClass(const char *, PetscBool *); 59 PETSC_EXTERN PetscErrorCode PetscInfoGetInfo(PetscBool *, PetscBool *, PetscBool *, PetscBool *, PetscInfoCommFlag *); 60 PETSC_EXTERN PetscErrorCode PetscInfoProcessClass(const char[], PetscInt, const PetscClassId[]); 61 PETSC_EXTERN PetscErrorCode PetscInfoSetFilterCommSelf(PetscInfoCommFlag); 62 PETSC_EXTERN PetscErrorCode PetscInfoSetFromOptions(PetscOptions); 63 PETSC_EXTERN PetscErrorCode PetscInfoDestroy(void); 64 PETSC_EXTERN PetscBool PetscLogPrintInfo; /* if true, indicates PetscInfo() is turned on */ 65 66 /*MC 67 PetscLogEvent - id used to identify PETSc or user events which timed portions (blocks of executable) 68 code. 69 70 Level: intermediate 71 72 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStage` 73 M*/ 74 typedef int PetscLogEvent; 75 76 /*MC 77 PetscLogStage - id used to identify user stages (phases, sections) of runs - for logging 78 79 Level: intermediate 80 81 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEvent` 82 M*/ 83 typedef int PetscLogStage; 84 85 #define PETSC_EVENT 1311311 86 PETSC_EXTERN PetscLogEvent PETSC_LARGEST_EVENT; 87 88 /* Global flop counter */ 89 PETSC_EXTERN PetscLogDouble petsc_TotalFlops; 90 PETSC_EXTERN PetscLogDouble petsc_tmp_flops; 91 92 /* We must make the following structures available to access the event 93 activation flags in the PetscLogEventBegin/End() macros. These are not part of the PETSc public 94 API and are not intended to be used by other parts of PETSc or by users. 95 96 The code that manipulates these structures is in src/sys/logging/utils. 97 */ 98 typedef struct _n_PetscIntStack *PetscIntStack; 99 100 /* -----------------------------------------------------------------------------------------------------*/ 101 /* 102 PetscClassRegInfo, PetscClassPerfInfo - Each class has two data structures associated with it. The first has 103 static information about it, the second collects statistics on how many objects of the class are created, 104 how much memory they use, etc. 105 106 PetscClassRegLog, PetscClassPerfLog - arrays of the PetscClassRegInfo and PetscClassPerfInfo for all classes. 107 */ 108 typedef struct { 109 char *name; /* The class name */ 110 PetscClassId classid; /* The integer identifying this class */ 111 } PetscClassRegInfo; 112 113 typedef struct { 114 PetscClassId id; /* The integer identifying this class */ 115 int creations; /* The number of objects of this class created */ 116 int destructions; /* The number of objects of this class destroyed */ 117 PetscLogDouble mem; /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */ 118 PetscLogDouble descMem; /* The total memory allocated by descendents of these objects; this is completely wrong and should possibly be removed */ 119 } PetscClassPerfInfo; 120 121 typedef struct _n_PetscClassRegLog *PetscClassRegLog; 122 struct _n_PetscClassRegLog { 123 int numClasses; /* The number of classes registered */ 124 int maxClasses; /* The maximum number of classes */ 125 PetscClassRegInfo *classInfo; /* The structure for class information (classids are monotonicly increasing) */ 126 }; 127 128 typedef struct _n_PetscClassPerfLog *PetscClassPerfLog; 129 struct _n_PetscClassPerfLog { 130 int numClasses; /* The number of logging classes */ 131 int maxClasses; /* The maximum number of classes */ 132 PetscClassPerfInfo *classInfo; /* The structure for class information (classids are monotonicly increasing) */ 133 }; 134 /* -----------------------------------------------------------------------------------------------------*/ 135 /* 136 PetscEventRegInfo, PetscEventPerfInfo - Each event has two data structures associated with it. The first has 137 static information about it, the second collects statistics on how many times the event is used, how 138 much time it takes, etc. 139 140 PetscEventRegLog, PetscEventPerfLog - an array of all PetscEventRegInfo and PetscEventPerfInfo for all events. There is one 141 of these for each stage. 142 143 */ 144 typedef struct { 145 char *name; /* The name of this event */ 146 PetscClassId classid; /* The class the event is associated with */ 147 PetscBool collective; /* Flag this event as collective */ 148 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 149 void *timer; /* Associated external tool timer for this event */ 150 #endif 151 #if defined(PETSC_HAVE_MPE) 152 int mpe_id_begin; /* MPE IDs that define the event */ 153 int mpe_id_end; 154 #endif 155 } PetscEventRegInfo; 156 157 typedef struct { 158 int id; /* The integer identifying this event */ 159 PetscBool active; /* The flag to activate logging */ 160 PetscBool visible; /* The flag to print info in summary */ 161 int depth; /* The nesting depth of the event call */ 162 int count; /* The number of times this event was executed */ 163 PetscLogDouble flops, flops2, flopsTmp; /* The flops and flops^2 used in this event */ 164 PetscLogDouble time, time2, timeTmp; /* The time and time^2 taken for this event */ 165 PetscLogDouble syncTime; /* The synchronization barrier time */ 166 PetscLogDouble dof[8]; /* The number of degrees of freedom associated with this event */ 167 PetscLogDouble errors[8]; /* The errors (user-defined) associated with this event */ 168 PetscLogDouble numMessages; /* The number of messages in this event */ 169 PetscLogDouble messageLength; /* The total message lengths in this event */ 170 PetscLogDouble numReductions; /* The number of reductions in this event */ 171 PetscLogDouble memIncrease; /* How much the resident memory has increased in this event */ 172 PetscLogDouble mallocIncrease; /* How much the maximum malloced space has increased in this event */ 173 PetscLogDouble mallocSpace; /* How much the space was malloced and kept during this event */ 174 PetscLogDouble mallocIncreaseEvent; /* Maximum of the high water mark with in event minus memory available at the end of the event */ 175 #if defined(PETSC_HAVE_DEVICE) 176 PetscLogDouble CpuToGpuCount; /* The total number of CPU to GPU copies */ 177 PetscLogDouble GpuToCpuCount; /* The total number of GPU to CPU copies */ 178 PetscLogDouble CpuToGpuSize; /* The total size of CPU to GPU copies */ 179 PetscLogDouble GpuToCpuSize; /* The total size of GPU to CPU copies */ 180 PetscLogDouble GpuFlops; /* The flops done on a GPU in this event */ 181 PetscLogDouble GpuTime; /* The time spent on a GPU in this event */ 182 #endif 183 } PetscEventPerfInfo; 184 185 typedef struct _n_PetscEventRegLog *PetscEventRegLog; 186 struct _n_PetscEventRegLog { 187 int numEvents; /* The number of registered events */ 188 int maxEvents; /* The maximum number of events */ 189 PetscEventRegInfo *eventInfo; /* The registration information for each event */ 190 }; 191 192 typedef struct _n_PetscEventPerfLog *PetscEventPerfLog; 193 struct _n_PetscEventPerfLog { 194 int numEvents; /* The number of logging events */ 195 int maxEvents; /* The maximum number of events */ 196 PetscEventPerfInfo *eventInfo; /* The performance information for each event */ 197 }; 198 /* ------------------------------------------------------------------------------------------------------------*/ 199 /* 200 PetscStageInfo - Contains all the information about a particular stage. 201 202 PetscStageLog - An array of PetscStageInfo for each registered stage. There is a single one of these in the code. 203 */ 204 typedef struct _PetscStageInfo { 205 char *name; /* The stage name */ 206 PetscBool used; /* The stage was pushed on this processor */ 207 PetscEventPerfInfo perfInfo; /* The stage performance information */ 208 PetscEventPerfLog eventLog; /* The event information for this stage */ 209 PetscClassPerfLog classLog; /* The class information for this stage */ 210 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 211 void *timer; /* Associated external tool timer for this stage */ 212 #endif 213 } PetscStageInfo; 214 215 typedef struct _n_PetscStageLog *PetscStageLog; 216 struct _n_PetscStageLog { 217 int numStages; /* The number of registered stages */ 218 int maxStages; /* The maximum number of stages */ 219 PetscIntStack stack; /* The stack for active stages */ 220 int curStage; /* The current stage (only used in macros so we don't call PetscIntStackTop) */ 221 PetscStageInfo *stageInfo; /* The information for each stage */ 222 PetscEventRegLog eventLog; /* The registered events */ 223 PetscClassRegLog classLog; /* The registered classes */ 224 }; 225 /* -----------------------------------------------------------------------------------------------------*/ 226 227 PETSC_DEPRECATED_FUNCTION("PetscLogObjectParent() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectParent(PetscObject o, PetscObject p) 228 { 229 (void)o; 230 (void)p; 231 return 0; 232 } 233 234 PETSC_DEPRECATED_FUNCTION("PetscLogObjectMemory() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectMemory(PetscObject o, PetscLogDouble m) 235 { 236 (void)o; 237 (void)m; 238 return 0; 239 } 240 241 #if defined(PETSC_USE_LOG) /* --- Logging is turned on --------------------------------*/ 242 PETSC_EXTERN PetscStageLog petsc_stageLog; 243 PETSC_EXTERN PetscErrorCode PetscLogGetStageLog(PetscStageLog *); 244 PETSC_EXTERN PetscErrorCode PetscStageLogGetCurrent(PetscStageLog, int *); 245 PETSC_EXTERN PetscErrorCode PetscStageLogGetEventPerfLog(PetscStageLog, int, PetscEventPerfLog *); 246 247 /* 248 Flop counting: We count each arithmetic operation (e.g., addition, multiplication) separately. 249 250 For the complex numbers version, note that 251 1 complex addition = 2 flops 252 1 complex multiplication = 6 flops, 253 where we define 1 flop as that for a double precision scalar. We roughly approximate 254 flop counting for complex numbers by multiplying the total flops by 4; this corresponds 255 to the assumption that we're counting mostly additions and multiplications -- and 256 roughly the same number of each. More accurate counting could be done by distinguishing 257 among the various arithmetic operations. 258 */ 259 260 #if defined(PETSC_USE_COMPLEX) 261 #define PETSC_FLOPS_PER_OP 4.0 262 #else 263 #define PETSC_FLOPS_PER_OP 1.0 264 #endif 265 266 /*@C 267 PetscLogFlops - Log how many flops are performed in a calculation 268 269 Input Parameter: 270 . flops - the number of flops 271 272 Notes: 273 To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double, 274 not an integer. Use PetscLogFlops(4.0*n) not PetscLogFlops(4*n) 275 276 Level: intermediate 277 278 .seealso: `PetscLogView()`, `PetscLogGpuFlops()` 279 @*/ 280 281 static inline PetscErrorCode PetscLogFlops(PetscLogDouble n) 282 { 283 PetscFunctionBegin; 284 #if defined(PETSC_USE_DEBUG) 285 PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops"); 286 #endif 287 petsc_TotalFlops += PETSC_FLOPS_PER_OP * n; 288 PetscFunctionReturn(0); 289 } 290 291 PETSC_EXTERN PetscErrorCode PetscGetFlops(PetscLogDouble *); 292 293 #if defined(PETSC_HAVE_MPE) 294 PETSC_EXTERN PetscErrorCode PetscLogMPEBegin(void); 295 PETSC_EXTERN PetscErrorCode PetscLogMPEDump(const char[]); 296 #endif 297 298 PETSC_EXTERN PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 299 PETSC_EXTERN PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 300 PETSC_EXTERN PetscErrorCode (*PetscLogPHC)(PetscObject); 301 PETSC_EXTERN PetscErrorCode (*PetscLogPHD)(PetscObject); 302 303 #define PetscLogObjectParents(p, n, d) PetscMacroReturnStandard(for (int _i = 0; _i < (n); ++_i) PetscCall(PetscLogObjectParent((PetscObject)(p), (PetscObject)(d)[_i]));) 304 #define PetscLogObjectCreate(h) ((PetscLogPHC) ? (*PetscLogPHC)((PetscObject)(h)) : 0) 305 #define PetscLogObjectDestroy(h) ((PetscLogPHD) ? (*PetscLogPHD)((PetscObject)(h)) : 0) 306 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3); 307 308 /* Initialization functions */ 309 PETSC_EXTERN PetscErrorCode PetscLogDefaultBegin(void); 310 PETSC_EXTERN PetscErrorCode PetscLogAllBegin(void); 311 PETSC_EXTERN PetscErrorCode PetscLogNestedBegin(void); 312 PETSC_EXTERN PetscErrorCode PetscLogTraceBegin(FILE *); 313 PETSC_EXTERN PetscErrorCode PetscLogActions(PetscBool); 314 PETSC_EXTERN PetscErrorCode PetscLogObjects(PetscBool); 315 PETSC_EXTERN PetscErrorCode PetscLogSetThreshold(PetscLogDouble, PetscLogDouble *); 316 PETSC_EXTERN PetscErrorCode PetscLogSet(PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject)); 317 318 /* Output functions */ 319 PETSC_EXTERN PetscErrorCode PetscLogView(PetscViewer); 320 PETSC_EXTERN PetscErrorCode PetscLogViewFromOptions(void); 321 PETSC_EXTERN PetscErrorCode PetscLogDump(const char[]); 322 323 /* Status checking functions */ 324 PETSC_EXTERN PetscErrorCode PetscLogIsActive(PetscBool *); 325 326 /* Stage functions */ 327 PETSC_EXTERN PetscErrorCode PetscLogStageRegister(const char[], PetscLogStage *); 328 PETSC_EXTERN PetscErrorCode PetscLogStagePush(PetscLogStage); 329 PETSC_EXTERN PetscErrorCode PetscLogStagePop(void); 330 PETSC_EXTERN PetscErrorCode PetscLogStageSetActive(PetscLogStage, PetscBool); 331 PETSC_EXTERN PetscErrorCode PetscLogStageGetActive(PetscLogStage, PetscBool *); 332 PETSC_EXTERN PetscErrorCode PetscLogStageSetVisible(PetscLogStage, PetscBool); 333 PETSC_EXTERN PetscErrorCode PetscLogStageGetVisible(PetscLogStage, PetscBool *); 334 PETSC_EXTERN PetscErrorCode PetscLogStageGetId(const char[], PetscLogStage *); 335 336 /* Event functions */ 337 PETSC_EXTERN PetscErrorCode PetscLogEventRegister(const char[], PetscClassId, PetscLogEvent *); 338 PETSC_EXTERN PetscErrorCode PetscLogEventSetCollective(PetscLogEvent, PetscBool); 339 PETSC_EXTERN PetscErrorCode PetscLogEventIncludeClass(PetscClassId); 340 PETSC_EXTERN PetscErrorCode PetscLogEventExcludeClass(PetscClassId); 341 PETSC_EXTERN PetscErrorCode PetscLogEventActivate(PetscLogEvent); 342 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivate(PetscLogEvent); 343 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent); 344 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent); 345 PETSC_EXTERN PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent, PetscBool); 346 PETSC_EXTERN PetscErrorCode PetscLogEventActivateClass(PetscClassId); 347 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivateClass(PetscClassId); 348 PETSC_EXTERN PetscErrorCode PetscLogEventGetId(const char[], PetscLogEvent *); 349 PETSC_EXTERN PetscErrorCode PetscLogEventGetPerfInfo(int, PetscLogEvent, PetscEventPerfInfo *); 350 PETSC_EXTERN PetscErrorCode PetscLogEventSetDof(PetscLogEvent, PetscInt, PetscLogDouble); 351 PETSC_EXTERN PetscErrorCode PetscLogEventSetError(PetscLogEvent, PetscInt, PetscLogDouble); 352 PETSC_EXTERN PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent); 353 PETSC_EXTERN PetscErrorCode PetscLogPopCurrentEvent_Internal(void); 354 355 /* Global counters */ 356 PETSC_EXTERN PetscLogDouble petsc_irecv_ct; 357 PETSC_EXTERN PetscLogDouble petsc_isend_ct; 358 PETSC_EXTERN PetscLogDouble petsc_recv_ct; 359 PETSC_EXTERN PetscLogDouble petsc_send_ct; 360 PETSC_EXTERN PetscLogDouble petsc_irecv_len; 361 PETSC_EXTERN PetscLogDouble petsc_isend_len; 362 PETSC_EXTERN PetscLogDouble petsc_recv_len; 363 PETSC_EXTERN PetscLogDouble petsc_send_len; 364 PETSC_EXTERN PetscLogDouble petsc_allreduce_ct; 365 PETSC_EXTERN PetscLogDouble petsc_gather_ct; 366 PETSC_EXTERN PetscLogDouble petsc_scatter_ct; 367 PETSC_EXTERN PetscLogDouble petsc_wait_ct; 368 PETSC_EXTERN PetscLogDouble petsc_wait_any_ct; 369 PETSC_EXTERN PetscLogDouble petsc_wait_all_ct; 370 PETSC_EXTERN PetscLogDouble petsc_sum_of_waits_ct; 371 372 PETSC_EXTERN PetscBool PetscLogMemory; 373 374 PETSC_EXTERN PetscBool PetscLogSyncOn; /* true if logging synchronization is enabled */ 375 PETSC_EXTERN PetscErrorCode PetscLogEventSynchronize(PetscLogEvent, MPI_Comm); 376 377 #define PetscLogEventSync(e, comm) \ 378 (((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? PetscLogEventSynchronize((e), (comm)) : 0)) 379 380 #define PetscLogEventBegin(e, o1, o2, o3, o4) \ 381 ((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (((*PetscLogPLB)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPushCurrentEvent_Internal(e)) : 0) 382 383 #define PetscLogEventEnd(e, o1, o2, o3, o4) \ 384 ((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (((*PetscLogPLE)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPopCurrentEvent_Internal()) : 0) 385 386 PETSC_EXTERN PetscErrorCode PetscLogEventGetFlops(PetscLogEvent, PetscLogDouble *); 387 PETSC_EXTERN PetscErrorCode PetscLogEventZeroFlops(PetscLogEvent); 388 389 /* 390 These are used internally in the PETSc routines to keep a count of MPI messages and 391 their sizes. 392 393 This does not work for MPI-Uni because our include/petsc/mpiuni/mpi.h file 394 uses macros to defined the MPI operations. 395 396 It does not work correctly from HP-UX because it processes the 397 macros in a way that sometimes it double counts, hence 398 PETSC_HAVE_BROKEN_RECURSIVE_MACRO 399 400 It does not work with Windows because winmpich lacks MPI_Type_size() 401 */ 402 #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO) && !defined(PETSC_HAVE_MPI_MISSING_TYPESIZE) 403 /* 404 Logging of MPI activities 405 */ 406 static inline PetscErrorCode PetscMPITypeSize(PetscInt count, MPI_Datatype type, PetscLogDouble *length) 407 { 408 PetscMPIInt typesize; 409 410 if (type == MPI_DATATYPE_NULL) return 0; 411 PetscCallMPI(MPI_Type_size(type, &typesize)); 412 *length += (PetscLogDouble)(count * typesize); 413 return 0; 414 } 415 416 static inline PetscErrorCode PetscMPITypeSizeComm(MPI_Comm comm, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length) 417 { 418 PetscMPIInt typesize, size, p; 419 420 if (type == MPI_DATATYPE_NULL) return 0; 421 PetscCallMPI(MPI_Comm_size(comm, &size)); 422 PetscCallMPI(MPI_Type_size(type, &typesize)); 423 for (p = 0; p < size; ++p) *length += (PetscLogDouble)(counts[p] * typesize); 424 return 0; 425 } 426 427 static inline PetscErrorCode PetscMPITypeSizeCount(PetscInt n, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length) 428 { 429 PetscMPIInt typesize, p; 430 431 if (type == MPI_DATATYPE_NULL) return 0; 432 PetscCallMPI(MPI_Type_size(type, &typesize)); 433 for (p = 0; p < n; ++p) *length += (PetscLogDouble)(counts[p] * typesize); 434 return 0; 435 } 436 437 /* 438 Returns 1 if the communicator is parallel else zero 439 */ 440 static inline int PetscMPIParallelComm(MPI_Comm comm) 441 { 442 PetscMPIInt size; 443 MPI_Comm_size(comm, &size); 444 return size > 1; 445 } 446 447 #define MPI_Irecv(buf, count, datatype, source, tag, comm, request) ((petsc_irecv_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || MPI_Irecv((buf), (count), (datatype), (source), (tag), (comm), (request))) 448 449 #define MPI_Irecv_c(buf, count, datatype, source, tag, comm, request) ((petsc_irecv_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || MPI_Irecv_c((buf), (count), (datatype), (source), (tag), (comm), (request))) 450 451 #define MPI_Isend(buf, count, datatype, dest, tag, comm, request) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || MPI_Isend((buf), (count), (datatype), (dest), (tag), (comm), (request))) 452 453 #define MPI_Isend_c(buf, count, datatype, dest, tag, comm, request) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || MPI_Isend_c((buf), (count), (datatype), (dest), (tag), (comm), (request))) 454 455 #define MPI_Startall_irecv(count, datatype, number, requests) ((petsc_irecv_ct += (PetscLogDouble)(number), 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || ((number) && MPI_Startall((number), (requests)))) 456 457 #define MPI_Startall_isend(count, datatype, number, requests) ((petsc_isend_ct += (PetscLogDouble)(number), 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || ((number) && MPI_Startall((number), (requests)))) 458 459 #define MPI_Start_isend(count, datatype, requests) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_isend_len)) || MPI_Start((requests))) 460 461 #define MPI_Recv(buf, count, datatype, source, tag, comm, status) ((petsc_recv_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len)) || MPI_Recv((buf), (count), (datatype), (source), (tag), (comm), (status))) 462 463 #define MPI_Recv_c(buf, count, datatype, source, tag, comm, status) ((petsc_recv_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len)) || MPI_Recv_c((buf), (count), (datatype), (source), (tag), (comm), (status))) 464 465 #define MPI_Send(buf, count, datatype, dest, tag, comm) ((petsc_send_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_send_len)) || MPI_Send((buf), (count), (datatype), (dest), (tag), (comm))) 466 467 #define MPI_Send_c(buf, count, datatype, dest, tag, comm) ((petsc_send_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_send_len)) || MPI_Send_c((buf), (count), (datatype), (dest), (tag), (comm))) 468 469 #define MPI_Wait(request, status) ((petsc_wait_ct++, petsc_sum_of_waits_ct++, 0) || MPI_Wait((request), (status))) 470 471 #define MPI_Waitany(a, b, c, d) ((petsc_wait_any_ct++, petsc_sum_of_waits_ct++, 0) || MPI_Waitany((a), (b), (c), (d))) 472 473 #define MPI_Waitall(count, array_of_requests, array_of_statuses) ((petsc_wait_all_ct++, petsc_sum_of_waits_ct += (PetscLogDouble)(count), 0) || MPI_Waitall((count), (array_of_requests), (array_of_statuses))) 474 475 #define MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm) (petsc_allreduce_ct += PetscMPIParallelComm((comm)), MPI_Allreduce((sendbuf), (recvbuf), (count), (datatype), (op), (comm))) 476 477 #define MPI_Bcast(buffer, count, datatype, root, comm) ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || MPI_Bcast((buffer), (count), (datatype), (root), (comm))) 478 479 #define MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm) ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || MPI_Reduce_scatter_block((sendbuf), (recvbuf), (recvcount), (datatype), (op), (comm))) 480 481 #define MPI_Alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) \ 482 ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Alltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm))) 483 484 #define MPI_Alltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm) \ 485 ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len)) || MPI_Alltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm))) 486 487 #define MPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Allgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm))) 488 489 #define MPI_Allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm) \ 490 ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Allgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm))) 491 492 #define MPI_Gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \ 493 ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Gather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm))) 494 495 #define MPI_Gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm) \ 496 ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Gatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm))) 497 498 #define MPI_Scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \ 499 ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Scatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm))) 500 501 #define MPI_Scatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm) \ 502 ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Scatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm))) 503 504 #define MPI_Ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \ 505 ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Ialltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request))) 506 507 #define MPI_Ialltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm, request) \ 508 ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len)) || MPI_Ialltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm), (request))) 509 510 #define MPI_Iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \ 511 ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Iallgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request))) 512 513 #define MPI_Iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm, request) \ 514 ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Iallgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm), (request))) 515 516 #define MPI_Igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \ 517 ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Igather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request))) 518 519 #define MPI_Igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm, request) \ 520 ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Igatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm), (request))) 521 522 #define MPI_Iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \ 523 ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Iscatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request))) 524 525 #define MPI_Iscatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \ 526 ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Iscatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request))) 527 528 #else 529 530 #define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests))) 531 532 #define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests))) 533 534 #define MPI_Start_isend(count, datatype, requests) (MPI_Start((requests))) 535 536 #endif /* !MPIUNI_H && ! PETSC_HAVE_BROKEN_RECURSIVE_MACRO */ 537 538 #else /* ---Logging is turned off --------------------------------------------*/ 539 540 #define PetscLogMemory PETSC_FALSE 541 542 #define PetscLogFlops(n) 0 543 #define PetscGetFlops(a) (*(a) = 0.0, 0) 544 545 #define PetscLogStageRegister(a, b) 0 546 #define PetscLogStagePush(a) 0 547 #define PetscLogStagePop() 0 548 #define PetscLogStageSetActive(a, b) 0 549 #define PetscLogStageGetActive(a, b) 0 550 #define PetscLogStageGetVisible(a, b) 0 551 #define PetscLogStageSetVisible(a, b) 0 552 #define PetscLogStageGetId(a, b) (*(b) = 0, 0) 553 554 #define PetscLogEventRegister(a, b, c) 0 555 #define PetscLogEventSetCollective(a, b) 0 556 #define PetscLogEventIncludeClass(a) 0 557 #define PetscLogEventExcludeClass(a) 0 558 #define PetscLogEventActivate(a) 0 559 #define PetscLogEventDeactivate(a) 0 560 #define PetscLogEventDeactivatePush(a) 0 561 #define PetscLogEventDeactivatePop(a) 0 562 #define PetscLogEventActivateClass(a) 0 563 #define PetscLogEventDeactivateClass(a) 0 564 #define PetscLogEventSetActiveAll(a, b) 0 565 #define PetscLogEventGetId(a, b) (*(b) = 0, 0) 566 #define PetscLogEventGetPerfInfo(a, b, c) 0 567 #define PetscLogEventSetDof(a, b, c) 0 568 #define PetscLogEventSetError(a, b, c) 0 569 570 #define PetscLogPLB 0 571 #define PetscLogPLE 0 572 #define PetscLogPHC 0 573 #define PetscLogPHD 0 574 575 #define PetscLogObjectParents(p, n, c) 0 576 #define PetscLogObjectCreate(h) 0 577 #define PetscLogObjectDestroy(h) 0 578 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3); 579 580 #define PetscLogDefaultBegin() 0 581 #define PetscLogAllBegin() 0 582 #define PetscLogNestedBegin() 0 583 #define PetscLogTraceBegin(file) 0 584 #define PetscLogActions(a) 0 585 #define PetscLogObjects(a) 0 586 #define PetscLogSetThreshold(a, b) 0 587 #define PetscLogSet(lb, le) 0 588 #define PetscLogIsActive(flag) (*(flag) = PETSC_FALSE, 0) 589 590 #define PetscLogView(viewer) 0 591 #define PetscLogViewFromOptions() 0 592 #define PetscLogDump(c) 0 593 594 #define PetscLogEventSync(e, comm) 0 595 #define PetscLogEventBegin(e, o1, o2, o3, o4) 0 596 #define PetscLogEventEnd(e, o1, o2, o3, o4) 0 597 598 /* If PETSC_USE_LOG is NOT defined, these still need to be! */ 599 #define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall(number, requests)) 600 #define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall(number, requests)) 601 #define MPI_Start_isend(count, datatype, requests) MPI_Start(requests) 602 603 #endif /* PETSC_USE_LOG */ 604 605 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_DEVICE) 606 607 /* Global GPU counters */ 608 PETSC_EXTERN PetscLogDouble petsc_ctog_ct; 609 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct; 610 PETSC_EXTERN PetscLogDouble petsc_ctog_sz; 611 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz; 612 PETSC_EXTERN PetscLogDouble petsc_ctog_ct_scalar; 613 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct_scalar; 614 PETSC_EXTERN PetscLogDouble petsc_ctog_sz_scalar; 615 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz_scalar; 616 PETSC_EXTERN PetscLogDouble petsc_gflops; 617 PETSC_EXTERN PetscLogDouble petsc_gtime; 618 619 static inline PetscErrorCode PetscLogCpuToGpu(PetscLogDouble size) 620 { 621 PetscFunctionBegin; 622 petsc_ctog_ct += 1; 623 petsc_ctog_sz += size; 624 PetscFunctionReturn(0); 625 } 626 627 static inline PetscErrorCode PetscLogGpuToCpu(PetscLogDouble size) 628 { 629 PetscFunctionBegin; 630 petsc_gtoc_ct += 1; 631 petsc_gtoc_sz += size; 632 PetscFunctionReturn(0); 633 } 634 635 static inline PetscErrorCode PetscLogCpuToGpuScalar(PetscLogDouble size) 636 { 637 PetscFunctionBegin; 638 petsc_ctog_ct_scalar += 1; 639 petsc_ctog_sz_scalar += size; 640 PetscFunctionReturn(0); 641 } 642 643 static inline PetscErrorCode PetscLogGpuToCpuScalar(PetscLogDouble size) 644 { 645 PetscFunctionBegin; 646 petsc_gtoc_ct_scalar += 1; 647 petsc_gtoc_sz_scalar += size; 648 PetscFunctionReturn(0); 649 } 650 651 /*@C 652 PetscLogGpuFlops - Log how many flops are performed in a calculation on the device 653 654 Input Parameter: 655 . flops - the number of flops 656 657 Notes: 658 To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double, 659 not an integer. Use PetscLogFlops(4.0*n) not PetscLogFlops(4*n) 660 661 The values are also added to the total flop count for the MPI rank that is set with `PetscLogFlops()`; hence the number of flops 662 just on the CPU would be the value from set from `PetscLogFlops()` minus the value set from `PetscLogGpuFlops()` 663 664 Level: intermediate 665 666 .seealso: `PetscLogView()`, `PetscLogFlops()`, `PetscLogGpuTimeBegin()`, `PetscLogGpuTimeEnd()` 667 @*/ 668 static inline PetscErrorCode PetscLogGpuFlops(PetscLogDouble n) 669 { 670 PetscFunctionBegin; 671 PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops"); 672 petsc_TotalFlops += PETSC_FLOPS_PER_OP * n; 673 petsc_gflops += PETSC_FLOPS_PER_OP * n; 674 PetscFunctionReturn(0); 675 } 676 677 static inline PetscErrorCode PetscLogGpuTimeAdd(PetscLogDouble t) 678 { 679 PetscFunctionBegin; 680 petsc_gtime += t; 681 PetscFunctionReturn(0); 682 } 683 684 PETSC_EXTERN PetscErrorCode PetscLogGpuTime(void); 685 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeBegin(void); 686 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeEnd(void); 687 688 #else 689 690 #define PetscLogCpuToGpu(a) 0 691 #define PetscLogGpuToCpu(a) 0 692 #define PetscLogCpuToGpuScalar(a) 0 693 #define PetscLogGpuToCpuScalar(a) 0 694 #define PetscLogGpuFlops(a) 0 695 #define PetscLogGpuTime() 0 696 #define PetscLogGpuTimeAdd(a) 0 697 #define PetscLogGpuTimeBegin() 0 698 #define PetscLogGpuTimeEnd() 0 699 700 #endif /* PETSC_USE_LOG && PETSC_HAVE_DEVICE */ 701 702 #define PetscPreLoadBegin(flag, name) \ 703 do { \ 704 PetscBool PetscPreLoading = flag; \ 705 int PetscPreLoadMax, PetscPreLoadIt; \ 706 PetscLogStage _stageNum; \ 707 PetscCall(PetscOptionsGetBool(NULL, NULL, "-preload", &PetscPreLoading, NULL)); \ 708 PetscPreLoadMax = (int)(PetscPreLoading); \ 709 PetscPreLoadingUsed = PetscPreLoading ? PETSC_TRUE : PetscPreLoadingUsed; \ 710 for (PetscPreLoadIt = 0; PetscPreLoadIt <= PetscPreLoadMax; PetscPreLoadIt++) { \ 711 PetscPreLoadingOn = PetscPreLoading; \ 712 PetscCall(PetscBarrier(NULL)); \ 713 if (PetscPreLoadIt > 0) PetscCall(PetscLogStageGetId(name, &_stageNum)); \ 714 else PetscCall(PetscLogStageRegister(name, &_stageNum)); \ 715 PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \ 716 PetscCall(PetscLogStagePush(_stageNum)); 717 718 #define PetscPreLoadEnd() \ 719 PetscCall(PetscLogStagePop()); \ 720 PetscPreLoading = PETSC_FALSE; \ 721 } \ 722 } \ 723 while (0) 724 725 #define PetscPreLoadStage(name) \ 726 do { \ 727 PetscCall(PetscLogStagePop()); \ 728 if (PetscPreLoadIt > 0) PetscCall(PetscLogStageGetId(name, &_stageNum)); \ 729 else PetscCall(PetscLogStageRegister(name, &_stageNum)); \ 730 PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \ 731 PetscCall(PetscLogStagePush(_stageNum)); \ 732 } while (0) 733 734 /* some vars for logging */ 735 PETSC_EXTERN PetscBool PetscPreLoadingUsed; /* true if we are or have done preloading */ 736 PETSC_EXTERN PetscBool PetscPreLoadingOn; /* true if we are currently in a preloading calculation */ 737 738 #endif 739