1 /* 2 Defines profile/logging in PETSc. 3 */ 4 #if !defined(PETSCLOG_H) 5 #define PETSCLOG_H 6 7 #include <petscsys.h> 8 #include <petsctime.h> 9 10 /* SUBMANSEC = Sys */ 11 12 /* General logging of information; different from event logging */ 13 PETSC_EXTERN PetscErrorCode PetscInfo_Private(const char[], PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(3, 4); 14 #if defined(PETSC_USE_INFO) 15 #define PetscInfo(A, ...) PetscInfo_Private(PETSC_FUNCTION_NAME, ((PetscObject)A), __VA_ARGS__) 16 #else 17 #define PetscInfo(A, ...) 0 18 #endif 19 20 #define PetscInfo1(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 21 #define PetscInfo2(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 22 #define PetscInfo3(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 23 #define PetscInfo4(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 24 #define PetscInfo5(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 25 #define PetscInfo6(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 26 #define PetscInfo7(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 27 #define PetscInfo8(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 28 #define PetscInfo9(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 29 30 /*E 31 PetscInfoCommFlag - Describes the method by which to filter `PetscInfo()` by communicator size 32 33 Used as an input for `PetscInfoSetFilterCommSelf()` 34 35 $ `PETSC_INFO_COMM_ALL` - Default uninitialized value. 
    `PetscInfo()` will not filter based on communicator size (i.e. will print for all communicators)
$   `PETSC_INFO_COMM_NO_SELF` - `PetscInfo()` will NOT print for communicators with size = 1 (i.e. *_COMM_SELF)
$   `PETSC_INFO_COMM_ONLY_SELF` - `PetscInfo()` will ONLY print for communicators with size = 1

    Level: intermediate

.seealso: `PetscInfo()`, `PetscInfoSetFromOptions()`, `PetscInfoSetFilterCommSelf()`
E*/
typedef enum {
  PETSC_INFO_COMM_ALL       = -1, /* default: no filtering by communicator size */
  PETSC_INFO_COMM_NO_SELF   = 0,  /* suppress output for communicators of size 1 */
  PETSC_INFO_COMM_ONLY_SELF = 1   /* output only for communicators of size 1 */
} PetscInfoCommFlag;

/*
   Declarations for configuring and querying PetscInfo() output. Only the prototypes are visible
   in this header; see the individual manual pages for the meaning of each argument.
*/
PETSC_EXTERN const char *const PetscInfoCommFlags[];
PETSC_EXTERN PetscErrorCode    PetscInfoDeactivateClass(PetscClassId);
PETSC_EXTERN PetscErrorCode    PetscInfoActivateClass(PetscClassId);
PETSC_EXTERN PetscErrorCode    PetscInfoEnabled(PetscClassId, PetscBool *);
PETSC_EXTERN PetscErrorCode    PetscInfoAllow(PetscBool);
PETSC_EXTERN PetscErrorCode    PetscInfoSetFile(const char[], const char[]);
PETSC_EXTERN PetscErrorCode    PetscInfoGetFile(char **, FILE **);
PETSC_EXTERN PetscErrorCode    PetscInfoSetClasses(PetscBool, PetscInt, const char *const *);
PETSC_EXTERN PetscErrorCode    PetscInfoGetClass(const char *, PetscBool *);
PETSC_EXTERN PetscErrorCode    PetscInfoGetInfo(PetscBool *, PetscBool *, PetscBool *, PetscBool *, PetscInfoCommFlag *);
PETSC_EXTERN PetscErrorCode    PetscInfoProcessClass(const char[], PetscInt, const PetscClassId[]);
PETSC_EXTERN PetscErrorCode    PetscInfoSetFilterCommSelf(PetscInfoCommFlag);
PETSC_EXTERN PetscErrorCode    PetscInfoSetFromOptions(PetscOptions);
PETSC_EXTERN PetscErrorCode    PetscInfoDestroy(void);
PETSC_EXTERN PetscBool         PetscLogPrintInfo; /* if true, indicates PetscInfo() is turned on */

/*MC
    PetscLogEvent - id used to identify PETSc or user events that time portions (blocks) of
     executable code.

    Level: intermediate

.seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStage`
M*/
typedef int PetscLogEvent;

/*MC
    PetscLogStage - id used to identify user stages (phases, sections) of runs - for logging

    Level: intermediate

.seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEvent`
M*/
typedef int PetscLogStage;

/* NOTE(review): the purpose of this constant is not evident from this header - confirm against its users */
#define PETSC_EVENT 1311311
PETSC_EXTERN PetscLogEvent PETSC_LARGEST_EVENT;

/* Global flop counter; petsc_TotalFlops is the accumulator that PetscLogFlops() adds to */
PETSC_EXTERN PetscLogDouble petsc_TotalFlops;
PETSC_EXTERN PetscLogDouble petsc_tmp_flops;

/* We must make the following structures available to access the event
     activation flags in the PetscLogEventBegin/End() macros. These are not part of the PETSc public
     API and are not intended to be used by other parts of PETSc or by users.

     The code that manipulates these structures is in src/sys/logging/utils.
*/
/* Opaque handle: only the pointer type is declared here, the struct body is not visible in this header */
typedef struct _n_PetscIntStack *PetscIntStack;

/* -----------------------------------------------------------------------------------------------------*/
/*
    PetscClassRegInfo, PetscClassPerfInfo - Each class has two data structures associated with it. The first has
       static information about it, the second collects statistics on how many objects of the class are created,
       how much memory they use, etc.

    PetscClassRegLog, PetscClassPerfLog - arrays of the PetscClassRegInfo and PetscClassPerfInfo for all classes.
*/
/* Static (registration-time) information about one class */
typedef struct {
  char        *name;    /* The class name */
  PetscClassId classid; /* The integer identifying this class */
} PetscClassRegInfo;

/* Per-class object statistics */
typedef struct {
  PetscClassId   id;           /* The integer identifying this class */
  int            creations;    /* The number of objects of this class created */
  int            destructions; /* The number of objects of this class destroyed */
  PetscLogDouble mem;          /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */
  PetscLogDouble descMem;      /* The total memory allocated by descendants of these objects; this is completely wrong and should possibly be removed */
} PetscClassPerfInfo;

/* PetscClassRegLog is a pointer to the struct defined immediately below */
typedef struct _n_PetscClassRegLog *PetscClassRegLog;
struct _n_PetscClassRegLog {
  int                numClasses; /* The number of classes registered */
  int                maxClasses; /* The maximum number of classes */
  PetscClassRegInfo *classInfo;  /* The structure for class information (classids are monotonically increasing) */
};

/* PetscClassPerfLog is a pointer to the struct defined immediately below */
typedef struct _n_PetscClassPerfLog *PetscClassPerfLog;
struct _n_PetscClassPerfLog {
  int                 numClasses; /* The number of logging classes */
  int                 maxClasses; /* The maximum number of classes */
  PetscClassPerfInfo *classInfo;  /* The structure for class information (classids are monotonically increasing) */
};
/* -----------------------------------------------------------------------------------------------------*/
/*
    PetscEventRegInfo, PetscEventPerfInfo - Each event has two data structures associated with it. The first has
       static information about it, the second collects statistics on how many times the event is used, how
       much time it takes, etc.

    PetscEventRegLog, PetscEventPerfLog - an array of all PetscEventRegInfo and PetscEventPerfInfo for all events. There is one
      of these for each stage.

*/
/* Static (registration-time) information about one event */
typedef struct {
  char        *name;       /* The name of this event */
  PetscClassId classid;    /* The class the event is associated with */
  PetscBool    collective; /* Flag this event as collective */
#if defined(PETSC_HAVE_MPE)
  int mpe_id_begin; /* MPE IDs that define the event */
  int mpe_id_end;
#endif
} PetscEventRegInfo;

/*
   Performance statistics for one event. The same struct type is also used for a whole stage
   (see the perfInfo member of PetscStageInfo).
*/
typedef struct {
  int            id;                     /* The integer identifying this event */
  PetscBool      active;                 /* The flag to activate logging */
  PetscBool      visible;                /* The flag to print info in summary */
  int            depth;                  /* The nesting depth of the event call */
  int            count;                  /* The number of times this event was executed */
  PetscLogDouble flops, flops2, flopsTmp; /* The flops and flops^2 used in this event; flopsTmp is presumably a scratch value - TODO confirm */
  PetscLogDouble time, time2, timeTmp;   /* The time and time^2 taken for this event; timeTmp is presumably a scratch value - TODO confirm */
  PetscLogDouble syncTime;               /* The synchronization barrier time */
  PetscLogDouble dof[8];                 /* The number of degrees of freedom associated with this event */
  PetscLogDouble errors[8];              /* The errors (user-defined) associated with this event */
  PetscLogDouble numMessages;            /* The number of messages in this event */
  PetscLogDouble messageLength;          /* The total message lengths in this event */
  PetscLogDouble numReductions;          /* The number of reductions in this event */
  PetscLogDouble memIncrease;            /* How much the resident memory has increased in this event */
  PetscLogDouble mallocIncrease;         /* How much the maximum malloced space has increased in this event */
  PetscLogDouble mallocSpace;            /* How much the space was malloced and kept during this event */
  PetscLogDouble mallocIncreaseEvent;    /* Maximum of the high water mark within the event minus memory available at the end of the event */
#if defined(PETSC_HAVE_DEVICE)
  PetscLogDouble CpuToGpuCount; /* The total number of CPU to GPU copies */
  PetscLogDouble GpuToCpuCount; /* The total number of GPU to CPU copies */
  PetscLogDouble CpuToGpuSize;  /* The total size of CPU to GPU copies */
  PetscLogDouble GpuToCpuSize;  /* The total size of GPU to CPU copies */
  PetscLogDouble GpuFlops;      /* The flops done on a GPU in this event */
  PetscLogDouble GpuTime;       /* The time spent on a GPU in this event */
#endif
} PetscEventPerfInfo;

/* PetscEventRegLog is a pointer to the struct defined immediately below */
typedef struct _n_PetscEventRegLog *PetscEventRegLog;
struct _n_PetscEventRegLog {
  int                numEvents; /* The number of registered events */
  int                maxEvents; /* The maximum number of events */
  PetscEventRegInfo *eventInfo; /* The registration information for each event */
};

/* PetscEventPerfLog is a pointer to the struct defined immediately below */
typedef struct _n_PetscEventPerfLog *PetscEventPerfLog;
struct _n_PetscEventPerfLog {
  int                 numEvents; /* The number of logging events */
  int                 maxEvents; /* The maximum number of events */
  PetscEventPerfInfo *eventInfo; /* The performance information for each event */
};
/* ------------------------------------------------------------------------------------------------------------*/
/*
   PetscStageInfo - Contains all the information about a particular stage.

   PetscStageLog - An array of PetscStageInfo for each registered stage. There is a single one of these in the code.
*/
typedef struct _PetscStageInfo {
  char              *name;     /* The stage name */
  PetscBool          used;     /* The stage was pushed on this processor */
  PetscEventPerfInfo perfInfo; /* The stage performance information */
  PetscEventPerfLog  eventLog; /* The event information for this stage */
  PetscClassPerfLog  classLog; /* The class information for this stage */
} PetscStageInfo;

/* PetscStageLog is a pointer to the struct defined immediately below */
typedef struct _n_PetscStageLog *PetscStageLog;
struct _n_PetscStageLog {
  int              numStages; /* The number of registered stages */
  int              maxStages; /* The maximum number of stages */
  PetscIntStack    stack;     /* The stack for active stages */
  int              curStage;  /* The current stage (only used in macros so we don't call PetscIntStackTop) */
  PetscStageInfo  *stageInfo; /* The information for each stage */
  PetscEventRegLog eventLog;  /* The registered events */
  PetscClassRegLog classLog;  /* The registered classes */
};
/* -----------------------------------------------------------------------------------------------------*/

/* Deprecated no-op stub: both arguments are explicitly discarded and 0 is returned */
PETSC_DEPRECATED_FUNCTION("PetscLogObjectParent() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectParent(PetscObject o, PetscObject p) {
  (void)o;
  (void)p;
  return 0;
}

/* Deprecated no-op stub: both arguments are explicitly discarded and 0 is returned */
PETSC_DEPRECATED_FUNCTION("PetscLogObjectMemory() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectMemory(PetscObject o, PetscLogDouble m) {
  (void)o;
  (void)m;
  return 0;
}

#if defined(PETSC_USE_LOG) /* --- Logging is turned on --------------------------------*/
/* Global stage log; dereferenced directly by the PetscLogEventSync/Begin/End() macros in this header */
PETSC_EXTERN PetscStageLog  petsc_stageLog;
PETSC_EXTERN PetscErrorCode PetscLogGetStageLog(PetscStageLog *);
PETSC_EXTERN PetscErrorCode PetscStageLogGetCurrent(PetscStageLog, int *);
PETSC_EXTERN PetscErrorCode PetscStageLogGetEventPerfLog(PetscStageLog, int, PetscEventPerfLog *);

/*
   Flop counting:  We count each arithmetic operation (e.g., addition, multiplication)
   separately.

   For the complex numbers version, note that
       1 complex addition = 2 flops
       1 complex multiplication = 6 flops,
   where we define 1 flop as that for a double precision scalar.  We roughly approximate
   flop counting for complex numbers by multiplying the total flops by 4 (the average of 2 and 6);
   this corresponds to the assumption that we're counting mostly additions and multiplications -- and
   roughly the same number of each.  More accurate counting could be done by distinguishing
   among the various arithmetic operations.
 */

#if defined(PETSC_USE_COMPLEX)
#define PETSC_FLOPS_PER_OP 4.0
#else
#define PETSC_FLOPS_PER_OP 1.0
#endif

/*@C
       PetscLogFlops - Log how many flops are performed in a calculation

   Input Parameter:
.   n - the number of flops

   Notes:
     To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double,
     not an integer. Use PetscLogFlops(4.0*n) not PetscLogFlops(4*n)

     The value added to the global counter petsc_TotalFlops is n scaled by PETSC_FLOPS_PER_OP.
     When PETSC_USE_DEBUG is defined a negative n raises PETSC_ERR_ARG_OUTOFRANGE.

   Level: intermediate

.seealso: `PetscLogView()`, `PetscLogGpuFlops()`
@*/

static inline PetscErrorCode PetscLogFlops(PetscLogDouble n) {
  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG)
  PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops");
#endif
  petsc_TotalFlops += PETSC_FLOPS_PER_OP * n;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode PetscGetFlops(PetscLogDouble *);

#if defined(PETSC_HAVE_MPE)
PETSC_EXTERN PetscErrorCode PetscLogMPEBegin(void);
PETSC_EXTERN PetscErrorCode PetscLogMPEDump(const char[]);
#endif

/*
   Logging hooks (function pointers). PetscLogPLB/PetscLogPLE are called by the
   PetscLogEventBegin()/PetscLogEventEnd() macros; PetscLogPHC/PetscLogPHD are called by
   PetscLogObjectCreate()/PetscLogObjectDestroy() below. A null pointer disables the hook.
*/
PETSC_EXTERN PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
PETSC_EXTERN PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject);
PETSC_EXTERN PetscErrorCode (*PetscLogPHC)(PetscObject);
PETSC_EXTERN PetscErrorCode (*PetscLogPHD)(PetscObject);

/* Calls PetscLogObjectParent(p, d[i]) for i = 0..n-1; `n` is re-evaluated on every loop iteration */
#define PetscLogObjectParents(p, n, d) PetscMacroReturnStandard(for (int _i = 0; _i < (n); ++_i) PetscCall(PetscLogObjectParent((PetscObject)(p), (PetscObject)(d)[_i]));)
/* Invoke the create/destroy hook on h if the hook is set, otherwise evaluate to 0 */
#define PetscLogObjectCreate(h)        ((PetscLogPHC) ? (*PetscLogPHC)((PetscObject)(h)) : 0)
#define PetscLogObjectDestroy(h)       ((PetscLogPHD) ? (*PetscLogPHD)((PetscObject)(h)) : 0)
PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3);

/* Initialization functions */
PETSC_EXTERN PetscErrorCode PetscLogDefaultBegin(void);
PETSC_EXTERN PetscErrorCode PetscLogAllBegin(void);
PETSC_EXTERN PetscErrorCode PetscLogNestedBegin(void);
PETSC_EXTERN PetscErrorCode PetscLogTraceBegin(FILE *);
PETSC_EXTERN PetscErrorCode PetscLogActions(PetscBool);
PETSC_EXTERN PetscErrorCode PetscLogObjects(PetscBool);
PETSC_EXTERN PetscErrorCode PetscLogSetThreshold(PetscLogDouble, PetscLogDouble *);
PETSC_EXTERN PetscErrorCode PetscLogSet(PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject));

/* Output functions */
PETSC_EXTERN PetscErrorCode PetscLogView(PetscViewer);
PETSC_EXTERN PetscErrorCode PetscLogViewFromOptions(void);
PETSC_EXTERN PetscErrorCode PetscLogDump(const char[]);

/* Status checking functions */
PETSC_EXTERN PetscErrorCode PetscLogIsActive(PetscBool *);

/* Stage functions */
PETSC_EXTERN PetscErrorCode PetscLogStageRegister(const char[], PetscLogStage *);
PETSC_EXTERN PetscErrorCode PetscLogStagePush(PetscLogStage);
PETSC_EXTERN PetscErrorCode PetscLogStagePop(void);
PETSC_EXTERN PetscErrorCode PetscLogStageSetActive(PetscLogStage, PetscBool);
PETSC_EXTERN PetscErrorCode PetscLogStageGetActive(PetscLogStage, PetscBool *);
PETSC_EXTERN PetscErrorCode PetscLogStageSetVisible(PetscLogStage, PetscBool);
PETSC_EXTERN PetscErrorCode PetscLogStageGetVisible(PetscLogStage, PetscBool *);
PETSC_EXTERN PetscErrorCode PetscLogStageGetId(const char[], PetscLogStage *);

/* Event functions */
PETSC_EXTERN PetscErrorCode PetscLogEventRegister(const char[], PetscClassId, PetscLogEvent *);
PETSC_EXTERN PetscErrorCode PetscLogEventSetCollective(PetscLogEvent, PetscBool);
PETSC_EXTERN PetscErrorCode PetscLogEventIncludeClass(PetscClassId);
PETSC_EXTERN PetscErrorCode PetscLogEventExcludeClass(PetscClassId);
PETSC_EXTERN PetscErrorCode PetscLogEventActivate(PetscLogEvent);
PETSC_EXTERN PetscErrorCode PetscLogEventDeactivate(PetscLogEvent);
PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent);
PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent);
PETSC_EXTERN PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent, PetscBool);
PETSC_EXTERN PetscErrorCode PetscLogEventActivateClass(PetscClassId);
PETSC_EXTERN PetscErrorCode PetscLogEventDeactivateClass(PetscClassId);
PETSC_EXTERN PetscErrorCode PetscLogEventGetId(const char[], PetscLogEvent *);
PETSC_EXTERN PetscErrorCode PetscLogEventGetPerfInfo(int, PetscLogEvent, PetscEventPerfInfo *);
PETSC_EXTERN PetscErrorCode PetscLogEventSetDof(PetscLogEvent, PetscInt, PetscLogDouble);
PETSC_EXTERN PetscErrorCode PetscLogEventSetError(PetscLogEvent, PetscInt, PetscLogDouble);
/* The two _Internal routines are invoked by the PetscLogEventBegin()/PetscLogEventEnd() macros in this header */
PETSC_EXTERN PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent);
PETSC_EXTERN PetscErrorCode PetscLogPopCurrentEvent_Internal(void);

/*
   Global counters. The *_ct variables count calls and the *_len variables accumulate message
   sizes; they are updated by the MPI_*() wrapper macros defined later in this header.
*/
PETSC_EXTERN PetscLogDouble petsc_irecv_ct;
PETSC_EXTERN PetscLogDouble petsc_isend_ct;
PETSC_EXTERN PetscLogDouble petsc_recv_ct;
PETSC_EXTERN PetscLogDouble petsc_send_ct;
PETSC_EXTERN PetscLogDouble petsc_irecv_len;
PETSC_EXTERN PetscLogDouble petsc_isend_len;
PETSC_EXTERN PetscLogDouble petsc_recv_len;
PETSC_EXTERN PetscLogDouble petsc_send_len;
/* Collective and wait counters, updated by the MPI_*() wrapper macros defined later in this header */
PETSC_EXTERN PetscLogDouble petsc_allreduce_ct;
PETSC_EXTERN PetscLogDouble petsc_gather_ct;
PETSC_EXTERN PetscLogDouble petsc_scatter_ct;
PETSC_EXTERN PetscLogDouble petsc_wait_ct;
PETSC_EXTERN PetscLogDouble petsc_wait_any_ct;
PETSC_EXTERN PetscLogDouble petsc_wait_all_ct;
PETSC_EXTERN PetscLogDouble petsc_sum_of_waits_ct;

PETSC_EXTERN PetscBool PetscLogMemory;

PETSC_EXTERN PetscBool      PetscLogSyncOn; /* true if logging synchronization is enabled */
PETSC_EXTERN PetscErrorCode PetscLogEventSynchronize(PetscLogEvent, MPI_Comm);

/*
   PetscLogEventSync(), PetscLogEventBegin(), PetscLogEventEnd()

   All three macros share the same guard: the corresponding hook pointer (PetscLogPLB or PetscLogPLE)
   is non-null, the current stage's perfInfo.active flag is set, and event e is active in the
   current stage's event log. If the guard fails the macro evaluates to 0.

   Caveats visible from the expansions:
     - e is evaluated more than once, so it must not have side effects;
     - Begin/End combine the hook call and the push/pop of the current event with `||`, so the
       macro yields 0 on success and 1 (not the original error code) if either call returns nonzero,
       and the push/pop is skipped when the hook call fails.
*/
#define PetscLogEventSync(e, comm) \
  (((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? PetscLogEventSynchronize((e), (comm)) : 0))

#define PetscLogEventBegin(e, o1, o2, o3, o4) \
  ((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (((*PetscLogPLB)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPushCurrentEvent_Internal(e)) : 0)

#define PetscLogEventEnd(e, o1, o2, o3, o4) \
  ((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (((*PetscLogPLE)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPopCurrentEvent_Internal()) : 0)

PETSC_EXTERN PetscErrorCode PetscLogEventGetFlops(PetscLogEvent, PetscLogDouble *);
PETSC_EXTERN PetscErrorCode PetscLogEventZeroFlops(PetscLogEvent);

/*
     These are used internally in the PETSc routines to keep a count of MPI messages and
   their sizes.
383 384 This does not work for MPI-Uni because our include/petsc/mpiuni/mpi.h file 385 uses macros to defined the MPI operations. 386 387 It does not work correctly from HP-UX because it processes the 388 macros in a way that sometimes it double counts, hence 389 PETSC_HAVE_BROKEN_RECURSIVE_MACRO 390 391 It does not work with Windows because winmpich lacks MPI_Type_size() 392 */ 393 #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO) && !defined(PETSC_HAVE_MPI_MISSING_TYPESIZE) 394 /* 395 Logging of MPI activities 396 */ 397 static inline PetscErrorCode PetscMPITypeSize(PetscInt count, MPI_Datatype type, PetscLogDouble *length) { 398 PetscMPIInt typesize; 399 400 if (type == MPI_DATATYPE_NULL) return 0; 401 PetscCallMPI(MPI_Type_size(type, &typesize)); 402 *length += (PetscLogDouble)(count * typesize); 403 return 0; 404 } 405 406 static inline PetscErrorCode PetscMPITypeSizeComm(MPI_Comm comm, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length) { 407 PetscMPIInt typesize, size, p; 408 409 if (type == MPI_DATATYPE_NULL) return 0; 410 PetscCallMPI(MPI_Comm_size(comm, &size)); 411 PetscCallMPI(MPI_Type_size(type, &typesize)); 412 for (p = 0; p < size; ++p) *length += (PetscLogDouble)(counts[p] * typesize); 413 return 0; 414 } 415 416 static inline PetscErrorCode PetscMPITypeSizeCount(PetscInt n, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length) { 417 PetscMPIInt typesize, p; 418 419 if (type == MPI_DATATYPE_NULL) return 0; 420 PetscCallMPI(MPI_Type_size(type, &typesize)); 421 for (p = 0; p < n; ++p) *length += (PetscLogDouble)(counts[p] * typesize); 422 return 0; 423 } 424 425 /* 426 Returns 1 if the communicator is parallel else zero 427 */ 428 static inline int PetscMPIParallelComm(MPI_Comm comm) { 429 PetscMPIInt size; 430 MPI_Comm_size(comm, &size); 431 return size > 1; 432 } 433 434 #define MPI_Irecv(buf, count, datatype, source, tag, comm, request) ((petsc_irecv_ct++, 0) || PetscMPITypeSize((count), 
(datatype), &(petsc_irecv_len)) || MPI_Irecv((buf), (count), (datatype), (source), (tag), (comm), (request))) 435 436 #define MPI_Irecv_c(buf, count, datatype, source, tag, comm, request) ((petsc_irecv_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || MPI_Irecv_c((buf), (count), (datatype), (source), (tag), (comm), (request))) 437 438 #define MPI_Isend(buf, count, datatype, dest, tag, comm, request) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || MPI_Isend((buf), (count), (datatype), (dest), (tag), (comm), (request))) 439 440 #define MPI_Isend_c(buf, count, datatype, dest, tag, comm, request) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || MPI_Isend_c((buf), (count), (datatype), (dest), (tag), (comm), (request))) 441 442 #define MPI_Startall_irecv(count, datatype, number, requests) ((petsc_irecv_ct += (PetscLogDouble)(number), 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || ((number) && MPI_Startall((number), (requests)))) 443 444 #define MPI_Startall_isend(count, datatype, number, requests) ((petsc_isend_ct += (PetscLogDouble)(number), 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || ((number) && MPI_Startall((number), (requests)))) 445 446 #define MPI_Start_isend(count, datatype, requests) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_isend_len)) || MPI_Start((requests))) 447 448 #define MPI_Recv(buf, count, datatype, source, tag, comm, status) ((petsc_recv_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len)) || MPI_Recv((buf), (count), (datatype), (source), (tag), (comm), (status))) 449 450 #define MPI_Recv_c(buf, count, datatype, source, tag, comm, status) ((petsc_recv_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len)) || MPI_Recv_c((buf), (count), (datatype), (source), (tag), (comm), (status))) 451 452 #define MPI_Send(buf, count, datatype, dest, tag, comm) 
((petsc_send_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_send_len)) || MPI_Send((buf), (count), (datatype), (dest), (tag), (comm))) 453 454 #define MPI_Send_c(buf, count, datatype, dest, tag, comm) ((petsc_send_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_send_len)) || MPI_Send_c((buf), (count), (datatype), (dest), (tag), (comm))) 455 456 #define MPI_Wait(request, status) ((petsc_wait_ct++, petsc_sum_of_waits_ct++, 0) || MPI_Wait((request), (status))) 457 458 #define MPI_Waitany(a, b, c, d) ((petsc_wait_any_ct++, petsc_sum_of_waits_ct++, 0) || MPI_Waitany((a), (b), (c), (d))) 459 460 #define MPI_Waitall(count, array_of_requests, array_of_statuses) ((petsc_wait_all_ct++, petsc_sum_of_waits_ct += (PetscLogDouble)(count), 0) || MPI_Waitall((count), (array_of_requests), (array_of_statuses))) 461 462 #define MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm) (petsc_allreduce_ct += PetscMPIParallelComm((comm)), MPI_Allreduce((sendbuf), (recvbuf), (count), (datatype), (op), (comm))) 463 464 #define MPI_Bcast(buffer, count, datatype, root, comm) ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || MPI_Bcast((buffer), (count), (datatype), (root), (comm))) 465 466 #define MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm) ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || MPI_Reduce_scatter_block((sendbuf), (recvbuf), (recvcount), (datatype), (op), (comm))) 467 468 #define MPI_Alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) \ 469 ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Alltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm))) 470 471 #define MPI_Alltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm) \ 472 ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), 
(&petsc_send_len)) || MPI_Alltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm))) 473 474 #define MPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Allgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm))) 475 476 #define MPI_Allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm) \ 477 ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Allgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm))) 478 479 #define MPI_Gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \ 480 ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Gather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm))) 481 482 #define MPI_Gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm) \ 483 ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Gatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm))) 484 485 #define MPI_Scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \ 486 ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Scatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm))) 487 488 #define MPI_Scatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm) \ 489 ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Scatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm))) 490 491 #define MPI_Ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \ 492 
((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Ialltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request))) 493 494 #define MPI_Ialltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm, request) \ 495 ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len)) || MPI_Ialltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm), (request))) 496 497 #define MPI_Iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \ 498 ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Iallgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request))) 499 500 #define MPI_Iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm, request) \ 501 ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Iallgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm), (request))) 502 503 #define MPI_Igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \ 504 ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Igather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request))) 505 506 #define MPI_Igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm, request) \ 507 ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Igatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm), (request))) 508 509 #define MPI_Iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \ 510 ((petsc_scatter_ct++, 0) || 
PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Iscatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request))) 511 512 #define MPI_Iscatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \ 513 ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Iscatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request))) 514 515 #else 516 517 #define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests))) 518 519 #define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests))) 520 521 #define MPI_Start_isend(count, datatype, requests) (MPI_Start((requests))) 522 523 #endif /* !MPIUNI_H && ! PETSC_HAVE_BROKEN_RECURSIVE_MACRO */ 524 525 #else /* ---Logging is turned off --------------------------------------------*/ 526 527 #define PetscLogMemory PETSC_FALSE 528 529 #define PetscLogFlops(n) 0 530 #define PetscGetFlops(a) (*(a) = 0.0, 0) 531 532 #define PetscLogStageRegister(a, b) 0 533 #define PetscLogStagePush(a) 0 534 #define PetscLogStagePop() 0 535 #define PetscLogStageSetActive(a, b) 0 536 #define PetscLogStageGetActive(a, b) 0 537 #define PetscLogStageGetVisible(a, b) 0 538 #define PetscLogStageSetVisible(a, b) 0 539 #define PetscLogStageGetId(a, b) (*(b) = 0, 0) 540 541 #define PetscLogEventRegister(a, b, c) 0 542 #define PetscLogEventSetCollective(a, b) 0 543 #define PetscLogEventIncludeClass(a) 0 544 #define PetscLogEventExcludeClass(a) 0 545 #define PetscLogEventActivate(a) 0 546 #define PetscLogEventDeactivate(a) 0 547 #define PetscLogEventDeactivatePush(a) 0 548 #define PetscLogEventDeactivatePop(a) 0 549 #define PetscLogEventActivateClass(a) 0 550 #define PetscLogEventDeactivateClass(a) 0 551 #define PetscLogEventSetActiveAll(a, b) 0 552 #define PetscLogEventGetId(a, b) 
(*(b) = 0, 0) 553 #define PetscLogEventGetPerfInfo(a, b, c) 0 554 #define PetscLogEventSetDof(a, b, c) 0 555 #define PetscLogEventSetError(a, b, c) 0 556 557 #define PetscLogPLB 0 558 #define PetscLogPLE 0 559 #define PetscLogPHC 0 560 #define PetscLogPHD 0 561 562 #define PetscLogObjectParents(p, n, c) 0 563 #define PetscLogObjectCreate(h) 0 564 #define PetscLogObjectDestroy(h) 0 565 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3); 566 567 #define PetscLogDefaultBegin() 0 568 #define PetscLogAllBegin() 0 569 #define PetscLogNestedBegin() 0 570 #define PetscLogTraceBegin(file) 0 571 #define PetscLogActions(a) 0 572 #define PetscLogObjects(a) 0 573 #define PetscLogSetThreshold(a, b) 0 574 #define PetscLogSet(lb, le) 0 575 #define PetscLogIsActive(flag) (*(flag) = PETSC_FALSE, 0) 576 577 #define PetscLogView(viewer) 0 578 #define PetscLogViewFromOptions() 0 579 #define PetscLogDump(c) 0 580 581 #define PetscLogEventSync(e, comm) 0 582 #define PetscLogEventBegin(e, o1, o2, o3, o4) 0 583 #define PetscLogEventEnd(e, o1, o2, o3, o4) 0 584 585 /* If PETSC_USE_LOG is NOT defined, these still need to be! 
*/ 586 #define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall(number, requests)) 587 #define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall(number, requests)) 588 #define MPI_Start_isend(count, datatype, requests) MPI_Start(requests) 589 590 #endif /* PETSC_USE_LOG */ 591 592 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_DEVICE) 593 594 /* Global GPU counters */ 595 PETSC_EXTERN PetscLogDouble petsc_ctog_ct; 596 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct; 597 PETSC_EXTERN PetscLogDouble petsc_ctog_sz; 598 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz; 599 PETSC_EXTERN PetscLogDouble petsc_ctog_ct_scalar; 600 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct_scalar; 601 PETSC_EXTERN PetscLogDouble petsc_ctog_sz_scalar; 602 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz_scalar; 603 PETSC_EXTERN PetscLogDouble petsc_gflops; 604 PETSC_EXTERN PetscLogDouble petsc_gtime; 605 606 static inline PetscErrorCode PetscLogCpuToGpu(PetscLogDouble size) { 607 PetscFunctionBegin; 608 petsc_ctog_ct += 1; 609 petsc_ctog_sz += size; 610 PetscFunctionReturn(0); 611 } 612 613 static inline PetscErrorCode PetscLogGpuToCpu(PetscLogDouble size) { 614 PetscFunctionBegin; 615 petsc_gtoc_ct += 1; 616 petsc_gtoc_sz += size; 617 PetscFunctionReturn(0); 618 } 619 620 static inline PetscErrorCode PetscLogCpuToGpuScalar(PetscLogDouble size) { 621 PetscFunctionBegin; 622 petsc_ctog_ct_scalar += 1; 623 petsc_ctog_sz_scalar += size; 624 PetscFunctionReturn(0); 625 } 626 627 static inline PetscErrorCode PetscLogGpuToCpuScalar(PetscLogDouble size) { 628 PetscFunctionBegin; 629 petsc_gtoc_ct_scalar += 1; 630 petsc_gtoc_sz_scalar += size; 631 PetscFunctionReturn(0); 632 } 633 634 /*@C 635 PetscLogGpuFlops - Log how many flops are performed in a calculation on the device 636 637 Input Parameter: 638 . 
flops - the number of flops 639 640 Notes: 641 To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double, 642 not an integer. Use PetscLogFlops(4.0*n) not PetscLogFlops(4*n) 643 644 The values are also added to the total flop count for the MPI rank that is set with `PetscLogFlops()`; hence the number of flops 645 just on the CPU would be the value from set from `PetscLogFlops()` minus the value set from `PetscLogGpuFlops()` 646 647 Level: intermediate 648 649 .seealso: `PetscLogView()`, `PetscLogFlops()`, `PetscLogGpuTimeBegin()`, `PetscLogGpuTimeEnd()` 650 @*/ 651 static inline PetscErrorCode PetscLogGpuFlops(PetscLogDouble n) { 652 PetscFunctionBegin; 653 PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops"); 654 petsc_TotalFlops += PETSC_FLOPS_PER_OP * n; 655 petsc_gflops += PETSC_FLOPS_PER_OP * n; 656 PetscFunctionReturn(0); 657 } 658 659 static inline PetscErrorCode PetscLogGpuTimeAdd(PetscLogDouble t) { 660 PetscFunctionBegin; 661 petsc_gtime += t; 662 PetscFunctionReturn(0); 663 } 664 665 PETSC_EXTERN PetscErrorCode PetscLogGpuTime(void); 666 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeBegin(void); 667 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeEnd(void); 668 669 #else 670 671 #define PetscLogCpuToGpu(a) 0 672 #define PetscLogGpuToCpu(a) 0 673 #define PetscLogCpuToGpuScalar(a) 0 674 #define PetscLogGpuToCpuScalar(a) 0 675 #define PetscLogGpuFlops(a) 0 676 #define PetscLogGpuTime() 0 677 #define PetscLogGpuTimeAdd(a) 0 678 #define PetscLogGpuTimeBegin() 0 679 #define PetscLogGpuTimeEnd() 0 680 681 #endif /* PETSC_USE_LOG && PETSC_HAVE_DEVICE */ 682 683 #define PetscPreLoadBegin(flag, name) \ 684 do { \ 685 PetscBool PetscPreLoading = flag; \ 686 int PetscPreLoadMax, PetscPreLoadIt; \ 687 PetscLogStage _stageNum; \ 688 PetscCall(PetscOptionsGetBool(NULL, NULL, "-preload", &PetscPreLoading, NULL)); \ 689 PetscPreLoadMax = (int)(PetscPreLoading); \ 690 PetscPreLoadingUsed = 
PetscPreLoading ? PETSC_TRUE : PetscPreLoadingUsed; \ 691 for (PetscPreLoadIt = 0; PetscPreLoadIt <= PetscPreLoadMax; PetscPreLoadIt++) { \ 692 PetscPreLoadingOn = PetscPreLoading; \ 693 PetscCall(PetscBarrier(NULL)); \ 694 if (PetscPreLoadIt > 0) PetscCall(PetscLogStageGetId(name, &_stageNum)); \ 695 else PetscCall(PetscLogStageRegister(name, &_stageNum)); \ 696 PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \ 697 PetscCall(PetscLogStagePush(_stageNum)); 698 699 #define PetscPreLoadEnd() \ 700 PetscCall(PetscLogStagePop()); \ 701 PetscPreLoading = PETSC_FALSE; \ 702 } \ 703 } \ 704 while (0) 705 706 #define PetscPreLoadStage(name) \ 707 do { \ 708 PetscCall(PetscLogStagePop()); \ 709 if (PetscPreLoadIt > 0) PetscCall(PetscLogStageGetId(name, &_stageNum)); \ 710 else PetscCall(PetscLogStageRegister(name, &_stageNum)); \ 711 PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \ 712 PetscCall(PetscLogStagePush(_stageNum)); \ 713 } while (0) 714 715 /* some vars for logging */ 716 PETSC_EXTERN PetscBool PetscPreLoadingUsed; /* true if we are or have done preloading */ 717 PETSC_EXTERN PetscBool PetscPreLoadingOn; /* true if we are currently in a preloading calculation */ 718 719 #endif 720