1 /* 2 Defines profile/logging in PETSc. 3 */ 4 #ifndef PETSCLOG_H 5 #define PETSCLOG_H 6 7 #include <petscsys.h> 8 #include <petsctime.h> 9 10 /* SUBMANSEC = Sys */ 11 12 /* General logging of information; different from event logging */ 13 PETSC_EXTERN PetscErrorCode PetscInfo_Private(const char[], PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(3, 4); 14 #if defined(PETSC_USE_INFO) 15 #define PetscInfo(A, ...) PetscInfo_Private(PETSC_FUNCTION_NAME, ((PetscObject)A), __VA_ARGS__) 16 #else 17 #define PetscInfo(A, ...) 0 18 #endif 19 20 #define PetscInfo1(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 21 #define PetscInfo2(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 22 #define PetscInfo3(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 23 #define PetscInfo4(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 24 #define PetscInfo5(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 25 #define PetscInfo6(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 26 #define PetscInfo7(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 27 #define PetscInfo8(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 28 #define PetscInfo9(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 29 30 /*E 31 PetscInfoCommFlag - Describes the method by which to filter `PetscInfo()` by communicator size 32 33 Used as an input for `PetscInfoSetFilterCommSelf()` 34 35 $ `PETSC_INFO_COMM_ALL` - Default uninitialized value. `PetscInfo()` will not filter based on 36 communicator size (i.e. will print for all communicators) 37 $ `PETSC_INFO_COMM_NO_SELF` - `PetscInfo()` will NOT print for communicators with size = 1 (i.e. *_COMM_SELF) 38 $ `PETSC_INFO_COMM_ONLY_SELF` - `PetscInfo()` will ONLY print for communicators with size = 1 39 40 Level: intermediate 41 42 .seealso: `PetscInfo()`, `PetscInfoSetFromOptions()`, `PetscInfoSetFilterCommSelf()` 43 E*/ 44 typedef enum { 45 PETSC_INFO_COMM_ALL = -1, 46 PETSC_INFO_COMM_NO_SELF = 0, 47 PETSC_INFO_COMM_ONLY_SELF = 1 48 } PetscInfoCommFlag; 49 50 PETSC_EXTERN const char *const PetscInfoCommFlags[]; 51 PETSC_EXTERN PetscErrorCode PetscInfoDeactivateClass(PetscClassId); 52 PETSC_EXTERN PetscErrorCode PetscInfoActivateClass(PetscClassId); 53 PETSC_EXTERN PetscErrorCode PetscInfoEnabled(PetscClassId, PetscBool *); 54 PETSC_EXTERN PetscErrorCode PetscInfoAllow(PetscBool); 55 PETSC_EXTERN PetscErrorCode PetscInfoSetFile(const char[], const char[]); 56 PETSC_EXTERN PetscErrorCode PetscInfoGetFile(char **, FILE **); 57 PETSC_EXTERN PetscErrorCode PetscInfoSetClasses(PetscBool, PetscInt, const char *const *); 58 PETSC_EXTERN PetscErrorCode PetscInfoGetClass(const char *, PetscBool *); 59 PETSC_EXTERN PetscErrorCode PetscInfoGetInfo(PetscBool *, PetscBool *, PetscBool *, PetscBool *, PetscInfoCommFlag *); 60 PETSC_EXTERN PetscErrorCode PetscInfoProcessClass(const char[], PetscInt, const PetscClassId[]); 61 PETSC_EXTERN PetscErrorCode PetscInfoSetFilterCommSelf(PetscInfoCommFlag); 62 PETSC_EXTERN PetscErrorCode PetscInfoSetFromOptions(PetscOptions); 63 PETSC_EXTERN PetscErrorCode PetscInfoDestroy(void); 64 PETSC_EXTERN PetscBool PetscLogPrintInfo; /* if true, indicates PetscInfo() is turned on */ 65 66 /*MC 67 PetscLogEvent - id used to identify PETSc or user events which timed portions (blocks of executable) 68 code. 69 70 Level: intermediate 71 72 .seealso: `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStage` 73 M*/ 74 typedef int PetscLogEvent; 75 76 /*MC 77 PetscLogStage - id used to identify user stages (phases, sections) of runs - for logging 78 79 Level: intermediate 80 81 .seealso: `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEvent` 82 M*/ 83 typedef int PetscLogStage; 84 85 #define PETSC_EVENT 1311311 86 PETSC_EXTERN PetscLogEvent PETSC_LARGEST_EVENT; 87 88 /* Global flop counter */ 89 PETSC_EXTERN PetscLogDouble petsc_TotalFlops; 90 PETSC_EXTERN PetscLogDouble petsc_tmp_flops; 91 92 /* We must make the following structures available to access the event 93 activation flags in the PetscLogEventBegin/End() macros. These are not part of the PETSc public 94 API and are not intended to be used by other parts of PETSc or by users. 95 96 The code that manipulates these structures is in src/sys/logging/utils. 97 */ 98 typedef struct _n_PetscIntStack *PetscIntStack; 99 100 /* -----------------------------------------------------------------------------------------------------*/ 101 /* 102 PetscClassRegInfo, PetscClassPerfInfo - Each class has two data structures associated with it. The first has 103 static information about it, the second collects statistics on how many objects of the class are created, 104 how much memory they use, etc. 105 106 PetscClassRegLog, PetscClassPerfLog - arrays of the PetscClassRegInfo and PetscClassPerfInfo for all classes. 107 */ 108 typedef struct { 109 char *name; /* The class name */ 110 PetscClassId classid; /* The integer identifying this class */ 111 } PetscClassRegInfo; 112 113 typedef struct { 114 PetscClassId id; /* The integer identifying this class */ 115 int creations; /* The number of objects of this class created */ 116 int destructions; /* The number of objects of this class destroyed */ 117 PetscLogDouble mem; /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */ 118 PetscLogDouble descMem; /* The total memory allocated by descendents of these objects; this is completely wrong and should possibly be removed */ 119 } PetscClassPerfInfo; 120 121 typedef struct _n_PetscClassRegLog *PetscClassRegLog; 122 struct _n_PetscClassRegLog { 123 int numClasses; /* The number of classes registered */ 124 int maxClasses; /* The maximum number of classes */ 125 PetscClassRegInfo *classInfo; /* The structure for class information (classids are monotonicly increasing) */ 126 }; 127 128 typedef struct _n_PetscClassPerfLog *PetscClassPerfLog; 129 struct _n_PetscClassPerfLog { 130 int numClasses; /* The number of logging classes */ 131 int maxClasses; /* The maximum number of classes */ 132 PetscClassPerfInfo *classInfo; /* The structure for class information (classids are monotonicly increasing) */ 133 }; 134 /* -----------------------------------------------------------------------------------------------------*/ 135 /* 136 PetscEventRegInfo, PetscEventPerfInfo - Each event has two data structures associated with it. The first has 137 static information about it, the second collects statistics on how many times the event is used, how 138 much time it takes, etc. 139 140 PetscEventRegLog, PetscEventPerfLog - an array of all PetscEventRegInfo and PetscEventPerfInfo for all events. There is one 141 of these for each stage. 142 143 */ 144 typedef struct { 145 char *name; /* The name of this event */ 146 PetscClassId classid; /* The class the event is associated with */ 147 PetscBool collective; /* Flag this event as collective */ 148 #if defined(PETSC_HAVE_MPE) 149 int mpe_id_begin; /* MPE IDs that define the event */ 150 int mpe_id_end; 151 #endif 152 } PetscEventRegInfo; 153 154 typedef struct { 155 int id; /* The integer identifying this event */ 156 PetscBool active; /* The flag to activate logging */ 157 PetscBool visible; /* The flag to print info in summary */ 158 int depth; /* The nesting depth of the event call */ 159 int count; /* The number of times this event was executed */ 160 PetscLogDouble flops, flops2, flopsTmp; /* The flops and flops^2 used in this event */ 161 PetscLogDouble time, time2, timeTmp; /* The time and time^2 taken for this event */ 162 PetscLogDouble syncTime; /* The synchronization barrier time */ 163 PetscLogDouble dof[8]; /* The number of degrees of freedom associated with this event */ 164 PetscLogDouble errors[8]; /* The errors (user-defined) associated with this event */ 165 PetscLogDouble numMessages; /* The number of messages in this event */ 166 PetscLogDouble messageLength; /* The total message lengths in this event */ 167 PetscLogDouble numReductions; /* The number of reductions in this event */ 168 PetscLogDouble memIncrease; /* How much the resident memory has increased in this event */ 169 PetscLogDouble mallocIncrease; /* How much the maximum malloced space has increased in this event */ 170 PetscLogDouble mallocSpace; /* How much the space was malloced and kept during this event */ 171 PetscLogDouble mallocIncreaseEvent; /* Maximum of the high water mark with in event minus memory available at the end of the event */ 172 #if defined(PETSC_HAVE_DEVICE) 173 PetscLogDouble CpuToGpuCount; /* The total number of CPU to GPU copies */ 174 PetscLogDouble GpuToCpuCount; /* The total number of GPU to CPU copies */ 175 PetscLogDouble CpuToGpuSize; /* The total size of CPU to GPU copies */ 176 PetscLogDouble GpuToCpuSize; /* The total size of GPU to CPU copies */ 177 PetscLogDouble GpuFlops; /* The flops done on a GPU in this event */ 178 PetscLogDouble GpuTime; /* The time spent on a GPU in this event */ 179 #endif 180 } PetscEventPerfInfo; 181 182 typedef struct _n_PetscEventRegLog *PetscEventRegLog; 183 struct _n_PetscEventRegLog { 184 int numEvents; /* The number of registered events */ 185 int maxEvents; /* The maximum number of events */ 186 PetscEventRegInfo *eventInfo; /* The registration information for each event */ 187 }; 188 189 typedef struct _n_PetscEventPerfLog *PetscEventPerfLog; 190 struct _n_PetscEventPerfLog { 191 int numEvents; /* The number of logging events */ 192 int maxEvents; /* The maximum number of events */ 193 PetscEventPerfInfo *eventInfo; /* The performance information for each event */ 194 }; 195 /* ------------------------------------------------------------------------------------------------------------*/ 196 /* 197 PetscStageInfo - Contains all the information about a particular stage. 198 199 PetscStageLog - An array of PetscStageInfo for each registered stage. There is a single one of these in the code. 200 */ 201 typedef struct _PetscStageInfo { 202 char *name; /* The stage name */ 203 PetscBool used; /* The stage was pushed on this processor */ 204 PetscEventPerfInfo perfInfo; /* The stage performance information */ 205 PetscEventPerfLog eventLog; /* The event information for this stage */ 206 PetscClassPerfLog classLog; /* The class information for this stage */ 207 } PetscStageInfo; 208 209 typedef struct _n_PetscStageLog *PetscStageLog; 210 struct _n_PetscStageLog { 211 int numStages; /* The number of registered stages */ 212 int maxStages; /* The maximum number of stages */ 213 PetscIntStack stack; /* The stack for active stages */ 214 int curStage; /* The current stage (only used in macros so we don't call PetscIntStackTop) */ 215 PetscStageInfo *stageInfo; /* The information for each stage */ 216 PetscEventRegLog eventLog; /* The registered events */ 217 PetscClassRegLog classLog; /* The registered classes */ 218 }; 219 /* -----------------------------------------------------------------------------------------------------*/ 220 221 PETSC_DEPRECATED_FUNCTION("PetscLogObjectParent() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectParent(PetscObject o, PetscObject p) 222 { 223 (void)o; 224 (void)p; 225 return 0; 226 } 227 228 PETSC_DEPRECATED_FUNCTION("PetscLogObjectMemory() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectMemory(PetscObject o, PetscLogDouble m) 229 { 230 (void)o; 231 (void)m; 232 return 0; 233 } 234 235 #if defined(PETSC_USE_LOG) /* --- Logging is turned on --------------------------------*/ 236 PETSC_EXTERN PetscStageLog petsc_stageLog; 237 PETSC_EXTERN PetscErrorCode PetscLogGetStageLog(PetscStageLog *); 238 PETSC_EXTERN PetscErrorCode PetscStageLogGetCurrent(PetscStageLog, int *); 239 PETSC_EXTERN PetscErrorCode PetscStageLogGetEventPerfLog(PetscStageLog, int, PetscEventPerfLog *); 240 241 /* 242 Flop counting: We count each arithmetic operation (e.g., addition, multiplication) separately. 243 244 For the complex numbers version, note that 245 1 complex addition = 2 flops 246 1 complex multiplication = 6 flops, 247 where we define 1 flop as that for a double precision scalar. We roughly approximate 248 flop counting for complex numbers by multiplying the total flops by 4; this corresponds 249 to the assumption that we're counting mostly additions and multiplications -- and 250 roughly the same number of each. More accurate counting could be done by distinguishing 251 among the various arithmetic operations. 252 */ 253 254 #if defined(PETSC_USE_COMPLEX) 255 #define PETSC_FLOPS_PER_OP 4.0 256 #else 257 #define PETSC_FLOPS_PER_OP 1.0 258 #endif 259 260 /*@C 261 PetscLogFlops - Log how many flops are performed in a calculation 262 263 Input Parameter: 264 . flops - the number of flops 265 266 Notes: 267 To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double, 268 not an integer. Use PetscLogFlops(4.0*n) not PetscLogFlops(4*n) 269 270 Level: intermediate 271 272 .seealso: `PetscLogView()`, `PetscLogGpuFlops()` 273 @*/ 274 275 static inline PetscErrorCode PetscLogFlops(PetscLogDouble n) 276 { 277 PetscFunctionBegin; 278 #if defined(PETSC_USE_DEBUG) 279 PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops"); 280 #endif 281 petsc_TotalFlops += PETSC_FLOPS_PER_OP * n; 282 PetscFunctionReturn(0); 283 } 284 285 PETSC_EXTERN PetscErrorCode PetscGetFlops(PetscLogDouble *); 286 287 #if defined(PETSC_HAVE_MPE) 288 PETSC_EXTERN PetscErrorCode PetscLogMPEBegin(void); 289 PETSC_EXTERN PetscErrorCode PetscLogMPEDump(const char[]); 290 #endif 291 292 PETSC_EXTERN PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 293 PETSC_EXTERN PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 294 PETSC_EXTERN PetscErrorCode (*PetscLogPHC)(PetscObject); 295 PETSC_EXTERN PetscErrorCode (*PetscLogPHD)(PetscObject); 296 297 #define PetscLogObjectParents(p, n, d) PetscMacroReturnStandard(for (int _i = 0; _i < (n); ++_i) PetscCall(PetscLogObjectParent((PetscObject)(p), (PetscObject)(d)[_i]));) 298 #define PetscLogObjectCreate(h) ((PetscLogPHC) ? (*PetscLogPHC)((PetscObject)(h)) : 0) 299 #define PetscLogObjectDestroy(h) ((PetscLogPHD) ? (*PetscLogPHD)((PetscObject)(h)) : 0) 300 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3); 301 302 /* Initialization functions */ 303 PETSC_EXTERN PetscErrorCode PetscLogDefaultBegin(void); 304 PETSC_EXTERN PetscErrorCode PetscLogAllBegin(void); 305 PETSC_EXTERN PetscErrorCode PetscLogNestedBegin(void); 306 PETSC_EXTERN PetscErrorCode PetscLogTraceBegin(FILE *); 307 PETSC_EXTERN PetscErrorCode PetscLogActions(PetscBool); 308 PETSC_EXTERN PetscErrorCode PetscLogObjects(PetscBool); 309 PETSC_EXTERN PetscErrorCode PetscLogSetThreshold(PetscLogDouble, PetscLogDouble *); 310 PETSC_EXTERN PetscErrorCode PetscLogSet(PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject)); 311 312 /* Output functions */ 313 PETSC_EXTERN PetscErrorCode PetscLogView(PetscViewer); 314 PETSC_EXTERN PetscErrorCode PetscLogViewFromOptions(void); 315 PETSC_EXTERN PetscErrorCode PetscLogDump(const char[]); 316 317 /* Status checking functions */ 318 PETSC_EXTERN PetscErrorCode PetscLogIsActive(PetscBool *); 319 320 /* Stage functions */ 321 PETSC_EXTERN PetscErrorCode PetscLogStageRegister(const char[], PetscLogStage *); 322 PETSC_EXTERN PetscErrorCode PetscLogStagePush(PetscLogStage); 323 PETSC_EXTERN PetscErrorCode PetscLogStagePop(void); 324 PETSC_EXTERN PetscErrorCode PetscLogStageSetActive(PetscLogStage, PetscBool); 325 PETSC_EXTERN PetscErrorCode PetscLogStageGetActive(PetscLogStage, PetscBool *); 326 PETSC_EXTERN PetscErrorCode PetscLogStageSetVisible(PetscLogStage, PetscBool); 327 PETSC_EXTERN PetscErrorCode PetscLogStageGetVisible(PetscLogStage, PetscBool *); 328 PETSC_EXTERN PetscErrorCode PetscLogStageGetId(const char[], PetscLogStage *); 329 330 /* Event functions */ 331 PETSC_EXTERN PetscErrorCode PetscLogEventRegister(const char[], PetscClassId, PetscLogEvent *); 332 PETSC_EXTERN PetscErrorCode PetscLogEventSetCollective(PetscLogEvent, PetscBool); 333 PETSC_EXTERN PetscErrorCode PetscLogEventIncludeClass(PetscClassId); 334 PETSC_EXTERN PetscErrorCode PetscLogEventExcludeClass(PetscClassId); 335 PETSC_EXTERN PetscErrorCode PetscLogEventActivate(PetscLogEvent); 336 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivate(PetscLogEvent); 337 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent); 338 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent); 339 PETSC_EXTERN PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent, PetscBool); 340 PETSC_EXTERN PetscErrorCode PetscLogEventActivateClass(PetscClassId); 341 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivateClass(PetscClassId); 342 PETSC_EXTERN PetscErrorCode PetscLogEventGetId(const char[], PetscLogEvent *); 343 PETSC_EXTERN PetscErrorCode PetscLogEventGetPerfInfo(int, PetscLogEvent, PetscEventPerfInfo *); 344 PETSC_EXTERN PetscErrorCode PetscLogEventSetDof(PetscLogEvent, PetscInt, PetscLogDouble); 345 PETSC_EXTERN PetscErrorCode PetscLogEventSetError(PetscLogEvent, PetscInt, PetscLogDouble); 346 PETSC_EXTERN PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent); 347 PETSC_EXTERN PetscErrorCode PetscLogPopCurrentEvent_Internal(void); 348 349 /* Global counters */ 350 PETSC_EXTERN PetscLogDouble petsc_irecv_ct; 351 PETSC_EXTERN PetscLogDouble petsc_isend_ct; 352 PETSC_EXTERN PetscLogDouble petsc_recv_ct; 353 PETSC_EXTERN PetscLogDouble petsc_send_ct; 354 PETSC_EXTERN PetscLogDouble petsc_irecv_len; 355 PETSC_EXTERN PetscLogDouble petsc_isend_len; 356 PETSC_EXTERN PetscLogDouble petsc_recv_len; 357 PETSC_EXTERN PetscLogDouble petsc_send_len; 358 PETSC_EXTERN PetscLogDouble petsc_allreduce_ct; 359 PETSC_EXTERN PetscLogDouble petsc_gather_ct; 360 PETSC_EXTERN PetscLogDouble petsc_scatter_ct; 361 PETSC_EXTERN PetscLogDouble petsc_wait_ct; 362 PETSC_EXTERN PetscLogDouble petsc_wait_any_ct; 363 PETSC_EXTERN PetscLogDouble petsc_wait_all_ct; 364 PETSC_EXTERN PetscLogDouble petsc_sum_of_waits_ct; 365 366 PETSC_EXTERN PetscBool PetscLogMemory; 367 368 PETSC_EXTERN PetscBool PetscLogSyncOn; /* true if logging synchronization is enabled */ 369 PETSC_EXTERN PetscErrorCode PetscLogEventSynchronize(PetscLogEvent, MPI_Comm); 370 371 #define PetscLogEventSync(e, comm) \ 372 (((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? PetscLogEventSynchronize((e), (comm)) : 0)) 373 374 #define PetscLogEventBegin(e, o1, o2, o3, o4) \ 375 ((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (((*PetscLogPLB)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPushCurrentEvent_Internal(e)) : 0) 376 377 #define PetscLogEventEnd(e, o1, o2, o3, o4) \ 378 ((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (((*PetscLogPLE)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPopCurrentEvent_Internal()) : 0) 379 380 PETSC_EXTERN PetscErrorCode PetscLogEventGetFlops(PetscLogEvent, PetscLogDouble *); 381 PETSC_EXTERN PetscErrorCode PetscLogEventZeroFlops(PetscLogEvent); 382 383 /* 384 These are used internally in the PETSc routines to keep a count of MPI messages and 385 their sizes. 386 387 This does not work for MPI-Uni because our include/petsc/mpiuni/mpi.h file 388 uses macros to defined the MPI operations. 389 390 It does not work correctly from HP-UX because it processes the 391 macros in a way that sometimes it double counts, hence 392 PETSC_HAVE_BROKEN_RECURSIVE_MACRO 393 394 It does not work with Windows because winmpich lacks MPI_Type_size() 395 */ 396 #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO) && !defined(PETSC_HAVE_MPI_MISSING_TYPESIZE) 397 /* 398 Logging of MPI activities 399 */ 400 static inline PetscErrorCode PetscMPITypeSize(PetscInt count, MPI_Datatype type, PetscLogDouble *length) 401 { 402 PetscMPIInt typesize; 403 404 if (type == MPI_DATATYPE_NULL) return 0; 405 PetscCallMPI(MPI_Type_size(type, &typesize)); 406 *length += (PetscLogDouble)(count * typesize); 407 return 0; 408 } 409 410 static inline PetscErrorCode PetscMPITypeSizeComm(MPI_Comm comm, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length) 411 { 412 PetscMPIInt typesize, size, p; 413 414 if (type == MPI_DATATYPE_NULL) return 0; 415 PetscCallMPI(MPI_Comm_size(comm, &size)); 416 PetscCallMPI(MPI_Type_size(type, &typesize)); 417 for (p = 0; p < size; ++p) *length += (PetscLogDouble)(counts[p] * typesize); 418 return 0; 419 } 420 421 static inline PetscErrorCode PetscMPITypeSizeCount(PetscInt n, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length) 422 { 423 PetscMPIInt typesize, p; 424 425 if (type == MPI_DATATYPE_NULL) return 0; 426 PetscCallMPI(MPI_Type_size(type, &typesize)); 427 for (p = 0; p < n; ++p) *length += (PetscLogDouble)(counts[p] * typesize); 428 return 0; 429 } 430 431 /* 432 Returns 1 if the communicator is parallel else zero 433 */ 434 static inline int PetscMPIParallelComm(MPI_Comm comm) 435 { 436 PetscMPIInt size; 437 MPI_Comm_size(comm, &size); 438 return size > 1; 439 } 440 441 #define MPI_Irecv(buf, count, datatype, source, tag, comm, request) ((petsc_irecv_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || MPI_Irecv((buf), (count), (datatype), (source), (tag), (comm), (request))) 442 443 #define MPI_Irecv_c(buf, count, datatype, source, tag, comm, request) ((petsc_irecv_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || MPI_Irecv_c((buf), (count), (datatype), (source), (tag), (comm), (request))) 444 445 #define MPI_Isend(buf, count, datatype, dest, tag, comm, request) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || MPI_Isend((buf), (count), (datatype), (dest), (tag), (comm), (request))) 446 447 #define MPI_Isend_c(buf, count, datatype, dest, tag, comm, request) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || MPI_Isend_c((buf), (count), (datatype), (dest), (tag), (comm), (request))) 448 449 #define MPI_Startall_irecv(count, datatype, number, requests) ((petsc_irecv_ct += (PetscLogDouble)(number), 0) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len)) || ((number) && MPI_Startall((number), (requests)))) 450 451 #define MPI_Startall_isend(count, datatype, number, requests) ((petsc_isend_ct += (PetscLogDouble)(number), 0) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len)) || ((number) && MPI_Startall((number), (requests)))) 452 453 #define MPI_Start_isend(count, datatype, requests) ((petsc_isend_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_isend_len)) || MPI_Start((requests))) 454 455 #define MPI_Recv(buf, count, datatype, source, tag, comm, status) ((petsc_recv_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len)) || MPI_Recv((buf), (count), (datatype), (source), (tag), (comm), (status))) 456 457 #define MPI_Recv_c(buf, count, datatype, source, tag, comm, status) ((petsc_recv_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len)) || MPI_Recv_c((buf), (count), (datatype), (source), (tag), (comm), (status))) 458 459 #define MPI_Send(buf, count, datatype, dest, tag, comm) ((petsc_send_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_send_len)) || MPI_Send((buf), (count), (datatype), (dest), (tag), (comm))) 460 461 #define MPI_Send_c(buf, count, datatype, dest, tag, comm) ((petsc_send_ct++, 0) || PetscMPITypeSize((count), (datatype), (&petsc_send_len)) || MPI_Send_c((buf), (count), (datatype), (dest), (tag), (comm))) 462 463 #define MPI_Wait(request, status) ((petsc_wait_ct++, petsc_sum_of_waits_ct++, 0) || MPI_Wait((request), (status))) 464 465 #define MPI_Waitany(a, b, c, d) ((petsc_wait_any_ct++, petsc_sum_of_waits_ct++, 0) || MPI_Waitany((a), (b), (c), (d))) 466 467 #define MPI_Waitall(count, array_of_requests, array_of_statuses) ((petsc_wait_all_ct++, petsc_sum_of_waits_ct += (PetscLogDouble)(count), 0) || MPI_Waitall((count), (array_of_requests), (array_of_statuses))) 468 469 #define MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm) (petsc_allreduce_ct += PetscMPIParallelComm((comm)), MPI_Allreduce((sendbuf), (recvbuf), (count), (datatype), (op), (comm))) 470 471 #define MPI_Bcast(buffer, count, datatype, root, comm) ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || MPI_Bcast((buffer), (count), (datatype), (root), (comm))) 472 473 #define MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm) ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || MPI_Reduce_scatter_block((sendbuf), (recvbuf), (recvcount), (datatype), (op), (comm))) 474 475 #define MPI_Alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) \ 476 ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Alltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm))) 477 478 #define MPI_Alltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm) \ 479 ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len)) || MPI_Alltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm))) 480 481 #define MPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Allgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm))) 482 483 #define MPI_Allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm) \ 484 ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Allgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm))) 485 486 #define MPI_Gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \ 487 ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Gather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm))) 488 489 #define MPI_Gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm) \ 490 ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Gatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm))) 491 492 #define MPI_Scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \ 493 ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Scatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm))) 494 495 #define MPI_Scatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm) \ 496 ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Scatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm))) 497 498 #define MPI_Ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \ 499 ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Ialltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request))) 500 501 #define MPI_Ialltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm, request) \ 502 ((petsc_allreduce_ct += PetscMPIParallelComm((comm)), 0) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len)) || MPI_Ialltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm), (request))) 503 504 #define MPI_Iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \ 505 ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Iallgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request))) 506 507 #define MPI_Iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm, request) \ 508 ((petsc_gather_ct += PetscMPIParallelComm((comm)), 0) || MPI_Iallgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm), (request))) 509 510 #define MPI_Igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \ 511 ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Igather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request))) 512 513 #define MPI_Igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm, request) \ 514 ((petsc_gather_ct++, 0) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len)) || MPI_Igatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm), (request))) 515 516 #define MPI_Iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \ 517 ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Iscatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request))) 518 519 #define MPI_Iscatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \ 520 ((petsc_scatter_ct++, 0) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len)) || MPI_Iscatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request))) 521 522 #else 523 524 #define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests))) 525 526 #define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests))) 527 528 #define MPI_Start_isend(count, datatype, requests) (MPI_Start((requests))) 529 530 #endif /* !MPIUNI_H && ! PETSC_HAVE_BROKEN_RECURSIVE_MACRO */ 531 532 #else /* ---Logging is turned off --------------------------------------------*/ 533 534 #define PetscLogMemory PETSC_FALSE 535 536 #define PetscLogFlops(n) 0 537 #define PetscGetFlops(a) (*(a) = 0.0, 0) 538 539 #define PetscLogStageRegister(a, b) 0 540 #define PetscLogStagePush(a) 0 541 #define PetscLogStagePop() 0 542 #define PetscLogStageSetActive(a, b) 0 543 #define PetscLogStageGetActive(a, b) 0 544 #define PetscLogStageGetVisible(a, b) 0 545 #define PetscLogStageSetVisible(a, b) 0 546 #define PetscLogStageGetId(a, b) (*(b) = 0, 0) 547 548 #define PetscLogEventRegister(a, b, c) 0 549 #define PetscLogEventSetCollective(a, b) 0 550 #define PetscLogEventIncludeClass(a) 0 551 #define PetscLogEventExcludeClass(a) 0 552 #define PetscLogEventActivate(a) 0 553 #define PetscLogEventDeactivate(a) 0 554 #define PetscLogEventDeactivatePush(a) 0 555 #define PetscLogEventDeactivatePop(a) 0 556 #define PetscLogEventActivateClass(a) 0 557 #define PetscLogEventDeactivateClass(a) 0 558 #define PetscLogEventSetActiveAll(a, b) 0 559 #define PetscLogEventGetId(a, b) (*(b) = 0, 0) 560 #define PetscLogEventGetPerfInfo(a, b, c) 0 561 #define PetscLogEventSetDof(a, b, c) 0 562 #define PetscLogEventSetError(a, b, c) 0 563 564 #define PetscLogPLB 0 565 #define PetscLogPLE 0 566 #define PetscLogPHC 0 567 #define PetscLogPHD 0 568 569 #define PetscLogObjectParents(p, n, c) 0 570 #define PetscLogObjectCreate(h) 0 571 #define PetscLogObjectDestroy(h) 0 572 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3); 573 574 #define PetscLogDefaultBegin() 0 575 #define PetscLogAllBegin() 0 576 #define PetscLogNestedBegin() 0 577 #define PetscLogTraceBegin(file) 0 578 #define PetscLogActions(a) 0 579 #define PetscLogObjects(a) 0 580 #define PetscLogSetThreshold(a, b) 0 581 #define PetscLogSet(lb, le) 0 582 #define PetscLogIsActive(flag) (*(flag) = PETSC_FALSE, 0) 583 584 #define PetscLogView(viewer) 0 585 #define PetscLogViewFromOptions() 0 586 #define PetscLogDump(c) 0 587 588 #define PetscLogEventSync(e, comm) 0 589 #define PetscLogEventBegin(e, o1, o2, o3, o4) 0 590 #define PetscLogEventEnd(e, o1, o2, o3, o4) 0 591 592 /* If PETSC_USE_LOG is NOT defined, these still need to be! */ 593 #define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall(number, requests)) 594 #define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall(number, requests)) 595 #define MPI_Start_isend(count, datatype, requests) MPI_Start(requests) 596 597 #endif /* PETSC_USE_LOG */ 598 599 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_DEVICE) 600 601 /* Global GPU counters */ 602 PETSC_EXTERN PetscLogDouble petsc_ctog_ct; 603 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct; 604 PETSC_EXTERN PetscLogDouble petsc_ctog_sz; 605 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz; 606 PETSC_EXTERN PetscLogDouble petsc_ctog_ct_scalar; 607 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct_scalar; 608 PETSC_EXTERN PetscLogDouble petsc_ctog_sz_scalar; 609 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz_scalar; 610 PETSC_EXTERN PetscLogDouble petsc_gflops; 611 PETSC_EXTERN PetscLogDouble petsc_gtime; 612 613 static inline PetscErrorCode PetscLogCpuToGpu(PetscLogDouble size) 614 { 615 PetscFunctionBegin; 616 petsc_ctog_ct += 1; 617 petsc_ctog_sz += size; 618 PetscFunctionReturn(0); 619 } 620 621 static inline PetscErrorCode PetscLogGpuToCpu(PetscLogDouble size) 622 { 623 PetscFunctionBegin; 624 petsc_gtoc_ct += 1; 625 petsc_gtoc_sz += size; 626 PetscFunctionReturn(0); 627 } 628 629 static inline PetscErrorCode PetscLogCpuToGpuScalar(PetscLogDouble size) 630 { 631 PetscFunctionBegin; 632 petsc_ctog_ct_scalar += 1; 633 petsc_ctog_sz_scalar += size; 634 PetscFunctionReturn(0); 635 } 636 637 static inline PetscErrorCode PetscLogGpuToCpuScalar(PetscLogDouble size) 638 { 639 PetscFunctionBegin; 640 petsc_gtoc_ct_scalar += 1; 641 petsc_gtoc_sz_scalar += size; 642 PetscFunctionReturn(0); 643 } 644 645 /*@C 646 PetscLogGpuFlops - Log how many flops are performed in a calculation on the device 647 648 Input Parameter: 649 . flops - the number of flops 650 651 Notes: 652 To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double, 653 not an integer. Use PetscLogFlops(4.0*n) not PetscLogFlops(4*n) 654 655 The values are also added to the total flop count for the MPI rank that is set with `PetscLogFlops()`; hence the number of flops 656 just on the CPU would be the value from set from `PetscLogFlops()` minus the value set from `PetscLogGpuFlops()` 657 658 Level: intermediate 659 660 .seealso: `PetscLogView()`, `PetscLogFlops()`, `PetscLogGpuTimeBegin()`, `PetscLogGpuTimeEnd()` 661 @*/ 662 static inline PetscErrorCode PetscLogGpuFlops(PetscLogDouble n) 663 { 664 PetscFunctionBegin; 665 PetscCheck(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops"); 666 petsc_TotalFlops += PETSC_FLOPS_PER_OP * n; 667 petsc_gflops += PETSC_FLOPS_PER_OP * n; 668 PetscFunctionReturn(0); 669 } 670 671 static inline PetscErrorCode PetscLogGpuTimeAdd(PetscLogDouble t) 672 { 673 PetscFunctionBegin; 674 petsc_gtime += t; 675 PetscFunctionReturn(0); 676 } 677 678 PETSC_EXTERN PetscErrorCode PetscLogGpuTime(void); 679 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeBegin(void); 680 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeEnd(void); 681 682 #else 683 684 #define PetscLogCpuToGpu(a) 0 685 #define PetscLogGpuToCpu(a) 0 686 #define PetscLogCpuToGpuScalar(a) 0 687 #define PetscLogGpuToCpuScalar(a) 0 688 #define PetscLogGpuFlops(a) 0 689 #define PetscLogGpuTime() 0 690 #define PetscLogGpuTimeAdd(a) 0 691 #define PetscLogGpuTimeBegin() 0 692 #define PetscLogGpuTimeEnd() 0 693 694 #endif /* PETSC_USE_LOG && PETSC_HAVE_DEVICE */ 695 696 #define PetscPreLoadBegin(flag, name) \ 697 do { \ 698 PetscBool PetscPreLoading = flag; \ 699 int PetscPreLoadMax, PetscPreLoadIt; \ 700 PetscLogStage _stageNum; \ 701 PetscCall(PetscOptionsGetBool(NULL, NULL, "-preload", &PetscPreLoading, NULL)); \ 702 PetscPreLoadMax = (int)(PetscPreLoading); \ 703 PetscPreLoadingUsed = PetscPreLoading ? PETSC_TRUE : PetscPreLoadingUsed; \ 704 for (PetscPreLoadIt = 0; PetscPreLoadIt <= PetscPreLoadMax; PetscPreLoadIt++) { \ 705 PetscPreLoadingOn = PetscPreLoading; \ 706 PetscCall(PetscBarrier(NULL)); \ 707 if (PetscPreLoadIt > 0) PetscCall(PetscLogStageGetId(name, &_stageNum)); \ 708 else PetscCall(PetscLogStageRegister(name, &_stageNum)); \ 709 PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \ 710 PetscCall(PetscLogStagePush(_stageNum)); 711 712 #define PetscPreLoadEnd() \ 713 PetscCall(PetscLogStagePop()); \ 714 PetscPreLoading = PETSC_FALSE; \ 715 } \ 716 } \ 717 while (0) 718 719 #define PetscPreLoadStage(name) \ 720 do { \ 721 PetscCall(PetscLogStagePop()); \ 722 if (PetscPreLoadIt > 0) PetscCall(PetscLogStageGetId(name, &_stageNum)); \ 723 else PetscCall(PetscLogStageRegister(name, &_stageNum)); \ 724 PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \ 725 PetscCall(PetscLogStagePush(_stageNum)); \ 726 } while (0) 727 728 /* some vars for logging */ 729 PETSC_EXTERN PetscBool PetscPreLoadingUsed; /* true if we are or have done preloading */ 730 PETSC_EXTERN PetscBool PetscPreLoadingOn; /* true if we are currently in a preloading calculation */ 731 732 #endif 733