1 /* 2 Defines profile/logging in PETSc. 3 */ 4 #ifndef PETSCLOG_H 5 #define PETSCLOG_H 6 7 #include <petscsys.h> 8 #include <petsctime.h> 9 10 /* SUBMANSEC = Sys */ 11 12 /* General logging of information; different from event logging */ 13 PETSC_EXTERN PetscErrorCode PetscInfo_Private(const char[], PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(3, 4); 14 #if defined(PETSC_USE_INFO) 15 #define PetscInfo(A, ...) PetscInfo_Private(PETSC_FUNCTION_NAME, ((PetscObject)A), __VA_ARGS__) 16 #else 17 #define PetscInfo(A, ...) PETSC_SUCCESS 18 #endif 19 20 #define PetscInfo1(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 21 #define PetscInfo2(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 22 #define PetscInfo3(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 23 #define PetscInfo4(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 24 #define PetscInfo5(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 25 #define PetscInfo6(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 26 #define PetscInfo7(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 27 #define PetscInfo8(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 28 #define PetscInfo9(...) PETSC_DEPRECATED_MACRO("GCC warning \"Use PetscInfo() (since version 3.17)\"") PetscInfo(__VA_ARGS__) 29 30 /*E 31 PetscInfoCommFlag - Describes the method by which to filter `PetscInfo()` by communicator size 32 33 Values: 34 + `PETSC_INFO_COMM_ALL` - Default uninitialized value. `PetscInfo()` will not filter based on 35 communicator size (i.e. will print for all communicators) 36 . `PETSC_INFO_COMM_NO_SELF` - `PetscInfo()` will NOT print for communicators with size = 1 (i.e. *_COMM_SELF) 37 - `PETSC_INFO_COMM_ONLY_SELF` - `PetscInfo()` will ONLY print for communicators with size = 1 38 39 Level: intermediate 40 41 Note: 42 Used as an input for `PetscInfoSetFilterCommSelf()` 43 44 .seealso: `PetscInfo()`, `PetscInfoSetFromOptions()`, `PetscInfoSetFilterCommSelf()` 45 E*/ 46 typedef enum { 47 PETSC_INFO_COMM_ALL = -1, 48 PETSC_INFO_COMM_NO_SELF = 0, 49 PETSC_INFO_COMM_ONLY_SELF = 1 50 } PetscInfoCommFlag; 51 52 PETSC_EXTERN const char *const PetscInfoCommFlags[]; 53 PETSC_EXTERN PetscErrorCode PetscInfoDeactivateClass(PetscClassId); 54 PETSC_EXTERN PetscErrorCode PetscInfoActivateClass(PetscClassId); 55 PETSC_EXTERN PetscErrorCode PetscInfoEnabled(PetscClassId, PetscBool *); 56 PETSC_EXTERN PetscErrorCode PetscInfoAllow(PetscBool); 57 PETSC_EXTERN PetscErrorCode PetscInfoSetFile(const char[], const char[]); 58 PETSC_EXTERN PetscErrorCode PetscInfoGetFile(char **, FILE **); 59 PETSC_EXTERN PetscErrorCode PetscInfoSetClasses(PetscBool, PetscInt, const char *const *); 60 PETSC_EXTERN PetscErrorCode PetscInfoGetClass(const char *, PetscBool *); 61 PETSC_EXTERN PetscErrorCode PetscInfoGetInfo(PetscBool *, PetscBool *, PetscBool *, PetscBool *, PetscInfoCommFlag *); 62 PETSC_EXTERN PetscErrorCode PetscInfoProcessClass(const char[], PetscInt, const PetscClassId[]); 63 PETSC_EXTERN PetscErrorCode PetscInfoSetFilterCommSelf(PetscInfoCommFlag); 64 PETSC_EXTERN PetscErrorCode PetscInfoSetFromOptions(PetscOptions); 65 PETSC_EXTERN PetscErrorCode PetscInfoDestroy(void); 66 PETSC_EXTERN PetscBool PetscLogPrintInfo; /* if true, indicates PetscInfo() is turned on */ 67 68 /*MC 69 PetscLogEvent - id used to identify PETSc or user events which timed portions (blocks of executable) 70 code. 71 72 Level: intermediate 73 74 .seealso: [](ch_profiling), `PetscLogEventRegister()`, `PetscLogEventBegin()`, `PetscLogEventEnd()`, `PetscLogStage` 75 M*/ 76 typedef int PetscLogEvent; 77 78 /*MC 79 PetscLogStage - id used to identify user stages (phases, sections) of runs - for logging 80 81 Level: intermediate 82 83 .seealso: [](ch_profiling), `PetscLogStageRegister()`, `PetscLogStagePush()`, `PetscLogStagePop()`, `PetscLogEvent` 84 M*/ 85 typedef int PetscLogStage; 86 87 #define PETSC_EVENT 1311311 88 PETSC_EXTERN PetscLogEvent PETSC_LARGEST_EVENT; 89 90 /* Handle multithreading */ 91 #if defined(PETSC_HAVE_THREADSAFETY) 92 #if defined(__cplusplus) 93 #define PETSC_TLS thread_local 94 #else 95 #define PETSC_TLS _Thread_local 96 #endif 97 #define PETSC_EXTERN_TLS extern PETSC_TLS PETSC_VISIBILITY_PUBLIC 98 PETSC_EXTERN PetscErrorCode PetscAddLogDouble(PetscLogDouble *, PetscLogDouble *, PetscLogDouble); 99 PETSC_EXTERN PetscErrorCode PetscAddLogDoubleCnt(PetscLogDouble *, PetscLogDouble *, PetscLogDouble *, PetscLogDouble *, PetscLogDouble); 100 #else 101 #define PETSC_EXTERN_TLS PETSC_EXTERN 102 #define PETSC_TLS 103 #define PetscAddLogDouble(a, b, c) ((PetscErrorCode)((*(a) += (c), PETSC_SUCCESS) || ((*(b) += (c)), PETSC_SUCCESS))) 104 #define PetscAddLogDoubleCnt(a, b, c, d, e) ((PetscErrorCode)(PetscAddLogDouble(a, c, 1) || PetscAddLogDouble(b, d, e))) 105 #endif 106 107 /* We must make the following structures available to access the event 108 activation flags in the PetscLogEventBegin/End() macros. These are not part of the PETSc public 109 API and are not intended to be used by other parts of PETSc or by users. 110 111 The code that manipulates these structures is in src/sys/logging/utils. 112 */ 113 typedef struct _n_PetscIntStack *PetscIntStack; 114 115 /* -----------------------------------------------------------------------------------------------------*/ 116 /* 117 PetscClassRegInfo, PetscClassPerfInfo - Each class has two data structures associated with it. The first has 118 static information about it, the second collects statistics on how many objects of the class are created, 119 how much memory they use, etc. 120 121 PetscClassRegLog, PetscClassPerfLog - arrays of the PetscClassRegInfo and PetscClassPerfInfo for all classes. 122 */ 123 typedef struct { 124 char *name; /* The class name */ 125 PetscClassId classid; /* The integer identifying this class */ 126 } PetscClassRegInfo; 127 128 typedef struct { 129 PetscClassId id; /* The integer identifying this class */ 130 int creations; /* The number of objects of this class created */ 131 int destructions; /* The number of objects of this class destroyed */ 132 PetscLogDouble mem; /* The total memory allocated by objects of this class; this is completely wrong and should possibly be removed */ 133 PetscLogDouble descMem; /* The total memory allocated by descendents of these objects; this is completely wrong and should possibly be removed */ 134 } PetscClassPerfInfo; 135 136 typedef struct _n_PetscClassRegLog *PetscClassRegLog; 137 struct _n_PetscClassRegLog { 138 int numClasses; /* The number of classes registered */ 139 int maxClasses; /* The maximum number of classes */ 140 PetscClassRegInfo *classInfo; /* The structure for class information (classids are monotonicly increasing) */ 141 }; 142 143 typedef struct _n_PetscClassPerfLog *PetscClassPerfLog; 144 struct _n_PetscClassPerfLog { 145 int numClasses; /* The number of logging classes */ 146 int maxClasses; /* The maximum number of classes */ 147 PetscClassPerfInfo *classInfo; /* The structure for class information (classids are monotonicly increasing) */ 148 }; 149 /* -----------------------------------------------------------------------------------------------------*/ 150 /* 151 PetscEventRegInfo, PetscEventPerfInfo - Each event has two data structures associated with it. The first has 152 static information about it, the second collects statistics on how many times the event is used, how 153 much time it takes, etc. 154 155 PetscEventRegLog, PetscEventPerfLog - an array of all PetscEventRegInfo and PetscEventPerfInfo for all events. There is one 156 of these for each stage. 157 158 */ 159 typedef struct { 160 char *name; /* The name of this event */ 161 PetscClassId classid; /* The class the event is associated with */ 162 PetscBool collective; /* Flag this event as collective */ 163 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 164 void *timer; /* Associated external tool timer for this event */ 165 #endif 166 #if defined(PETSC_HAVE_MPE) 167 int mpe_id_begin; /* MPE IDs that define the event */ 168 int mpe_id_end; 169 #endif 170 } PetscEventRegInfo; 171 172 typedef struct { 173 int id; /* The integer identifying this event */ 174 PetscBool active; /* The flag to activate logging */ 175 PetscBool visible; /* The flag to print info in summary */ 176 int depth; /* The nesting depth of the event call */ 177 int count; /* The number of times this event was executed */ 178 PetscLogDouble flops; /* The flops used in this event */ 179 PetscLogDouble flops2; /* The square of flops used in this event */ 180 PetscLogDouble flopsTmp; /* The accumulator for flops used in this event */ 181 PetscLogDouble time; /* The time taken for this event */ 182 PetscLogDouble time2; /* The square of time taken for this event */ 183 PetscLogDouble timeTmp; /* The accumulator for time taken for this event */ 184 PetscLogDouble syncTime; /* The synchronization barrier time */ 185 PetscLogDouble dof[8]; /* The number of degrees of freedom associated with this event */ 186 PetscLogDouble errors[8]; /* The errors (user-defined) associated with this event */ 187 PetscLogDouble numMessages; /* The number of messages in this event */ 188 PetscLogDouble messageLength; /* The total message lengths in this event */ 189 PetscLogDouble numReductions; /* The number of reductions in this event */ 190 PetscLogDouble memIncrease; /* How much the resident memory has increased in this event */ 191 PetscLogDouble mallocIncrease; /* How much the maximum malloced space has increased in this event */ 192 PetscLogDouble mallocSpace; /* How much the space was malloced and kept during this event */ 193 PetscLogDouble mallocIncreaseEvent; /* Maximum of the high water mark with in event minus memory available at the end of the event */ 194 #if defined(PETSC_HAVE_DEVICE) 195 PetscLogDouble CpuToGpuCount; /* The total number of CPU to GPU copies */ 196 PetscLogDouble GpuToCpuCount; /* The total number of GPU to CPU copies */ 197 PetscLogDouble CpuToGpuSize; /* The total size of CPU to GPU copies */ 198 PetscLogDouble GpuToCpuSize; /* The total size of GPU to CPU copies */ 199 PetscLogDouble GpuFlops; /* The flops done on a GPU in this event */ 200 PetscLogDouble GpuTime; /* The time spent on a GPU in this event */ 201 #endif 202 } PetscEventPerfInfo; 203 204 typedef struct _n_PetscEventRegLog *PetscEventRegLog; 205 struct _n_PetscEventRegLog { 206 int numEvents; /* The number of registered events */ 207 int maxEvents; /* The maximum number of events */ 208 PetscEventRegInfo *eventInfo; /* The registration information for each event */ 209 }; 210 211 typedef struct _n_PetscEventPerfLog *PetscEventPerfLog; 212 struct _n_PetscEventPerfLog { 213 int numEvents; /* The number of logging events */ 214 int maxEvents; /* The maximum number of events */ 215 PetscEventPerfInfo *eventInfo; /* The performance information for each event */ 216 }; 217 /* ------------------------------------------------------------------------------------------------------------*/ 218 /* 219 PetscStageInfo - Contains all the information about a particular stage. 220 221 PetscStageLog - An array of PetscStageInfo for each registered stage. There is a single one of these in the code. 222 */ 223 typedef struct _PetscStageInfo { 224 char *name; /* The stage name */ 225 PetscBool used; /* The stage was pushed on this processor */ 226 PetscEventPerfInfo perfInfo; /* The stage performance information */ 227 PetscEventPerfLog eventLog; /* The event information for this stage */ 228 PetscClassPerfLog classLog; /* The class information for this stage */ 229 #if defined(PETSC_HAVE_TAU_PERFSTUBS) 230 void *timer; /* Associated external tool timer for this stage */ 231 #endif 232 } PetscStageInfo; 233 234 typedef struct _n_PetscStageLog *PetscStageLog; 235 struct _n_PetscStageLog { 236 int numStages; /* The number of registered stages */ 237 int maxStages; /* The maximum number of stages */ 238 PetscIntStack stack; /* The stack for active stages */ 239 int curStage; /* The current stage (only used in macros so we don't call PetscIntStackTop) */ 240 PetscStageInfo *stageInfo; /* The information for each stage */ 241 PetscEventRegLog eventLog; /* The registered events */ 242 PetscClassRegLog classLog; /* The registered classes */ 243 }; 244 /* -----------------------------------------------------------------------------------------------------*/ 245 246 PETSC_DEPRECATED_FUNCTION("PetscLogObjectParent() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectParent(PetscObject o, PetscObject p) 247 { 248 (void)o; 249 (void)p; 250 return PETSC_SUCCESS; 251 } 252 253 PETSC_DEPRECATED_FUNCTION("PetscLogObjectMemory() is deprecated (since version 3.18)") static inline PetscErrorCode PetscLogObjectMemory(PetscObject o, PetscLogDouble m) 254 { 255 (void)o; 256 (void)m; 257 return PETSC_SUCCESS; 258 } 259 260 #if defined(PETSC_USE_LOG) /* --- Logging is turned on --------------------------------*/ 261 PETSC_EXTERN PetscStageLog petsc_stageLog; 262 PETSC_EXTERN PetscErrorCode PetscLogGetStageLog(PetscStageLog *); 263 PETSC_EXTERN PetscErrorCode PetscStageLogGetCurrent(PetscStageLog, int *); 264 PETSC_EXTERN PetscErrorCode PetscStageLogGetEventPerfLog(PetscStageLog, int, PetscEventPerfLog *); 265 266 PETSC_EXTERN PetscErrorCode PetscGetFlops(PetscLogDouble *); 267 268 #if defined(PETSC_HAVE_MPE) 269 PETSC_EXTERN PetscErrorCode PetscLogMPEBegin(void); 270 PETSC_EXTERN PetscErrorCode PetscLogMPEDump(const char[]); 271 #endif 272 273 PETSC_EXTERN PetscErrorCode (*PetscLogPLB)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 274 PETSC_EXTERN PetscErrorCode (*PetscLogPLE)(PetscLogEvent, int, PetscObject, PetscObject, PetscObject, PetscObject); 275 PETSC_EXTERN PetscErrorCode (*PetscLogPHC)(PetscObject); 276 PETSC_EXTERN PetscErrorCode (*PetscLogPHD)(PetscObject); 277 278 #define PetscLogObjectParents(p, n, d) PetscMacroReturnStandard(for (int _i = 0; _i < (n); ++_i) PetscCall(PetscLogObjectParent((PetscObject)(p), (PetscObject)(d)[_i]));) 279 #define PetscLogObjectCreate(h) ((PetscLogPHC) ? (*PetscLogPHC)((PetscObject)(h)) : PETSC_SUCCESS) 280 #define PetscLogObjectDestroy(h) ((PetscLogPHD) ? (*PetscLogPHD)((PetscObject)(h)) : PETSC_SUCCESS) 281 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3); 282 283 /* Initialization functions */ 284 PETSC_EXTERN PetscErrorCode PetscLogDefaultBegin(void); 285 PETSC_EXTERN PetscErrorCode PetscLogAllBegin(void); 286 PETSC_EXTERN PetscErrorCode PetscLogNestedBegin(void); 287 PETSC_EXTERN PetscErrorCode PetscLogTraceBegin(FILE *); 288 PETSC_EXTERN PetscErrorCode PetscLogActions(PetscBool); 289 PETSC_EXTERN PetscErrorCode PetscLogObjects(PetscBool); 290 PETSC_EXTERN PetscErrorCode PetscLogSetThreshold(PetscLogDouble, PetscLogDouble *); 291 PETSC_EXTERN PetscErrorCode PetscLogSet(PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject), PetscErrorCode (*)(int, int, PetscObject, PetscObject, PetscObject, PetscObject)); 292 293 /* Output functions */ 294 PETSC_EXTERN PetscErrorCode PetscLogView(PetscViewer); 295 PETSC_EXTERN PetscErrorCode PetscLogViewFromOptions(void); 296 PETSC_EXTERN PetscErrorCode PetscLogDump(const char[]); 297 298 /* Status checking functions */ 299 PETSC_EXTERN PetscErrorCode PetscLogIsActive(PetscBool *); 300 301 /* Stage functions */ 302 PETSC_EXTERN PetscErrorCode PetscLogStageRegister(const char[], PetscLogStage *); 303 PETSC_EXTERN PetscErrorCode PetscLogStagePush(PetscLogStage); 304 PETSC_EXTERN PetscErrorCode PetscLogStagePop(void); 305 PETSC_EXTERN PetscErrorCode PetscLogStageSetActive(PetscLogStage, PetscBool); 306 PETSC_EXTERN PetscErrorCode PetscLogStageGetActive(PetscLogStage, PetscBool *); 307 PETSC_EXTERN PetscErrorCode PetscLogStageSetVisible(PetscLogStage, PetscBool); 308 PETSC_EXTERN PetscErrorCode PetscLogStageGetVisible(PetscLogStage, PetscBool *); 309 PETSC_EXTERN PetscErrorCode PetscLogStageGetId(const char[], PetscLogStage *); 310 311 /* Event functions */ 312 PETSC_EXTERN PetscErrorCode PetscLogEventRegister(const char[], PetscClassId, PetscLogEvent *); 313 PETSC_EXTERN PetscErrorCode PetscLogEventSetCollective(PetscLogEvent, PetscBool); 314 PETSC_EXTERN PetscErrorCode PetscLogEventIncludeClass(PetscClassId); 315 PETSC_EXTERN PetscErrorCode PetscLogEventExcludeClass(PetscClassId); 316 PETSC_EXTERN PetscErrorCode PetscLogEventActivate(PetscLogEvent); 317 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivate(PetscLogEvent); 318 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePush(PetscLogEvent); 319 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivatePop(PetscLogEvent); 320 PETSC_EXTERN PetscErrorCode PetscLogEventSetActiveAll(PetscLogEvent, PetscBool); 321 PETSC_EXTERN PetscErrorCode PetscLogEventActivateClass(PetscClassId); 322 PETSC_EXTERN PetscErrorCode PetscLogEventDeactivateClass(PetscClassId); 323 PETSC_EXTERN PetscErrorCode PetscLogEventGetId(const char[], PetscLogEvent *); 324 PETSC_EXTERN PetscErrorCode PetscLogEventGetPerfInfo(int, PetscLogEvent, PetscEventPerfInfo *); 325 PETSC_EXTERN PetscErrorCode PetscLogEventSetDof(PetscLogEvent, PetscInt, PetscLogDouble); 326 PETSC_EXTERN PetscErrorCode PetscLogEventSetError(PetscLogEvent, PetscInt, PetscLogDouble); 327 PETSC_EXTERN PetscErrorCode PetscLogPushCurrentEvent_Internal(PetscLogEvent); 328 PETSC_EXTERN PetscErrorCode PetscLogPopCurrentEvent_Internal(void); 329 330 PETSC_EXTERN PetscBool PetscLogMemory; 331 332 PETSC_EXTERN PetscBool PetscLogSyncOn; /* true if logging synchronization is enabled */ 333 PETSC_EXTERN PetscErrorCode PetscLogEventSynchronize(PetscLogEvent, MPI_Comm); 334 335 #define PetscLogEventSync(e, comm) \ 336 ((PetscErrorCode)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? PetscLogEventSynchronize((e), (comm)) : PETSC_SUCCESS))) 337 338 #define PetscLogEventBegin(e, o1, o2, o3, o4) \ 339 ((PetscErrorCode)((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (PetscErrorCode)(((*PetscLogPLB)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPushCurrentEvent_Internal(e)) : PETSC_SUCCESS)) 340 341 #define PetscLogEventEnd(e, o1, o2, o3, o4) \ 342 ((PetscErrorCode)((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog->curStage].perfInfo.active && petsc_stageLog->stageInfo[petsc_stageLog->curStage].eventLog->eventInfo[e].active) ? (PetscErrorCode)(((*PetscLogPLE)((e), 0, (PetscObject)(o1), (PetscObject)(o2), (PetscObject)(o3), (PetscObject)(o4))) || PetscLogPopCurrentEvent_Internal()) : PETSC_SUCCESS)) 343 344 PETSC_EXTERN PetscErrorCode PetscLogEventGetFlops(PetscLogEvent, PetscLogDouble *); 345 PETSC_EXTERN PetscErrorCode PetscLogEventZeroFlops(PetscLogEvent); 346 347 /* Global flop counter */ 348 PETSC_EXTERN PetscLogDouble petsc_TotalFlops; 349 PETSC_EXTERN PetscLogDouble petsc_irecv_ct; 350 PETSC_EXTERN PetscLogDouble petsc_isend_ct; 351 PETSC_EXTERN PetscLogDouble petsc_recv_ct; 352 PETSC_EXTERN PetscLogDouble petsc_send_ct; 353 PETSC_EXTERN PetscLogDouble petsc_irecv_len; 354 PETSC_EXTERN PetscLogDouble petsc_isend_len; 355 PETSC_EXTERN PetscLogDouble petsc_recv_len; 356 PETSC_EXTERN PetscLogDouble petsc_send_len; 357 PETSC_EXTERN PetscLogDouble petsc_allreduce_ct; 358 PETSC_EXTERN PetscLogDouble petsc_gather_ct; 359 PETSC_EXTERN PetscLogDouble petsc_scatter_ct; 360 PETSC_EXTERN PetscLogDouble petsc_wait_ct; 361 PETSC_EXTERN PetscLogDouble petsc_wait_any_ct; 362 PETSC_EXTERN PetscLogDouble petsc_wait_all_ct; 363 PETSC_EXTERN PetscLogDouble petsc_sum_of_waits_ct; 364 365 /* Thread local storage */ 366 PETSC_EXTERN_TLS PetscLogDouble petsc_TotalFlops_th; 367 PETSC_EXTERN_TLS PetscLogDouble petsc_irecv_ct_th; 368 PETSC_EXTERN_TLS PetscLogDouble petsc_isend_ct_th; 369 PETSC_EXTERN_TLS PetscLogDouble petsc_recv_ct_th; 370 PETSC_EXTERN_TLS PetscLogDouble petsc_send_ct_th; 371 PETSC_EXTERN_TLS PetscLogDouble petsc_irecv_len_th; 372 PETSC_EXTERN_TLS PetscLogDouble petsc_isend_len_th; 373 PETSC_EXTERN_TLS PetscLogDouble petsc_recv_len_th; 374 PETSC_EXTERN_TLS PetscLogDouble petsc_send_len_th; 375 PETSC_EXTERN_TLS PetscLogDouble petsc_allreduce_ct_th; 376 PETSC_EXTERN_TLS PetscLogDouble petsc_gather_ct_th; 377 PETSC_EXTERN_TLS PetscLogDouble petsc_scatter_ct_th; 378 PETSC_EXTERN_TLS PetscLogDouble petsc_wait_ct_th; 379 PETSC_EXTERN_TLS PetscLogDouble petsc_wait_any_ct_th; 380 PETSC_EXTERN_TLS PetscLogDouble petsc_wait_all_ct_th; 381 PETSC_EXTERN_TLS PetscLogDouble petsc_sum_of_waits_ct_th; 382 383 /* 384 Flop counting: We count each arithmetic operation (e.g., addition, multiplication) separately. 385 386 For the complex numbers version, note that 387 1 complex addition = 2 flops 388 1 complex multiplication = 6 flops, 389 where we define 1 flop as that for a double precision scalar. We roughly approximate 390 flop counting for complex numbers by multiplying the total flops by 4; this corresponds 391 to the assumption that we're counting mostly additions and multiplications -- and 392 roughly the same number of each. More accurate counting could be done by distinguishing 393 among the various arithmetic operations. 394 */ 395 396 #if defined(PETSC_USE_COMPLEX) 397 #define PETSC_FLOPS_PER_OP 4.0 398 #else 399 #define PETSC_FLOPS_PER_OP 1.0 400 #endif 401 402 /*@C 403 PetscLogFlops - Log how many flops are performed in a calculation 404 405 Input Parameter: 406 . flops - the number of flops 407 408 Level: intermediate 409 410 Note: 411 To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double, 412 not an integer. Use `PetscLogFlops`(4.0*n) not `PetscLogFlops`(4*n) 413 414 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogGpuFlops()` 415 @*/ 416 static inline PetscErrorCode PetscLogFlops(PetscLogDouble n) 417 { 418 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops"); 419 return PetscAddLogDouble(&petsc_TotalFlops, &petsc_TotalFlops_th, PETSC_FLOPS_PER_OP * n); 420 } 421 422 /* 423 These are used internally in the PETSc routines to keep a count of MPI messages and 424 their sizes. 425 426 This does not work for MPI-Uni because our include/petsc/mpiuni/mpi.h file 427 uses macros to defined the MPI operations. 428 429 It does not work correctly from HP-UX because it processes the 430 macros in a way that sometimes it double counts, hence 431 PETSC_HAVE_BROKEN_RECURSIVE_MACRO 432 433 It does not work with Windows because winmpich lacks MPI_Type_size() 434 */ 435 #if !defined(MPIUNI_H) && !defined(PETSC_HAVE_BROKEN_RECURSIVE_MACRO) 436 /* 437 Logging of MPI activities 438 */ 439 static inline PetscErrorCode PetscMPITypeSize(PetscInt count, MPI_Datatype type, PetscLogDouble *length, PetscLogDouble *length_th) 440 { 441 PetscMPIInt typesize; 442 443 if (type == MPI_DATATYPE_NULL) return PETSC_SUCCESS; 444 PetscCallMPI(MPI_Type_size(type, &typesize)); 445 return PetscAddLogDouble(length, length_th, (PetscLogDouble)(count * typesize)); 446 } 447 448 static inline PetscErrorCode PetscMPITypeSizeComm(MPI_Comm comm, const PetscMPIInt *counts, MPI_Datatype type, PetscLogDouble *length, PetscLogDouble *length_th) 449 { 450 PetscMPIInt typesize, size, p; 451 PetscLogDouble l; 452 453 if (type == MPI_DATATYPE_NULL) return PETSC_SUCCESS; 454 PetscCallMPI(MPI_Comm_size(comm, &size)); 455 PetscCallMPI(MPI_Type_size(type, &typesize)); 456 for (p = 0, l = 0.0; p < size; ++p) l += (PetscLogDouble)(counts[p] * typesize); 457 return PetscAddLogDouble(length, length_th, l); 458 } 459 460 /* 461 Returns 1 if the communicator is parallel else zero 462 */ 463 static inline int PetscMPIParallelComm(MPI_Comm comm) 464 { 465 PetscMPIInt size; 466 MPI_Comm_size(comm, &size); 467 return size > 1; 468 } 469 470 #define MPI_Irecv(buf, count, datatype, source, tag, comm, request) \ 471 (PetscAddLogDouble(&petsc_irecv_ct, &petsc_irecv_ct_th, 1) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len), &(petsc_irecv_len_th)) || MPI_Irecv((buf), (count), (datatype), (source), (tag), (comm), (request))) 472 473 #define MPI_Irecv_c(buf, count, datatype, source, tag, comm, request) \ 474 (PetscAddLogDouble(&petsc_irecv_ct, &petsc_irecv_ct_th, 1) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len), &(petsc_irecv_len_th)) || MPI_Irecv_c((buf), (count), (datatype), (source), (tag), (comm), (request))) 475 476 #define MPI_Isend(buf, count, datatype, dest, tag, comm, request) \ 477 (PetscAddLogDouble(&petsc_isend_ct, &petsc_isend_ct_th, 1) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len), &(petsc_isend_len_th)) || MPI_Isend((buf), (count), (datatype), (dest), (tag), (comm), (request))) 478 479 #define MPI_Isend_c(buf, count, datatype, dest, tag, comm, request) \ 480 (PetscAddLogDouble(&petsc_isend_ct, &petsc_isend_ct_th, 1) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len), &(petsc_isend_len_th)) || MPI_Isend_c((buf), (count), (datatype), (dest), (tag), (comm), (request))) 481 482 #define MPI_Startall_irecv(count, datatype, number, requests) \ 483 (PetscAddLogDouble(&petsc_irecv_ct, &petsc_irecv_ct_th, number) || PetscMPITypeSize((count), (datatype), &(petsc_irecv_len), &(petsc_irecv_len_th)) || ((number) && MPI_Startall((number), (requests)))) 484 485 #define MPI_Startall_isend(count, datatype, number, requests) \ 486 (PetscAddLogDouble(&petsc_isend_ct, &petsc_isend_ct_th, number) || PetscMPITypeSize((count), (datatype), &(petsc_isend_len), &(petsc_isend_len_th)) || ((number) && MPI_Startall((number), (requests)))) 487 488 #define MPI_Start_isend(count, datatype, requests) (PetscAddLogDouble(&petsc_isend_ct, &petsc_isend_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_isend_len), (&petsc_isend_len_th)) || MPI_Start((requests))) 489 490 #define MPI_Recv(buf, count, datatype, source, tag, comm, status) \ 491 (PetscAddLogDouble(&petsc_recv_ct, &petsc_recv_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len), (&petsc_recv_len_th)) || MPI_Recv((buf), (count), (datatype), (source), (tag), (comm), (status))) 492 493 #define MPI_Recv_c(buf, count, datatype, source, tag, comm, status) \ 494 (PetscAddLogDouble(&petsc_recv_ct, &petsc_recv_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_recv_len), &(petsc_recv_len_th)) || MPI_Recv_c((buf), (count), (datatype), (source), (tag), (comm), (status))) 495 496 #define MPI_Send(buf, count, datatype, dest, tag, comm) \ 497 (PetscAddLogDouble(&petsc_send_ct, &petsc_send_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Send((buf), (count), (datatype), (dest), (tag), (comm))) 498 499 #define MPI_Send_c(buf, count, datatype, dest, tag, comm) \ 500 (PetscAddLogDouble(&petsc_send_ct, &petsc_send_ct_th, 1) || PetscMPITypeSize((count), (datatype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Send_c((buf), (count), (datatype), (dest), (tag), (comm))) 501 502 #define MPI_Wait(request, status) (PetscAddLogDouble(&petsc_wait_ct, &petsc_wait_ct_th, 1) || PetscAddLogDouble(&petsc_sum_of_waits_ct, &petsc_sum_of_waits_ct_th, 1) || MPI_Wait((request), (status))) 503 504 #define MPI_Waitany(a, b, c, d) (PetscAddLogDouble(&petsc_wait_any_ct, &petsc_wait_any_ct_th, 1) || PetscAddLogDouble(&petsc_sum_of_waits_ct, &petsc_sum_of_waits_ct_th, 1) || MPI_Waitany((a), (b), (c), (d))) 505 506 #define MPI_Waitall(count, array_of_requests, array_of_statuses) \ 507 (PetscAddLogDouble(&petsc_wait_all_ct, &petsc_wait_all_ct_th, 1) || PetscAddLogDouble(&petsc_sum_of_waits_ct, &petsc_sum_of_waits_ct_th, count) || MPI_Waitall((count), (array_of_requests), (array_of_statuses))) 508 509 #define MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm) (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || MPI_Allreduce((sendbuf), (recvbuf), (count), (datatype), (op), (comm))) 510 511 #define MPI_Bcast(buffer, count, datatype, root, comm) (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || MPI_Bcast((buffer), (count), (datatype), (root), (comm))) 512 513 #define MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, datatype, op, comm) \ 514 (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || MPI_Reduce_scatter_block((sendbuf), (recvbuf), (recvcount), (datatype), (op), (comm))) 515 516 #define MPI_Alltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) \ 517 (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Alltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm))) 518 519 #define MPI_Alltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm) \ 520 (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Alltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm))) 521 522 #define MPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm) \ 523 (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, PetscMPIParallelComm(comm)) || MPI_Allgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm))) 524 525 #define MPI_Allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm) \ 526 (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, PetscMPIParallelComm(comm)) || MPI_Allgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm))) 527 528 #define MPI_Gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \ 529 (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, 1) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Gather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm))) 530 531 #define MPI_Gatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm) \ 532 (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, 1) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Gatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm))) 533 534 #define MPI_Scatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) \ 535 (PetscAddLogDouble(&petsc_scatter_ct, &petsc_scatter_ct_th, 1) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len), &(petsc_recv_len_th)) || MPI_Scatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm))) 536 537 #define MPI_Scatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm) \ 538 (PetscAddLogDouble(&petsc_scatter_ct, &petsc_scatter_ct_th, 1) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len), &(petsc_recv_len_th)) || MPI_Scatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm))) 539 540 #define MPI_Ialltoall(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \ 541 (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Ialltoall((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request))) 542 543 #define MPI_Ialltoallv(sendbuf, sendcnts, sdispls, sendtype, recvbuf, recvcnts, rdispls, recvtype, comm, request) \ 544 (PetscAddLogDouble(&petsc_allreduce_ct, &petsc_allreduce_ct_th, PetscMPIParallelComm(comm)) || PetscMPITypeSizeComm((comm), (sendcnts), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Ialltoallv((sendbuf), (sendcnts), (sdispls), (sendtype), (recvbuf), (recvcnts), (rdispls), (recvtype), (comm), (request))) 545 546 #define MPI_Iallgather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm, request) \ 547 (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, PetscMPIParallelComm(comm)) || MPI_Iallgather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (comm), (request))) 548 549 #define MPI_Iallgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, comm, request) \ 550 (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, PetscMPIParallelComm(comm)) || MPI_Iallgatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (comm), (request))) 551 552 #define MPI_Igather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \ 553 (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, 1) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Igather((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request))) 554 555 #define MPI_Igatherv(sendbuf, sendcount, sendtype, recvbuf, recvcount, displs, recvtype, root, comm, request) \ 556 (PetscAddLogDouble(&petsc_gather_ct, &petsc_gather_ct_th, 1) || PetscMPITypeSize((sendcount), (sendtype), (&petsc_send_len), (&petsc_send_len_th)) || MPI_Igatherv((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (displs), (recvtype), (root), (comm), (request))) 557 558 #define MPI_Iscatter(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \ 559 (PetscAddLogDouble(&petsc_scatter_ct, &petsc_scatter_ct_th, 1) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len), (&petsc_recv_len_th)) || MPI_Iscatter((sendbuf), (sendcount), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request))) 560 561 #define MPI_Iscatterv(sendbuf, sendcount, displs, sendtype, recvbuf, recvcount, recvtype, root, comm, request) \ 562 (PetscAddLogDouble(&petsc_scatter_ct, &petsc_scatter_ct_th, 1) || PetscMPITypeSize((recvcount), (recvtype), (&petsc_recv_len), (&petsc_recv_len_th)) || MPI_Iscatterv((sendbuf), (sendcount), (displs), (sendtype), (recvbuf), (recvcount), (recvtype), (root), (comm), (request))) 563 564 #else 565 566 #define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests))) 567 568 #define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall((number), (requests))) 569 570 #define MPI_Start_isend(count, datatype, requests) (MPI_Start((requests))) 571 572 #endif /* !MPIUNI_H && ! PETSC_HAVE_BROKEN_RECURSIVE_MACRO */ 573 574 #else /* ---Logging is turned off --------------------------------------------*/ 575 576 #define PetscLogMemory PETSC_FALSE 577 578 #define PetscLogFlops(n) ((void)(n), PETSC_SUCCESS) 579 #define PetscGetFlops(a) (*(a) = 0.0, PETSC_SUCCESS) 580 581 #define PetscLogStageRegister(a, b) PETSC_SUCCESS 582 #define PetscLogStagePush(a) PETSC_SUCCESS 583 #define PetscLogStagePop() PETSC_SUCCESS 584 #define PetscLogStageSetActive(a, b) PETSC_SUCCESS 585 #define PetscLogStageGetActive(a, b) PETSC_SUCCESS 586 #define PetscLogStageGetVisible(a, b) PETSC_SUCCESS 587 #define PetscLogStageSetVisible(a, b) PETSC_SUCCESS 588 #define PetscLogStageGetId(a, b) (*(b) = 0, PETSC_SUCCESS) 589 590 #define PetscLogEventRegister(a, b, c) PETSC_SUCCESS 591 #define PetscLogEventSetCollective(a, b) PETSC_SUCCESS 592 #define PetscLogEventIncludeClass(a) PETSC_SUCCESS 593 #define PetscLogEventExcludeClass(a) PETSC_SUCCESS 594 #define PetscLogEventActivate(a) PETSC_SUCCESS 595 #define PetscLogEventDeactivate(a) PETSC_SUCCESS 596 #define PetscLogEventDeactivatePush(a) PETSC_SUCCESS 597 #define PetscLogEventDeactivatePop(a) PETSC_SUCCESS 598 #define PetscLogEventActivateClass(a) PETSC_SUCCESS 599 #define PetscLogEventDeactivateClass(a) PETSC_SUCCESS 600 #define PetscLogEventSetActiveAll(a, b) PETSC_SUCCESS 601 #define PetscLogEventGetId(a, b) (*(b) = 0, PETSC_SUCCESS) 602 #define PetscLogEventGetPerfInfo(a, b, c) PETSC_SUCCESS 603 #define PetscLogEventSetDof(a, b, c) PETSC_SUCCESS 604 #define PetscLogEventSetError(a, b, c) PETSC_SUCCESS 605 606 #define PetscLogPLB PETSC_SUCCESS 607 #define PetscLogPLE PETSC_SUCCESS 608 #define PetscLogPHC PETSC_SUCCESS 609 #define PetscLogPHD PETSC_SUCCESS 610 611 #define PetscLogObjectParents(p, n, c) PETSC_SUCCESS 612 #define PetscLogObjectCreate(h) PETSC_SUCCESS 613 #define PetscLogObjectDestroy(h) PETSC_SUCCESS 614 PETSC_EXTERN PetscErrorCode PetscLogObjectState(PetscObject, const char[], ...) PETSC_ATTRIBUTE_FORMAT(2, 3); 615 616 #define PetscLogDefaultBegin() PETSC_SUCCESS 617 #define PetscLogAllBegin() PETSC_SUCCESS 618 #define PetscLogNestedBegin() PETSC_SUCCESS 619 #define PetscLogTraceBegin(file) PETSC_SUCCESS 620 #define PetscLogActions(a) PETSC_SUCCESS 621 #define PetscLogObjects(a) PETSC_SUCCESS 622 #define PetscLogSetThreshold(a, b) PETSC_SUCCESS 623 #define PetscLogSet(lb, le) PETSC_SUCCESS 624 #define PetscLogIsActive(flag) (*(flag) = PETSC_FALSE, PETSC_SUCCESS) 625 626 #define PetscLogView(viewer) PETSC_SUCCESS 627 #define PetscLogViewFromOptions() PETSC_SUCCESS 628 #define PetscLogDump(c) PETSC_SUCCESS 629 630 #define PetscLogEventSync(e, comm) PETSC_SUCCESS 631 #define PetscLogEventBegin(e, o1, o2, o3, o4) PETSC_SUCCESS 632 #define PetscLogEventEnd(e, o1, o2, o3, o4) PETSC_SUCCESS 633 634 /* If PETSC_USE_LOG is NOT defined, these still need to be! */ 635 #define MPI_Startall_irecv(count, datatype, number, requests) ((number) && MPI_Startall(number, requests)) 636 #define MPI_Startall_isend(count, datatype, number, requests) ((number) && MPI_Startall(number, requests)) 637 #define MPI_Start_isend(count, datatype, requests) MPI_Start(requests) 638 639 #endif /* PETSC_USE_LOG */ 640 641 #define PetscPreLoadBegin(flag, name) \ 642 do { \ 643 PetscBool PetscPreLoading = flag; \ 644 int PetscPreLoadMax, PetscPreLoadIt; \ 645 PetscLogStage _stageNum; \ 646 PetscCall(PetscOptionsGetBool(NULL, NULL, "-preload", &PetscPreLoading, NULL)); \ 647 PetscPreLoadMax = (int)(PetscPreLoading); \ 648 PetscPreLoadingUsed = PetscPreLoading ? PETSC_TRUE : PetscPreLoadingUsed; \ 649 for (PetscPreLoadIt = 0; PetscPreLoadIt <= PetscPreLoadMax; PetscPreLoadIt++) { \ 650 PetscPreLoadingOn = PetscPreLoading; \ 651 PetscCall(PetscBarrier(NULL)); \ 652 if (PetscPreLoadIt > 0) PetscCall(PetscLogStageGetId(name, &_stageNum)); \ 653 else PetscCall(PetscLogStageRegister(name, &_stageNum)); \ 654 PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \ 655 PetscCall(PetscLogStagePush(_stageNum)); 656 657 #define PetscPreLoadEnd() \ 658 PetscCall(PetscLogStagePop()); \ 659 PetscPreLoading = PETSC_FALSE; \ 660 } \ 661 } \ 662 while (0) 663 664 #define PetscPreLoadStage(name) \ 665 do { \ 666 PetscCall(PetscLogStagePop()); \ 667 if (PetscPreLoadIt > 0) PetscCall(PetscLogStageGetId(name, &_stageNum)); \ 668 else PetscCall(PetscLogStageRegister(name, &_stageNum)); \ 669 PetscCall(PetscLogStageSetActive(_stageNum, (PetscBool)(!PetscPreLoadMax || PetscPreLoadIt))); \ 670 PetscCall(PetscLogStagePush(_stageNum)); \ 671 } while (0) 672 673 /* some vars for logging */ 674 PETSC_EXTERN PetscBool PetscPreLoadingUsed; /* true if we are or have done preloading */ 675 PETSC_EXTERN PetscBool PetscPreLoadingOn; /* true if we are currently in a preloading calculation */ 676 677 #if defined(PETSC_USE_LOG) && defined(PETSC_HAVE_DEVICE) 678 679 /* Global GPU counters */ 680 PETSC_EXTERN PetscLogDouble petsc_ctog_ct; 681 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct; 682 PETSC_EXTERN PetscLogDouble petsc_ctog_sz; 683 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz; 684 PETSC_EXTERN PetscLogDouble petsc_ctog_ct_scalar; 685 PETSC_EXTERN PetscLogDouble petsc_gtoc_ct_scalar; 686 PETSC_EXTERN PetscLogDouble petsc_ctog_sz_scalar; 687 PETSC_EXTERN PetscLogDouble petsc_gtoc_sz_scalar; 688 PETSC_EXTERN PetscLogDouble petsc_gflops; 689 PETSC_EXTERN PetscLogDouble petsc_gtime; 690 691 /* Thread local storage */ 692 PETSC_EXTERN_TLS PetscLogDouble petsc_ctog_ct_th; 693 PETSC_EXTERN_TLS PetscLogDouble petsc_gtoc_ct_th; 694 PETSC_EXTERN_TLS PetscLogDouble petsc_ctog_sz_th; 695 PETSC_EXTERN_TLS PetscLogDouble petsc_gtoc_sz_th; 696 PETSC_EXTERN_TLS PetscLogDouble petsc_ctog_ct_scalar_th; 697 PETSC_EXTERN_TLS PetscLogDouble petsc_gtoc_ct_scalar_th; 698 PETSC_EXTERN_TLS PetscLogDouble petsc_ctog_sz_scalar_th; 699 PETSC_EXTERN_TLS PetscLogDouble petsc_gtoc_sz_scalar_th; 700 PETSC_EXTERN_TLS PetscLogDouble petsc_gflops_th; 701 PETSC_EXTERN_TLS PetscLogDouble petsc_gtime_th; 702 703 PETSC_EXTERN PetscErrorCode PetscLogGpuTime(void); 704 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeBegin(void); 705 PETSC_EXTERN PetscErrorCode PetscLogGpuTimeEnd(void); 706 707 /*@C 708 PetscLogGpuFlops - Log how many flops are performed in a calculation on the device 709 710 Input Parameter: 711 . flops - the number of flops 712 713 Level: intermediate 714 715 Notes: 716 To limit the chance of integer overflow when multiplying by a constant, represent the constant as a double, 717 not an integer. Use `PetscLogFlops`(4.0*n) not `PetscLogFlops`(4*n) 718 719 The values are also added to the total flop count for the MPI rank that is set with `PetscLogFlops()`; hence the number of flops 720 just on the CPU would be the value from set from `PetscLogFlops()` minus the value set from `PetscLogGpuFlops()` 721 722 .seealso: [](ch_profiling), `PetscLogView()`, `PetscLogFlops()`, `PetscLogGpuTimeBegin()`, `PetscLogGpuTimeEnd()` 723 @*/ 724 static inline PetscErrorCode PetscLogGpuFlops(PetscLogDouble n) 725 { 726 PetscAssert(n >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot log negative flops"); 727 PetscCall(PetscAddLogDouble(&petsc_TotalFlops, &petsc_TotalFlops_th, PETSC_FLOPS_PER_OP * n)); 728 PetscCall(PetscAddLogDouble(&petsc_gflops, &petsc_gflops_th, PETSC_FLOPS_PER_OP * n)); 729 return PETSC_SUCCESS; 730 } 731 732 static inline PetscErrorCode PetscLogGpuTimeAdd(PetscLogDouble t) 733 { 734 return PetscAddLogDouble(&petsc_gtime, &petsc_gtime_th, t); 735 } 736 737 static inline PetscErrorCode PetscLogCpuToGpu(PetscLogDouble size) 738 { 739 return PetscAddLogDoubleCnt(&petsc_ctog_ct, &petsc_ctog_sz, &petsc_ctog_ct_th, &petsc_ctog_sz_th, size); 740 } 741 742 static inline PetscErrorCode PetscLogGpuToCpu(PetscLogDouble size) 743 { 744 return PetscAddLogDoubleCnt(&petsc_gtoc_ct, &petsc_gtoc_sz, &petsc_gtoc_ct_th, &petsc_gtoc_sz_th, size); 745 } 746 747 static inline PetscErrorCode PetscLogCpuToGpuScalar(PetscLogDouble size) 748 { 749 return PetscAddLogDoubleCnt(&petsc_ctog_ct_scalar, &petsc_ctog_sz_scalar, &petsc_ctog_ct_scalar_th, &petsc_ctog_sz_scalar_th, size); 750 } 751 752 static inline PetscErrorCode PetscLogGpuToCpuScalar(PetscLogDouble size) 753 { 754 return PetscAddLogDoubleCnt(&petsc_gtoc_ct_scalar, &petsc_gtoc_sz_scalar, &petsc_gtoc_ct_scalar_th, &petsc_gtoc_sz_scalar_th, size); 755 } 756 #else 757 758 #define PetscLogCpuToGpu(a) PETSC_SUCCESS 759 #define PetscLogGpuToCpu(a) PETSC_SUCCESS 760 #define PetscLogCpuToGpuScalar(a) PETSC_SUCCESS 761 #define PetscLogGpuToCpuScalar(a) PETSC_SUCCESS 762 #define PetscLogGpuFlops(a) PETSC_SUCCESS 763 #define PetscLogGpuTime() PETSC_SUCCESS 764 #define PetscLogGpuTimeAdd(a) PETSC_SUCCESS 765 #define PetscLogGpuTimeBegin() PETSC_SUCCESS 766 #define PetscLogGpuTimeEnd() PETSC_SUCCESS 767 768 #endif /* PETSC_USE_LOG && PETSC_HAVE_DEVICE */ 769 770 /* remove TLS defines */ 771 #undef PETSC_EXTERN_TLS 772 #undef PETSC_TLS 773 774 #endif 775