1 #include <petsc/private/logimpl.h> /*I "petscsys.h" I*/ 2 #include <petsc/private/loghandlerimpl.h> 3 #include <petscdevice.h> 4 #include <nvToolsExt.h> 5 6 static PetscErrorCode PetscLogHandlerEventBegin_NVTX(PetscLogHandler handler, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 7 { 8 PetscLogState state; 9 PetscLogEventInfo info; 10 11 PetscFunctionBegin; 12 if (PetscDeviceInitialized(PETSC_DEVICE_CUDA)) { 13 PetscCall(PetscLogHandlerGetState(handler, &state)); 14 PetscCall(PetscLogStateEventGetInfo(state, event, &info)); 15 (void)nvtxRangePushA(info.name); 16 } 17 PetscFunctionReturn(PETSC_SUCCESS); 18 } 19 20 static PetscErrorCode PetscLogHandlerEventEnd_NVTX(PetscLogHandler handler, PetscLogEvent event, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4) 21 { 22 PetscFunctionBegin; 23 if (PetscDeviceInitialized(PETSC_DEVICE_CUDA)) (void)nvtxRangePop(); 24 PetscFunctionReturn(PETSC_SUCCESS); 25 } 26 27 /*MC 28 PETSCLOGHANDLERNVTX - PETSCLOGHANDLERNVTX = "nvtx" - A 29 `PetscLogHandler` that creates an NVTX range (which appears in Nvidia Nsight 30 profiling) for each PETSc event. 31 32 Options Database Keys: 33 + -log_nvtx - start an nvtx log handler manually 34 - -log_nvtx 0 - stop the nvtx log handler from starting automatically in `PetscInitialize()` in a program run within an nsys profiling session (see Note) 35 36 Level: developer 37 38 Note: 39 If `PetscInitialize()` detects the environment variable `NSYS_PROFILING_SESSION_ID` (which is defined by `nsys 40 profile`) or `NVPROF_ID` (which is defined by `nvprof`) an instance of this log handler will automatically be 41 started. 42 43 .seealso: [](ch_profiling), `PetscLogHandler` 44 M*/ 45 46 PETSC_INTERN PetscErrorCode PetscLogHandlerCreate_NVTX(PetscLogHandler handler) 47 { 48 PetscFunctionBegin; 49 handler->ops->eventbegin = PetscLogHandlerEventBegin_NVTX; 50 handler->ops->eventend = PetscLogHandlerEventEnd_NVTX; 51 PetscCall(PetscInfo(handler, "nvtx log handler created\n")); 52 PetscFunctionReturn(PETSC_SUCCESS); 53 } 54