1 static char help[] = "Benchmarking cudaPointerGetAttributes() time\n"; 2 /* 3 Running example on Summit at OLCF: 4 # run with total 1 resource set (RS) (-n1), 1 RS per node (-r1), 1 MPI rank (-a1), 7 cores (-c7) and 1 GPU (-g1) per RS 5 $ jsrun -n1 -a1 -c7 -g1 -r1 ./ex2cu 6 Average cudaPointerGetAttributes() time = 0.31 microseconds 7 */ 8 #include <petscsys.h> 9 #include <petscdevice_cuda.h> 10 11 int main(int argc, char **argv) 12 { 13 PetscInt i, n = 4000; 14 cudaError_t cerr; 15 PetscScalar **ptrs; 16 PetscLogDouble tstart, tend, time; 17 struct cudaPointerAttributes attr; 18 19 PetscFunctionBeginUser; 20 PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 21 PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL)); 22 PetscCallCUDA(cudaStreamSynchronize(NULL)); /* Initialize CUDA runtime to get more accurate timing below */ 23 24 PetscCall(PetscMalloc1(n, &ptrs)); 25 for (i = 0; i < n; i++) { 26 if (i % 2) PetscCall(PetscMalloc1(i + 16, &ptrs[i])); 27 else PetscCallCUDA(cudaMalloc((void **)&ptrs[i], (i + 16) * sizeof(PetscScalar))); 28 } 29 30 PetscCall(PetscTime(&tstart)); 31 for (i = 0; i < n; i++) { 32 cerr = cudaPointerGetAttributes(&attr, ptrs[i]); 33 if (cerr) cerr = cudaGetLastError(); 34 } 35 PetscCall(PetscTime(&tend)); 36 time = (tend - tstart) * 1e6 / n; 37 38 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Average cudaPointerGetAttributes() time = %.2f microseconds\n", time)); 39 40 for (i = 0; i < n; i++) { 41 if (i % 2) PetscCall(PetscFree(ptrs[i])); 42 else PetscCallCUDA(cudaFree(ptrs[i])); 43 } 44 PetscCall(PetscFree(ptrs)); 45 46 PetscCall(PetscFinalize()); 47 return 0; 48 } 49 50 /*TEST 51 build: 52 requires: cuda 53 54 test: 55 requires: cuda 56 args: -n 2 57 output_file: output/empty.out 58 filter: grep "DOES_NOT_EXIST" 59 60 TEST*/ 61