1 static char help[] = "Benchmarking cudaPointerGetAttributes() time\n";
2 /*
3 Running example on Summit at OLCF:
4 # run with total 1 resource set (RS) (-n1), 1 RS per node (-r1), 1 MPI rank (-a1), 7 cores (-c7) and 1 GPU (-g1) per RS
5 $ jsrun -n1 -a1 -c7 -g1 -r1 ./ex2cu
6 Average cudaPointerGetAttributes() time = 0.31 microseconds
7 */
8 #include <petscsys.h>
9 #include <petscdevice_cuda.h>
10
main(int argc,char ** argv)11 int main(int argc, char **argv)
12 {
13 PetscInt i, n = 4000;
14 cudaError_t cerr;
15 PetscScalar **ptrs;
16 PetscLogDouble tstart, tend, time;
17 struct cudaPointerAttributes attr;
18
19 PetscFunctionBeginUser;
20 PetscCall(PetscInitialize(&argc, &argv, NULL, help));
21 PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
22 PetscCallCUDA(cudaStreamSynchronize(NULL)); /* Initialize CUDA runtime to get more accurate timing below */
23
24 PetscCall(PetscMalloc1(n, &ptrs));
25 for (i = 0; i < n; i++) {
26 if (i % 2) PetscCall(PetscMalloc1(i + 16, &ptrs[i]));
27 else PetscCallCUDA(cudaMalloc((void **)&ptrs[i], (i + 16) * sizeof(PetscScalar)));
28 }
29
30 PetscCall(PetscTime(&tstart));
31 for (i = 0; i < n; i++) {
32 cerr = cudaPointerGetAttributes(&attr, ptrs[i]);
33 if (cerr) cerr = cudaGetLastError();
34 }
35 PetscCall(PetscTime(&tend));
36 time = (tend - tstart) * 1e6 / n;
37
38 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Average cudaPointerGetAttributes() time = %.2f microseconds\n", time));
39
40 for (i = 0; i < n; i++) {
41 if (i % 2) PetscCall(PetscFree(ptrs[i]));
42 else PetscCallCUDA(cudaFree(ptrs[i]));
43 }
44 PetscCall(PetscFree(ptrs));
45
46 PetscCall(PetscFinalize());
47 return 0;
48 }
49
50 /*TEST
51 build:
52 requires: cuda
53
54 test:
55 requires: cuda
56 args: -n 2
57 output_file: output/empty.out
58 filter: grep "DOES_NOT_EXIST"
59
60 TEST*/
61