xref: /petsc/src/sys/objects/device/tests/ex2cu.cu (revision 3c859ba3a04a72e8efcb87bc7ffc046a6cbab413)
1 static char help[] = "Benchmarking cudaPointerGetAttributes() time\n";
2 /*
3   Running example on Summit at OLCF:
4   # run with total 1 resource set (RS) (-n1), 1 RS per node (-r1), 1 MPI rank (-a1), 7 cores (-c7) and 1 GPU (-g1) per RS
5   $ jsrun -n1 -a1 -c7 -g1 -r1  ./ex2cu
6     Average cudaPointerGetAttributes() time = 0.29 microseconds
7 */
8 #include <petscsys.h>
9 #include <petscdevice.h>
10 
/*
  main - Measures the average wall-clock cost of one cudaPointerGetAttributes() call.

  An array of n pointers is built in which odd-indexed entries come from PetscMalloc1()
  and even-indexed entries come from cudaMalloc(), so the query is timed on both kinds
  of allocation. The mean time per query is printed in microseconds.

  Options:
    -n <count> : number of pointers to allocate and query (default 2000, must be positive)

  Returns 0 on success, otherwise a nonzero PETSc error code.
*/
int main(int argc,char **argv)
{
  PetscErrorCode               ierr;
  PetscInt                     i,n=2000;
  cudaError_t                  cerr;
  PetscScalar                  **ptrs;
  PetscLogDouble               tstart,tend,avgtime; /* avgtime (not "time") avoids shadowing the C library time() */
  struct cudaPointerAttributes attr;

  ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
  ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr);
  /* n is the divisor of the average below and the length of every allocation loop */
  if (n <= 0) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_OUTOFRANGE,"Number of pointers given with -n must be positive");

  /* Alternate between PetscMalloc1() (odd i) and cudaMalloc() (even i) allocations of i+16 scalars */
  ierr = PetscMalloc1(n,&ptrs);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    if (i%2) {ierr = PetscMalloc1(i+16,&ptrs[i]);CHKERRQ(ierr);}
    else {cerr = cudaMalloc((void**)&ptrs[i],(i+16)*sizeof(PetscScalar));CHKERRCUDA(cerr);}
  }

  /* Timed region: nothing but the attribute queries (and the error reset when a query fails) */
  ierr = PetscTime(&tstart);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    cerr = cudaPointerGetAttributes(&attr,ptrs[i]);
    /* A failed query is tolerated here rather than treated as fatal; cudaGetLastError() is called only
       to reset the recorded CUDA error so it cannot trip a later CHKERRCUDA(), hence the discarded result.
       NOTE(review): presumably the failures come from the non-CUDA (PetscMalloc1) pointers on some CUDA versions - confirm */
    if (cerr) (void)cudaGetLastError();
  }
  ierr = PetscTime(&tend);CHKERRQ(ierr);
  avgtime = (tend-tstart)*1e6/n; /* seconds -> microseconds, averaged over the n queries */

  ierr = PetscPrintf(PETSC_COMM_WORLD,"Average cudaPointerGetAttributes() time = %.2f microseconds\n",avgtime);CHKERRQ(ierr);

  /* Release every pointer with the deallocator that matches how it was obtained */
  for (i=0; i<n; i++) {
    if (i%2) {ierr = PetscFree(ptrs[i]);CHKERRQ(ierr);}
    else {cerr = cudaFree(ptrs[i]);CHKERRCUDA(cerr);}
  }
  ierr = PetscFree(ptrs);CHKERRQ(ierr);

  ierr = PetscFinalize();
  return ierr;
}
48 
49 /*TEST
50   build:
51     requires: cuda
52 
53   test:
54     requires: cuda
55     args: -n 2
56     output_file: output/empty.out
57     filter: grep "DOES_NOT_EXIST"
58 
59 TEST*/
60