/* Source: petsc/src/sys/objects/device/tests/ex2cu.cu (revision 732aec7a18f2199fb53bb9a2f3aef439a834ce31) */
/* Help text for this benchmark; handed to PetscInitialize() in main(). */
static char help[] = "Benchmarking cudaPointerGetAttributes() time\n";
/*
  Running example on Summit at OLCF:
  # run with total 1 resource set (RS) (-n1), 1 RS per node (-r1), 1 MPI rank (-a1), 7 cores (-c7) and 1 GPU (-g1) per RS
  $ jsrun -n1 -a1 -c7 -g1 -r1  ./ex2cu
    Average cudaPointerGetAttributes() time = 0.31 microseconds
*/
#include <petscsys.h>
#include <petscdevice_cuda.h>

/*
  main - Measures the average wall-clock cost of one cudaPointerGetAttributes() call.

  Allocates n buffers (n defaults to 4000, overridable with -n), alternating between
  host memory (odd indices, PetscMalloc1) and device memory (even indices, cudaMalloc),
  queries every pointer once inside a timed loop, and prints the mean time per query
  in microseconds.

  Input Parameters:
+ argc - number of command-line arguments
- argv - command-line arguments, forwarded to PetscInitialize()

  Returns 0 on success; PETSc/CUDA failures are reported through PetscCall()/PetscCallCUDA().
*/
int main(int argc, char **argv)
{
  PetscInt                     i, n = 4000;
  cudaError_t                  cerr;
  PetscScalar                **ptrs;
  PetscLogDouble               tstart, tend, avgtime;
  struct cudaPointerAttributes attr;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
  /* n sizes every allocation below and is the divisor of the average, so it must be positive */
  PetscCheck(n > 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_OUTOFRANGE, "Number of pointers (-n) must be positive, got %" PetscInt_FMT, n);
  PetscCallCUDA(cudaStreamSynchronize(NULL)); /* Initialize CUDA runtime to get more accurate timing below */

  /* Build a mixed set of pointers: odd entries live on the host, even entries on the device */
  PetscCall(PetscMalloc1(n, &ptrs));
  for (i = 0; i < n; i++) {
    if (i % 2) PetscCall(PetscMalloc1(i + 16, &ptrs[i]));
    else PetscCallCUDA(cudaMalloc((void **)&ptrs[i], (i + 16) * sizeof(PetscScalar)));
  }

  PetscCall(PetscTime(&tstart));
  for (i = 0; i < n; i++) {
    cerr = cudaPointerGetAttributes(&attr, ptrs[i]);
    /*
      A failed query is deliberately not fatal: cudaGetLastError() is called only to reset the
      runtime's error state so later checked CUDA calls are unaffected.
      NOTE(review): presumably this covers CUDA versions that report an error for plain host
      pointers -- confirm against the CUDA versions supported.
    */
    if (cerr) cerr = cudaGetLastError();
  }
  PetscCall(PetscTime(&tend));
  avgtime = (tend - tstart) * 1e6 / n; /* seconds -> microseconds, averaged over the n queries */

  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Average cudaPointerGetAttributes() time = %.2f microseconds\n", avgtime));

  /* Release each buffer with the deallocator matching how it was allocated */
  for (i = 0; i < n; i++) {
    if (i % 2) PetscCall(PetscFree(ptrs[i]));
    else PetscCallCUDA(cudaFree(ptrs[i]));
  }
  PetscCall(PetscFree(ptrs));

  PetscCall(PetscFinalize());
  return 0;
}

/*TEST
  build:
    requires: cuda

  test:
    requires: cuda
    args: -n 2
    output_file: output/empty.out
    filter: grep "DOES_NOT_EXIST"

TEST*/