xref: /petsc/src/sys/objects/device/tests/ex2cu.cu (revision b122ec5aa1bd4469eb4e0673542fb7de3f411254)
181b6088dSJunchao Zhang static char help[] = "Benchmarking cudaPointerGetAttributes() time\n";
281b6088dSJunchao Zhang /*
381b6088dSJunchao Zhang   Running example on Summit at OLCF:
481b6088dSJunchao Zhang   # run with total 1 resource set (RS) (-n1), 1 RS per node (-r1), 1 MPI rank (-a1), 7 cores (-c7) and 1 GPU (-g1) per RS
581b6088dSJunchao Zhang   $ jsrun -n1 -a1 -c7 -g1 -r1  ./ex2cu
681b6088dSJunchao Zhang     Average cudaPointerGetAttributes() time = 0.29 microseconds
781b6088dSJunchao Zhang */
881b6088dSJunchao Zhang #include <petscsys.h>
981b6088dSJunchao Zhang #include <petscdevice.h>
1081b6088dSJunchao Zhang 
1181b6088dSJunchao Zhang int main(int argc,char **argv)
1281b6088dSJunchao Zhang {
1381b6088dSJunchao Zhang   PetscInt                     i,n=2000;
1481b6088dSJunchao Zhang   cudaError_t                  cerr;
1581b6088dSJunchao Zhang   PetscScalar                  **ptrs;
1681b6088dSJunchao Zhang   PetscLogDouble               tstart,tend,time;
1781b6088dSJunchao Zhang   struct cudaPointerAttributes attr;
1881b6088dSJunchao Zhang 
19*b122ec5aSJacob Faibussowitsch   CHKERRQ(PetscInitialize(&argc,&argv,(char*)0,help));
205f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL));
2181b6088dSJunchao Zhang 
225f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(n,&ptrs));
2381b6088dSJunchao Zhang   for (i=0; i<n; i++) {
245f80ce2aSJacob Faibussowitsch     if (i%2) CHKERRQ(PetscMalloc1(i+16,&ptrs[i]));
255f80ce2aSJacob Faibussowitsch     else CHKERRCUDA(cudaMalloc((void**)&ptrs[i],(i+16)*sizeof(PetscScalar)));
2681b6088dSJunchao Zhang   }
2781b6088dSJunchao Zhang 
285f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscTime(&tstart));
2981b6088dSJunchao Zhang   for (i=0; i<n; i++) {
3081b6088dSJunchao Zhang     cerr = cudaPointerGetAttributes(&attr,ptrs[i]);
3181b6088dSJunchao Zhang     if (cerr) cudaGetLastError();
3281b6088dSJunchao Zhang   }
335f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscTime(&tend));
3481b6088dSJunchao Zhang   time = (tend-tstart)*1e6/n;
3581b6088dSJunchao Zhang 
365f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Average cudaPointerGetAttributes() time = %.2f microseconds\n",time));
3781b6088dSJunchao Zhang 
3881b6088dSJunchao Zhang   for (i=0; i<n; i++) {
395f80ce2aSJacob Faibussowitsch     if (i%2) CHKERRQ(PetscFree(ptrs[i]));
405f80ce2aSJacob Faibussowitsch     else CHKERRCUDA(cudaFree(ptrs[i]));
4181b6088dSJunchao Zhang   }
425f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(ptrs));
4381b6088dSJunchao Zhang 
44*b122ec5aSJacob Faibussowitsch   CHKERRQ(PetscFinalize());
45*b122ec5aSJacob Faibussowitsch   return 0;
4681b6088dSJunchao Zhang }
4781b6088dSJunchao Zhang 
4881b6088dSJunchao Zhang /*TEST
4981b6088dSJunchao Zhang   build:
5081b6088dSJunchao Zhang     requires: cuda
5181b6088dSJunchao Zhang 
5281b6088dSJunchao Zhang   test:
5381b6088dSJunchao Zhang     requires: cuda
5481b6088dSJunchao Zhang     args: -n 2
5581b6088dSJunchao Zhang     output_file: output/empty.out
5681b6088dSJunchao Zhang     filter: grep "DOES_NOT_EXIST"
5781b6088dSJunchao Zhang 
5881b6088dSJunchao Zhang TEST*/
59