181b6088dSJunchao Zhang static char help[] = "Benchmarking cudaPointerGetAttributes() time\n"; 281b6088dSJunchao Zhang /* 381b6088dSJunchao Zhang Running example on Summit at OLCF: 481b6088dSJunchao Zhang # run with total 1 resource set (RS) (-n1), 1 RS per node (-r1), 1 MPI rank (-a1), 7 cores (-c7) and 1 GPU (-g1) per RS 581b6088dSJunchao Zhang $ jsrun -n1 -a1 -c7 -g1 -r1 ./ex2cu 681b6088dSJunchao Zhang Average cudaPointerGetAttributes() time = 0.29 microseconds 781b6088dSJunchao Zhang */ 881b6088dSJunchao Zhang #include <petscsys.h> 981b6088dSJunchao Zhang #include <petscdevice.h> 1081b6088dSJunchao Zhang 1181b6088dSJunchao Zhang int main(int argc,char **argv) 1281b6088dSJunchao Zhang { 1381b6088dSJunchao Zhang PetscInt i,n=2000; 1481b6088dSJunchao Zhang cudaError_t cerr; 1581b6088dSJunchao Zhang PetscScalar **ptrs; 1681b6088dSJunchao Zhang PetscLogDouble tstart,tend,time; 1781b6088dSJunchao Zhang struct cudaPointerAttributes attr; 1881b6088dSJunchao Zhang 19*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscInitialize(&argc,&argv,(char*)0,help)); 205f80ce2aSJacob Faibussowitsch CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL)); 2181b6088dSJunchao Zhang 225f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(n,&ptrs)); 2381b6088dSJunchao Zhang for (i=0; i<n; i++) { 245f80ce2aSJacob Faibussowitsch if (i%2) CHKERRQ(PetscMalloc1(i+16,&ptrs[i])); 255f80ce2aSJacob Faibussowitsch else CHKERRCUDA(cudaMalloc((void**)&ptrs[i],(i+16)*sizeof(PetscScalar))); 2681b6088dSJunchao Zhang } 2781b6088dSJunchao Zhang 285f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&tstart)); 2981b6088dSJunchao Zhang for (i=0; i<n; i++) { 3081b6088dSJunchao Zhang cerr = cudaPointerGetAttributes(&attr,ptrs[i]); 3181b6088dSJunchao Zhang if (cerr) cudaGetLastError(); 3281b6088dSJunchao Zhang } 335f80ce2aSJacob Faibussowitsch CHKERRQ(PetscTime(&tend)); 3481b6088dSJunchao Zhang time = (tend-tstart)*1e6/n; 3581b6088dSJunchao Zhang 365f80ce2aSJacob Faibussowitsch CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Average cudaPointerGetAttributes() time = %.2f microseconds\n",time)); 3781b6088dSJunchao Zhang 3881b6088dSJunchao Zhang for (i=0; i<n; i++) { 395f80ce2aSJacob Faibussowitsch if (i%2) CHKERRQ(PetscFree(ptrs[i])); 405f80ce2aSJacob Faibussowitsch else CHKERRCUDA(cudaFree(ptrs[i])); 4181b6088dSJunchao Zhang } 425f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(ptrs)); 4381b6088dSJunchao Zhang 44*b122ec5aSJacob Faibussowitsch CHKERRQ(PetscFinalize()); 45*b122ec5aSJacob Faibussowitsch return 0; 4681b6088dSJunchao Zhang } 4781b6088dSJunchao Zhang 4881b6088dSJunchao Zhang /*TEST 4981b6088dSJunchao Zhang build: 5081b6088dSJunchao Zhang requires: cuda 5181b6088dSJunchao Zhang 5281b6088dSJunchao Zhang test: 5381b6088dSJunchao Zhang requires: cuda 5481b6088dSJunchao Zhang args: -n 2 5581b6088dSJunchao Zhang output_file: output/empty.out 5681b6088dSJunchao Zhang filter: grep "DOES_NOT_EXIST" 5781b6088dSJunchao Zhang 5881b6088dSJunchao Zhang TEST*/ 59