/* Help text handed to PetscInitialize() in main() */
static char help[] = "Benchmarking cudaPointerGetAttributes() time\n";
/*
  Running example on Summit at OLCF:
  # run with a total of 1 resource set (RS) (-n1), 1 RS per node (-r1), 1 MPI rank (-a1), 7 cores (-c7) and 1 GPU (-g1) per RS
  $ jsrun -n1 -a1 -c7 -g1 -r1 ./ex2cu
  Average cudaPointerGetAttributes() time = 0.31 microseconds
*/
881b6088dSJunchao Zhang #include <petscsys.h>
90e6b6b59SJacob Faibussowitsch #include <petscdevice_cuda.h>
1081b6088dSJunchao Zhang
main(int argc,char ** argv)11d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv)
12d71ae5a4SJacob Faibussowitsch {
139622a0a0SJunchao Zhang PetscInt i, n = 4000;
1481b6088dSJunchao Zhang cudaError_t cerr;
1581b6088dSJunchao Zhang PetscScalar **ptrs;
1681b6088dSJunchao Zhang PetscLogDouble tstart, tend, time;
1781b6088dSJunchao Zhang struct cudaPointerAttributes attr;
1881b6088dSJunchao Zhang
19327415f7SBarry Smith PetscFunctionBeginUser;
20*c8025a54SPierre Jolivet PetscCall(PetscInitialize(&argc, &argv, NULL, help));
219566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
229622a0a0SJunchao Zhang PetscCallCUDA(cudaStreamSynchronize(NULL)); /* Initialize CUDA runtime to get more accurate timing below */
2381b6088dSJunchao Zhang
249566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n, &ptrs));
2581b6088dSJunchao Zhang for (i = 0; i < n; i++) {
269566063dSJacob Faibussowitsch if (i % 2) PetscCall(PetscMalloc1(i + 16, &ptrs[i]));
279566063dSJacob Faibussowitsch else PetscCallCUDA(cudaMalloc((void **)&ptrs[i], (i + 16) * sizeof(PetscScalar)));
2881b6088dSJunchao Zhang }
2981b6088dSJunchao Zhang
309566063dSJacob Faibussowitsch PetscCall(PetscTime(&tstart));
3181b6088dSJunchao Zhang for (i = 0; i < n; i++) {
3281b6088dSJunchao Zhang cerr = cudaPointerGetAttributes(&attr, ptrs[i]);
339622a0a0SJunchao Zhang if (cerr) cerr = cudaGetLastError();
3481b6088dSJunchao Zhang }
359566063dSJacob Faibussowitsch PetscCall(PetscTime(&tend));
3681b6088dSJunchao Zhang time = (tend - tstart) * 1e6 / n;
3781b6088dSJunchao Zhang
389566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Average cudaPointerGetAttributes() time = %.2f microseconds\n", time));
3981b6088dSJunchao Zhang
4081b6088dSJunchao Zhang for (i = 0; i < n; i++) {
419566063dSJacob Faibussowitsch if (i % 2) PetscCall(PetscFree(ptrs[i]));
429566063dSJacob Faibussowitsch else PetscCallCUDA(cudaFree(ptrs[i]));
4381b6088dSJunchao Zhang }
449566063dSJacob Faibussowitsch PetscCall(PetscFree(ptrs));
4581b6088dSJunchao Zhang
469566063dSJacob Faibussowitsch PetscCall(PetscFinalize());
47b122ec5aSJacob Faibussowitsch return 0;
4881b6088dSJunchao Zhang }
4981b6088dSJunchao Zhang
/*TEST
  build:
    requires: cuda

  test:
    requires: cuda
    args: -n 2
    output_file: output/empty.out
    filter: grep "DOES_NOT_EXIST"

TEST*/
61