xref: /petsc/src/vec/vec/tests/ex2k.c (revision 6c5693054f5123506dab0f5da2d352ed973d0e50)
1 static char help[] = "Benchmarking VecMDot() or VecMAXPY()\n";
2 /*
3   Usage:
4    mpiexec -n <np> ./ex2k -vec_type <vector type>
5      -n  <n>  # number of data points of vector sizes from 128, 256, 512 and up. Maxima and default is 23.
6      -m  <m>  # run each VecMDot() m times to get the average time, default is 100.
7      -test_name <VecMDot or VecMAXPY>  # test to run, by default it is VecMDot
8      -output_bw <bool> # output bandwidth instead of time
9 
10   Example:
11 
12   Running on Frontier at OLCF:
13   # run with 1 mpi rank (-n1), 32 CPUs (-c32)
14   $ srun -n1 -c32 --gpus-per-node=8 --gpu-bind=closest ./ex2k -vec_type kokkos
15 */
16 
17 #include <petscvec.h>
18 #include <petscdevice.h>
19 
main(int argc,char ** argv)20 int main(int argc, char **argv)
21 {
22   PetscInt           i, j, k, M, N, mcount, its = 100, nsamples, ncount, maxN;
23   PetscLogDouble     tstart, tend, times[8], fom; // figure of merit
24   Vec                x, *ys;
25   PetscScalar       *vals;
26   PetscMPIInt        size;
27   PetscDeviceContext dctx;
28   char               testName[64] = "VecMDot"; // By default, test VecMDot
29   PetscBool          testMDot, testMAXPY;
30   PetscBool          outputBW = PETSC_FALSE; // output bandwidth instead of time
31   PetscRandom        rnd;
32   PetscLogStage      stage1;
33   // Try vectors of these (local) sizes. The max is very close to 2^31
34   PetscInt Ms[] = {128,     256,      512,      1024,     2048,      4096,      8192,     16384,   //
35                    32768,   65536,    131072,   262144,   524288,    1048576,   2097152,  4194304, //
36                    8388608, 16777216, 33554432, 67108864, 134217728, 268435456, 536870912};
37   PetscInt Ns[] = {1, 3, 8, 30}; // try this number of y vectors in VecMDot
38 
39   PetscFunctionBeginUser;
40   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
41   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
42   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rnd));
43 
44   mcount = PETSC_STATIC_ARRAY_LENGTH(Ms); // length of Ms[]
45   ncount = PETSC_STATIC_ARRAY_LENGTH(Ns); // length of Ns[]
46   maxN   = Ns[ncount - 1];                // at most this many y vectors
47 
48   nsamples = mcount;
49   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &mcount, NULL)); // Up to vectors of local size 2^{mcount+6}
50   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &its, NULL));    // Run each VecMDot() its times
51   PetscCall(PetscOptionsGetString(NULL, NULL, "-test_name", testName, sizeof(testName), NULL));
52   PetscCall(PetscOptionsGetBool(NULL, NULL, "-output_bw", &outputBW, NULL));
53   PetscCall(PetscStrncmp(testName, "VecMDot", sizeof(testName), &testMDot));
54   PetscCall(PetscStrncmp(testName, "VecMAXPY", sizeof(testName), &testMAXPY));
55   PetscCheck(testMDot || testMAXPY, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Unsupported test name: %s", testName);
56   PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
57   PetscCall(PetscMalloc1(maxN, &vals));
58   for (j = 0; j < maxN; j++) vals[j] = 3.14 + j; // same across all processes
59 
60   PetscCall(PetscLogStageRegister("Profiling", &stage1));
61   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Vector(N)   "));
62   for (j = 0; j < ncount; j++) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "   %s-%" PetscInt_FMT " ", testName, Ns[j]));
63   PetscCall(PetscPrintf(PETSC_COMM_WORLD, outputBW ? " (GB/s)\n" : " (us)\n"));
64   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "--------------------------------------------------------------------------\n"));
65 
66   nsamples = PetscMin(nsamples, mcount);
67   for (k = 0; k < nsamples; k++) { // for vector (local) size M
68     M = Ms[k];
69     PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
70     PetscCall(VecSetFromOptions(x));
71     PetscCall(VecSetSizes(x, M, PETSC_DECIDE));
72     PetscCall(VecSetUp(x));
73     PetscCall(VecDuplicateVecs(x, maxN, &ys));
74     PetscCall(VecSetRandom(x, rnd));
75     for (i = 0; i < maxN; i++) PetscCall(VecSetRandom(ys[i], rnd));
76 
77     for (j = 0; j < ncount; j++) { // try N y vectors
78       // Warm-up
79       N = Ns[j];
80       for (i = 0; i < 2; i++) {
81         if (testMDot) PetscCall(VecMDot(x, N, ys, vals));
82         else if (testMAXPY) PetscCall(VecMAXPY(x, N, vals, ys));
83       }
84       PetscCall(PetscDeviceContextSynchronize(dctx));
85       PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
86 
87       PetscCall(PetscLogStagePush(stage1)); // use LogStage so that -log_view result will be clearer
88       PetscCall(PetscTime(&tstart));
89       for (i = 0; i < its; i++) {
90         if (testMDot) PetscCall(VecMDot(x, N, ys, vals));
91         else if (testMAXPY) PetscCall(VecMAXPY(x, N, vals, ys));
92       }
93       PetscCall(PetscDeviceContextSynchronize(dctx));
94       PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
95       PetscCall(PetscTime(&tend));
96       times[j] = (tend - tstart) * 1e6 / its;
97       PetscCall(PetscLogStagePop());
98     }
99 
100     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%12" PetscInt_FMT, M));
101     for (j = 0; j < ncount; j++) {
102       N = Ns[j];
103       if (outputBW) {
104         // Read N y vectors and x vector of size M, and then write vals[] of size N
105         PetscLogDouble bytes = (M * (N + 1.0) + N) * sizeof(PetscScalar);
106         fom                  = (bytes / times[j]) * 1e-3;
107       } else {
108         fom = times[j];
109       }
110       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%12.1f ", fom));
111     }
112     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
113 
114     PetscCall(VecDestroy(&x));
115     PetscCall(VecDestroyVecs(maxN, &ys));
116   }
117 
118   PetscCall(PetscRandomDestroy(&rnd));
119   PetscCall(PetscFree(vals));
120   PetscCall(PetscFinalize());
121   return 0;
122 }
123 
124 /*TEST
125   testset:
126     args: -n 2 -m 2 -test_name {{VecMDot VecMAXPY}}
127     output_file: output/empty.out
128     filter: grep "DOES_NOT_EXIST"
129 
130     test:
131       suffix: standard
132 
133     test:
134       requires: kokkos_kernels
135       suffix: kok
136       args: -vec_type kokkos
137 
138 TEST*/
139