19f0612e4SBarry Smith /*
29f0612e4SBarry Smith An adaption of the Stream benchmark for MPI
39f0612e4SBarry Smith Original code developed by John D. McCalpin
49f0612e4SBarry Smith */
55e71baefSBarry Smith #include <petscsys.h>
6d3ae85c4SBarry Smith
79f0612e4SBarry Smith #define NTIMESINNER 1
8*c6bff371SJunchao Zhang #define N 80000000 // 3*sizeof(double)*N > aggregated last level cache size on a compute node
9d3ae85c4SBarry Smith #define NTIMES 50
10d3ae85c4SBarry Smith #define OFFSET 0
11d3ae85c4SBarry Smith
1251096aa5SJacob Faibussowitsch static double a[N + OFFSET], b[N + OFFSET], c[N + OFFSET];
13*c6bff371SJunchao Zhang static double mintime = 1e9;
149f0612e4SBarry Smith static double bytes = 3 * sizeof(double) * N;
15d3ae85c4SBarry Smith
main(int argc,char ** args)16d3ae85c4SBarry Smith int main(int argc, char **args)
17d3ae85c4SBarry Smith {
1851096aa5SJacob Faibussowitsch const double scalar = 3.0;
199f0612e4SBarry Smith double times[NTIMES], rate;
20*c6bff371SJunchao Zhang PetscMPIInt rank, size;
219f0612e4SBarry Smith PetscInt n = PETSC_DECIDE, NN;
22d3ae85c4SBarry Smith
23b8abcfdeSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &args, NULL, NULL));
2451096aa5SJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
2551096aa5SJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
26d3ae85c4SBarry Smith
279f0612e4SBarry Smith NN = N;
289f0612e4SBarry Smith PetscCall(PetscSplitOwnership(MPI_COMM_WORLD, &n, &NN));
29*c6bff371SJunchao Zhang for (PetscInt j = 0; j < n; ++j) {
30d3ae85c4SBarry Smith a[j] = 1.0;
31d3ae85c4SBarry Smith b[j] = 2.0;
329f0612e4SBarry Smith c[j] = 3.0;
33d3ae85c4SBarry Smith }
34d3ae85c4SBarry Smith
35d3ae85c4SBarry Smith /* --- MAIN LOOP --- repeat test cases NTIMES times --- */
36*c6bff371SJunchao Zhang for (PetscInt k = 0; k < NTIMES; ++k) {
3751096aa5SJacob Faibussowitsch PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD));
389f0612e4SBarry Smith // Do not include barrier in the timed region
399f0612e4SBarry Smith times[k] = MPI_Wtime();
40*c6bff371SJunchao Zhang for (PetscInt l = 0; l < NTIMESINNER; l++) {
41*c6bff371SJunchao Zhang for (PetscInt j = 0; j < n; j++) a[j] = b[j] + scalar * c[j];
42*c6bff371SJunchao Zhang if (size == 2000) PetscCall(PetscPrintf(PETSC_COMM_SELF, "never printed %g\n", a[11])); // to prevent the compiler from optimizing the loop out
43d3ae85c4SBarry Smith }
449f0612e4SBarry Smith // PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD));
459f0612e4SBarry Smith times[k] = MPI_Wtime() - times[k];
4651096aa5SJacob Faibussowitsch }
479f0612e4SBarry Smith // use maximum time over all MPI processes
48458b0db5SMartin Diehl PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, times, NTIMES, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD));
49*c6bff371SJunchao Zhang for (PetscInt k = 1; k < NTIMES; ++k) { /* note -- skip first iteration */
509f0612e4SBarry Smith mintime = PetscMin(mintime, times[k]);
519f0612e4SBarry Smith }
529f0612e4SBarry Smith rate = 1.0E-06 * bytes * NTIMESINNER / mintime;
53d3ae85c4SBarry Smith
54c5853193SPierre Jolivet if (rank == 0) {
5551096aa5SJacob Faibussowitsch FILE *fd;
5651096aa5SJacob Faibussowitsch
57ed38be93SPierre Jolivet if (size != 1) {
584198fb66SBarry Smith double prate;
5951096aa5SJacob Faibussowitsch
60*c6bff371SJunchao Zhang PetscCall(PetscFOpen(PETSC_COMM_SELF, "flops", "r", &fd));
61*c6bff371SJunchao Zhang PetscCheck(fscanf(fd, "%lg", &prate) == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_READ, "Unable to read file");
62*c6bff371SJunchao Zhang PetscCall(PetscFClose(PETSC_COMM_SELF, fd));
63*c6bff371SJunchao Zhang PetscCall(PetscPrintf(PETSC_COMM_SELF, "%3d %11.1f Rate (MB/s) %6.1f\n", size, rate, rate / prate));
6451096aa5SJacob Faibussowitsch } else {
65*c6bff371SJunchao Zhang PetscCall(PetscFOpen(PETSC_COMM_SELF, "flops", "w", &fd));
66*c6bff371SJunchao Zhang PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%g\n", rate));
67*c6bff371SJunchao Zhang PetscCall(PetscFClose(PETSC_COMM_SELF, fd));
68*c6bff371SJunchao Zhang PetscCall(PetscPrintf(PETSC_COMM_SELF, "%3d %11.1f Rate (MB/s) %6.1f\n", size, rate, 1.0));
694198fb66SBarry Smith }
70d3ae85c4SBarry Smith }
7151096aa5SJacob Faibussowitsch PetscCall(PetscFinalize());
72d3ae85c4SBarry Smith return 0;
73d3ae85c4SBarry Smith }
74