xref: /petsc/src/benchmarks/streams/MPIVersion.c (revision abdd934af67c1cc360c4d0cfcf2de16e188d627e)
/*
  An adaptation of the STREAM benchmark for MPI
  Original code developed by John D. McCalpin
*/
55e71baefSBarry Smith #include <petscsys.h>
6d3ae85c4SBarry Smith 
/* Benchmark configuration; all values are compile-time constants. */
#define NTIMESINNER 1        // repetitions of the triad kernel inside one timed sample
#define N           80000000 // 3*sizeof(double)*N > aggregated last level cache size on a compute node
#define NTIMES      50       // number of timed samples taken by main()
#define OFFSET      0        // extra elements added to the length of each array

/* The three arrays of the kernel a[j] = b[j] + scalar * c[j]; each is declared with the full length N + OFFSET */
static double a[N + OFFSET], b[N + OFFSET], c[N + OFFSET];
/* Smallest sample time found so far; starts at a large sentinel so the first comparison replaces it */
static double mintime = 1e9;
/* Bytes counted per kernel pass: three arrays of N doubles */
static double bytes   = 3 * sizeof(double) * N;
15d3ae85c4SBarry Smith 
main(int argc,char ** args)16d3ae85c4SBarry Smith int main(int argc, char **args)
17d3ae85c4SBarry Smith {
1851096aa5SJacob Faibussowitsch   const double scalar = 3.0;
199f0612e4SBarry Smith   double       times[NTIMES], rate;
20*c6bff371SJunchao Zhang   PetscMPIInt  rank, size;
219f0612e4SBarry Smith   PetscInt     n = PETSC_DECIDE, NN;
22d3ae85c4SBarry Smith 
23b8abcfdeSJacob Faibussowitsch   PetscCall(PetscInitialize(&argc, &args, NULL, NULL));
2451096aa5SJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
2551096aa5SJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
26d3ae85c4SBarry Smith 
279f0612e4SBarry Smith   NN = N;
289f0612e4SBarry Smith   PetscCall(PetscSplitOwnership(MPI_COMM_WORLD, &n, &NN));
29*c6bff371SJunchao Zhang   for (PetscInt j = 0; j < n; ++j) {
30d3ae85c4SBarry Smith     a[j] = 1.0;
31d3ae85c4SBarry Smith     b[j] = 2.0;
329f0612e4SBarry Smith     c[j] = 3.0;
33d3ae85c4SBarry Smith   }
34d3ae85c4SBarry Smith 
35d3ae85c4SBarry Smith   /*   --- MAIN LOOP --- repeat test cases NTIMES times --- */
36*c6bff371SJunchao Zhang   for (PetscInt k = 0; k < NTIMES; ++k) {
3751096aa5SJacob Faibussowitsch     PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD));
389f0612e4SBarry Smith     // Do not include barrier in the timed region
399f0612e4SBarry Smith     times[k] = MPI_Wtime();
40*c6bff371SJunchao Zhang     for (PetscInt l = 0; l < NTIMESINNER; l++) {
41*c6bff371SJunchao Zhang       for (PetscInt j = 0; j < n; j++) a[j] = b[j] + scalar * c[j];
42*c6bff371SJunchao Zhang       if (size == 2000) PetscCall(PetscPrintf(PETSC_COMM_SELF, "never printed %g\n", a[11])); // to prevent the compiler from optimizing the loop out
43d3ae85c4SBarry Smith     }
449f0612e4SBarry Smith     //   PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD));
459f0612e4SBarry Smith     times[k] = MPI_Wtime() - times[k];
4651096aa5SJacob Faibussowitsch   }
479f0612e4SBarry Smith   // use maximum time over all MPI processes
48458b0db5SMartin Diehl   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, times, NTIMES, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD));
49*c6bff371SJunchao Zhang   for (PetscInt k = 1; k < NTIMES; ++k) { /* note -- skip first iteration */
509f0612e4SBarry Smith     mintime = PetscMin(mintime, times[k]);
519f0612e4SBarry Smith   }
529f0612e4SBarry Smith   rate = 1.0E-06 * bytes * NTIMESINNER / mintime;
53d3ae85c4SBarry Smith 
54c5853193SPierre Jolivet   if (rank == 0) {
5551096aa5SJacob Faibussowitsch     FILE *fd;
5651096aa5SJacob Faibussowitsch 
57ed38be93SPierre Jolivet     if (size != 1) {
584198fb66SBarry Smith       double prate;
5951096aa5SJacob Faibussowitsch 
60*c6bff371SJunchao Zhang       PetscCall(PetscFOpen(PETSC_COMM_SELF, "flops", "r", &fd));
61*c6bff371SJunchao Zhang       PetscCheck(fscanf(fd, "%lg", &prate) == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_READ, "Unable to read file");
62*c6bff371SJunchao Zhang       PetscCall(PetscFClose(PETSC_COMM_SELF, fd));
63*c6bff371SJunchao Zhang       PetscCall(PetscPrintf(PETSC_COMM_SELF, "%3d %11.1f   Rate (MB/s) %6.1f\n", size, rate, rate / prate));
6451096aa5SJacob Faibussowitsch     } else {
65*c6bff371SJunchao Zhang       PetscCall(PetscFOpen(PETSC_COMM_SELF, "flops", "w", &fd));
66*c6bff371SJunchao Zhang       PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%g\n", rate));
67*c6bff371SJunchao Zhang       PetscCall(PetscFClose(PETSC_COMM_SELF, fd));
68*c6bff371SJunchao Zhang       PetscCall(PetscPrintf(PETSC_COMM_SELF, "%3d %11.1f   Rate (MB/s) %6.1f\n", size, rate, 1.0));
694198fb66SBarry Smith     }
70d3ae85c4SBarry Smith   }
7151096aa5SJacob Faibussowitsch   PetscCall(PetscFinalize());
72d3ae85c4SBarry Smith   return 0;
73d3ae85c4SBarry Smith }
74