xref: /petsc/src/benchmarks/streams/MPIVersion.c (revision ccfb0f9f40a0131988d7995ed9679700dae2a75a)
1 /*
2   An adaption of the Stream benchmark for MPI
3   Original code developed by John D. McCalpin
4 */
5 #include <petscsys.h>
6 
/* Benchmark configuration */
#define NTIMESINNER 1        /* repetitions of the kernel inside one timed sample */
#define N           80000000 /* global array length; 3*sizeof(double)*N > aggregated last level cache size on a compute node */
#define NTIMES      50       /* number of timed samples */
#define OFFSET      0        /* extra elements appended to each array */

/* Work arrays: every process declares the full N + OFFSET entries */
static double a[N + OFFSET];
static double b[N + OFFSET];
static double c[N + OFFSET];

/* Memory traffic, in bytes, attributed to one sweep of the kernel over all N entries of the three arrays */
static double bytes = 3.0 * N * sizeof(double);

/* Smallest sample time found so far; starts at a value larger than any expected sample */
static double mintime = 1e9;
15 
16 int main(int argc, char **args)
17 {
18   const double scalar = 3.0;
19   double       times[NTIMES], rate;
20   PetscMPIInt  rank, size;
21   PetscInt     n = PETSC_DECIDE, NN;
22 
23   PetscCall(PetscInitialize(&argc, &args, NULL, NULL));
24   PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
25   PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
26 
27   NN = N;
28   PetscCall(PetscSplitOwnership(MPI_COMM_WORLD, &n, &NN));
29   for (PetscInt j = 0; j < n; ++j) {
30     a[j] = 1.0;
31     b[j] = 2.0;
32     c[j] = 3.0;
33   }
34 
35   /*   --- MAIN LOOP --- repeat test cases NTIMES times --- */
36   for (PetscInt k = 0; k < NTIMES; ++k) {
37     PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD));
38     // Do not include barrier in the timed region
39     times[k] = MPI_Wtime();
40     for (PetscInt l = 0; l < NTIMESINNER; l++) {
41       for (PetscInt j = 0; j < n; j++) a[j] = b[j] + scalar * c[j];
42       if (size == 2000) PetscCall(PetscPrintf(PETSC_COMM_SELF, "never printed %g\n", a[11])); // to prevent the compiler from optimizing the loop out
43     }
44     //   PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD));
45     times[k] = MPI_Wtime() - times[k];
46   }
47   // use maximum time over all MPI processes
48   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, times, NTIMES, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD));
49   for (PetscInt k = 1; k < NTIMES; ++k) { /* note -- skip first iteration */
50     mintime = PetscMin(mintime, times[k]);
51   }
52   rate = 1.0E-06 * bytes * NTIMESINNER / mintime;
53 
54   if (rank == 0) {
55     FILE *fd;
56 
57     if (size != 1) {
58       double prate;
59 
60       PetscCall(PetscFOpen(PETSC_COMM_SELF, "flops", "r", &fd));
61       PetscCheck(fscanf(fd, "%lg", &prate) == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_READ, "Unable to read file");
62       PetscCall(PetscFClose(PETSC_COMM_SELF, fd));
63       PetscCall(PetscPrintf(PETSC_COMM_SELF, "%3d %11.1f   Rate (MB/s) %6.1f\n", size, rate, rate / prate));
64     } else {
65       PetscCall(PetscFOpen(PETSC_COMM_SELF, "flops", "w", &fd));
66       PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%g\n", rate));
67       PetscCall(PetscFClose(PETSC_COMM_SELF, fd));
68       PetscCall(PetscPrintf(PETSC_COMM_SELF, "%3d %11.1f   Rate (MB/s) %6.1f\n", size, rate, 1.0));
69     }
70   }
71   PetscCall(PetscFinalize());
72   return 0;
73 }
74