1 /*
2 An adaption of the Stream benchmark for MPI
3 Original code developed by John D. McCalpin
4 */
5 #include <petscsys.h>
6
7 #define NTIMESINNER 1
8 #define N 80000000 // 3*sizeof(double)*N > aggregated last level cache size on a compute node
9 #define NTIMES 50
10 #define OFFSET 0
11
12 static double a[N + OFFSET], b[N + OFFSET], c[N + OFFSET];
13 static double mintime = 1e9;
14 static double bytes = 3 * sizeof(double) * N;
15
main(int argc,char ** args)16 int main(int argc, char **args)
17 {
18 const double scalar = 3.0;
19 double times[NTIMES], rate;
20 PetscMPIInt rank, size;
21 PetscInt n = PETSC_DECIDE, NN;
22
23 PetscCall(PetscInitialize(&argc, &args, NULL, NULL));
24 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
25 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size));
26
27 NN = N;
28 PetscCall(PetscSplitOwnership(MPI_COMM_WORLD, &n, &NN));
29 for (PetscInt j = 0; j < n; ++j) {
30 a[j] = 1.0;
31 b[j] = 2.0;
32 c[j] = 3.0;
33 }
34
35 /* --- MAIN LOOP --- repeat test cases NTIMES times --- */
36 for (PetscInt k = 0; k < NTIMES; ++k) {
37 PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD));
38 // Do not include barrier in the timed region
39 times[k] = MPI_Wtime();
40 for (PetscInt l = 0; l < NTIMESINNER; l++) {
41 for (PetscInt j = 0; j < n; j++) a[j] = b[j] + scalar * c[j];
42 if (size == 2000) PetscCall(PetscPrintf(PETSC_COMM_SELF, "never printed %g\n", a[11])); // to prevent the compiler from optimizing the loop out
43 }
44 // PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD));
45 times[k] = MPI_Wtime() - times[k];
46 }
47 // use maximum time over all MPI processes
48 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, times, NTIMES, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD));
49 for (PetscInt k = 1; k < NTIMES; ++k) { /* note -- skip first iteration */
50 mintime = PetscMin(mintime, times[k]);
51 }
52 rate = 1.0E-06 * bytes * NTIMESINNER / mintime;
53
54 if (rank == 0) {
55 FILE *fd;
56
57 if (size != 1) {
58 double prate;
59
60 PetscCall(PetscFOpen(PETSC_COMM_SELF, "flops", "r", &fd));
61 PetscCheck(fscanf(fd, "%lg", &prate) == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_READ, "Unable to read file");
62 PetscCall(PetscFClose(PETSC_COMM_SELF, fd));
63 PetscCall(PetscPrintf(PETSC_COMM_SELF, "%3d %11.1f Rate (MB/s) %6.1f\n", size, rate, rate / prate));
64 } else {
65 PetscCall(PetscFOpen(PETSC_COMM_SELF, "flops", "w", &fd));
66 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%g\n", rate));
67 PetscCall(PetscFClose(PETSC_COMM_SELF, fd));
68 PetscCall(PetscPrintf(PETSC_COMM_SELF, "%3d %11.1f Rate (MB/s) %6.1f\n", size, rate, 1.0));
69 }
70 }
71 PetscCall(PetscFinalize());
72 return 0;
73 }
74