1 /* 2 An adaption of the Stream benchmark for MPI 3 Original code developed by John D. McCalpin 4 */ 5 #include <petscsys.h> 6 7 #define NTIMESINNER 1 8 #define N 80000000 // 3*sizeof(double)*N > aggregated last level cache size on a compute node 9 #define NTIMES 50 10 #define OFFSET 0 11 12 static double a[N + OFFSET], b[N + OFFSET], c[N + OFFSET]; 13 static double mintime = 1e9; 14 static double bytes = 3 * sizeof(double) * N; 15 16 int main(int argc, char **args) 17 { 18 const double scalar = 3.0; 19 double times[NTIMES], rate; 20 PetscMPIInt rank, size; 21 PetscInt n = PETSC_DECIDE, NN; 22 23 PetscCall(PetscInitialize(&argc, &args, NULL, NULL)); 24 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); 25 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD, &size)); 26 27 NN = N; 28 PetscCall(PetscSplitOwnership(MPI_COMM_WORLD, &n, &NN)); 29 for (PetscInt j = 0; j < n; ++j) { 30 a[j] = 1.0; 31 b[j] = 2.0; 32 c[j] = 3.0; 33 } 34 35 /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ 36 for (PetscInt k = 0; k < NTIMES; ++k) { 37 PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD)); 38 // Do not include barrier in the timed region 39 times[k] = MPI_Wtime(); 40 for (PetscInt l = 0; l < NTIMESINNER; l++) { 41 for (PetscInt j = 0; j < n; j++) a[j] = b[j] + scalar * c[j]; 42 if (size == 2000) PetscCall(PetscPrintf(PETSC_COMM_SELF, "never printed %g\n", a[11])); // to prevent the compiler from optimizing the loop out 43 } 44 // PetscCallMPI(MPI_Barrier(MPI_COMM_WORLD)); 45 times[k] = MPI_Wtime() - times[k]; 46 } 47 // use maximum time over all MPI processes 48 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, times, NTIMES, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD)); 49 for (PetscInt k = 1; k < NTIMES; ++k) { /* note -- skip first iteration */ 50 mintime = PetscMin(mintime, times[k]); 51 } 52 rate = 1.0E-06 * bytes * NTIMESINNER / mintime; 53 54 if (rank == 0) { 55 FILE *fd; 56 57 if (size != 1) { 58 double prate; 59 60 PetscCall(PetscFOpen(PETSC_COMM_SELF, "flops", "r", &fd)); 61 PetscCheck(fscanf(fd, "%lg", &prate) == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_READ, "Unable to read file"); 62 PetscCall(PetscFClose(PETSC_COMM_SELF, fd)); 63 PetscCall(PetscPrintf(PETSC_COMM_SELF, "%3d %11.1f Rate (MB/s) %6.1f\n", size, rate, rate / prate)); 64 } else { 65 PetscCall(PetscFOpen(PETSC_COMM_SELF, "flops", "w", &fd)); 66 PetscCall(PetscFPrintf(PETSC_COMM_SELF, fd, "%g\n", rate)); 67 PetscCall(PetscFClose(PETSC_COMM_SELF, fd)); 68 PetscCall(PetscPrintf(PETSC_COMM_SELF, "%3d %11.1f Rate (MB/s) %6.1f\n", size, rate, 1.0)); 69 } 70 } 71 PetscCall(PetscFinalize()); 72 return 0; 73 } 74