1 /* 2 A simplification of the Stream benchmark for OpenMP 3 Original code developed by John D. McCalpin 4 */ 5 #include <stdio.h> 6 #include <math.h> 7 #include <limits.h> 8 #include <float.h> 9 #include <sys/time.h> 10 #include <stdlib.h> 11 #include <petscsys.h> 12 13 //#define N 2*4*20000000 14 #define N 80000000 15 //#define N 1200000 16 //#define N 120000 17 #define NTIMES 50 18 #define OFFSET 0 19 20 #if !defined(MIN) 21 #define MIN(x, y) ((x) < (y) ? (x) : (y)) 22 #endif 23 #if !defined(MAX) 24 #define MAX(x, y) ((x) > (y) ? (x) : (y)) 25 #endif 26 27 static double a[N + OFFSET], b[N + OFFSET], c[N + OFFSET]; 28 static double mintime = FLT_MAX; 29 static double bytes = 3 * sizeof(double) * N; 30 31 int main(int argc, char **argv) 32 { 33 MPI_Init(&argc, &argv); 34 const static double scalar = 3.0; 35 #pragma omp threadprivate(scalar) 36 double times[NTIMES], rate; 37 int size; 38 char *env; 39 FILE *fd; 40 41 env = getenv("OMP_NUM_THREADS"); 42 if (!env) env = (char *)"1"; 43 sscanf(env, "%d", &size); 44 45 #pragma omp parallel for schedule(static) 46 for (int j = 0; j < N; j++) { 47 a[j] = 1.0; 48 b[j] = 2.0; 49 c[j] = 3.0; 50 } 51 52 /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ 53 for (int k = 0; k < NTIMES; k++) { 54 times[k] = MPI_Wtime(); 55 // https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf 56 // #pragma omp parallel for (same performance as below) 57 // #pragma omp parallel for simd schedule(static) (same performance as below) 58 #pragma omp parallel for schedule(static) 59 for (register int j = 0; j < N; j++) a[j] = b[j] + scalar * c[j]; 60 times[k] = MPI_Wtime() - times[k]; 61 } 62 for (int k = 1; k < NTIMES; k++) { /* note -- skip first iteration */ 63 mintime = MIN(mintime, times[k]); 64 } 65 66 if (size == 65) printf("Never printed %g\n", a[11]); 67 rate = 1.0E-06 * bytes / mintime; 68 69 if (size == 1) { 70 printf("%d %11.4f Rate (MB/s) 1\n", size, rate); 71 fd = fopen("flops", "w"); 72 fprintf(fd, "%g\n", rate); 73 fclose(fd); 74 } else { 75 double prate; 76 fd = fopen("flops", "r"); 77 fscanf(fd, "%lg", &prate); 78 fclose(fd); 79 printf("%d %11.4f Rate (MB/s) %g \n", size, rate, rate / prate); 80 } 81 MPI_Finalize(); 82 return 0; 83 } 84