1 /* 2 A simplification of the Stream benchmark for OpenMP 3 The array for each thread is a large distance from the array for the other threads 4 Original code developed by John D. McCalpin 5 */ 6 #include <stdio.h> 7 #include <math.h> 8 #include <limits.h> 9 #include <float.h> 10 #include <sys/time.h> 11 #include <stdlib.h> 12 #include <omp.h> 13 #include <petscsys.h> 14 15 #define NTIMESINNER 1 16 #define N 2 * 4 * 20000000 17 //#define N 1200000 18 //#define N 120000 19 #define NTIMES 50 20 #define OFFSET 0 21 22 #if !defined(MIN) 23 #define MIN(x, y) ((x) < (y) ? (x) : (y)) 24 #endif 25 #if !defined(MAX) 26 #define MAX(x, y) ((x) > (y) ? (x) : (y)) 27 #endif 28 29 static double a[64][10000000], b[64][10000000], c[64][10000000]; 30 static double mintime = FLT_MAX; 31 static double bytes = 3 * sizeof(double) * N; 32 33 int main() 34 { 35 const static double scalar = 3.0; 36 #pragma omp threadprivate(scalar) 37 double times[NTIMES], rate; 38 int size; 39 static int n; 40 #pragma omp threadprivate(n) 41 char *env; 42 FILE *fd; 43 44 env = getenv("OMP_NUM_THREADS"); 45 if (!env) env = (char *)"1"; 46 sscanf(env, "%d", &size); 47 48 #pragma omp parallel for schedule(static) 49 for (int j = 0; j < size; j++) { 50 n = (N / size + ((N % size) > omp_get_thread_num())); 51 for (int i = 0; i < n; i++) { 52 a[j][i] = 1.0; 53 b[j][i] = 2.0; 54 c[j][i] = 3.0; 55 } 56 } 57 58 /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ 59 for (int k = 0; k < NTIMES; k++) { 60 times[k] = MPI_Wtime(); 61 // https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf 62 // #pragma omp parallel for (same performance as below) 63 // #pragma omp parallel for simd schedule(static) (same performance as below) 64 #pragma omp parallel for schedule(static) 65 for (int j = 0; j < size; j++) { 66 n = (N / size + ((N % size) > omp_get_thread_num())); 67 double *aa = a[j]; // these don't change the timings 68 const double *bb = b[j]; 69 const double *cc = c[j]; 70 for (int l = 0; l < NTIMESINNER; l++) { 71 for (register int i = 0; i < n; i++) aa[i] = bb[i] + scalar * cc[i]; 72 if (size == 65) printf("never printed %g\n", a[0][11]); 73 } 74 } 75 times[k] = MPI_Wtime() - times[k]; 76 } 77 for (int k = 1; k < NTIMES; k++) { /* note -- skip first iteration */ 78 mintime = MIN(mintime, times[k]); 79 } 80 rate = 1.0E-06 * bytes * NTIMESINNER / mintime; 81 82 if (size == 1) { 83 printf("%d %11.4f Rate (MB/s) 1\n", size, rate); 84 fd = fopen("flops", "w"); 85 fprintf(fd, "%g\n", rate); 86 fclose(fd); 87 } else { 88 double prate; 89 fd = fopen("flops", "r"); 90 fscanf(fd, "%lg", &prate); 91 fclose(fd); 92 printf("%d %11.4f Rate (MB/s) %g \n", size, rate, rate / prate); 93 } 94 return 0; 95 } 96