xref: /petsc/src/benchmarks/streams/OpenMPVersionLikeMPI.c (revision a336c15037c72f93cd561f5a5e11e93175f2efd9)
1 /*
2   A simplification of the Stream benchmark for OpenMP
3   The array for each thread is a large distance from the array for the other threads
4   Original code developed by John D. McCalpin
5 */
6 #include <stdio.h>
7 #include <math.h>
8 #include <limits.h>
9 #include <float.h>
10 #include <sys/time.h>
11 #include <stdlib.h>
12 #include <omp.h>
13 #include <petscsys.h>
14 
15 #define NTIMESINNER 1
16 #define N           2 * 4 * 20000000
17 //#define N 1200000
18 //#define N 120000
19 #define NTIMES 50
20 #define OFFSET 0
21 
22 #if !defined(MIN)
23   #define MIN(x, y) ((x) < (y) ? (x) : (y))
24 #endif
25 #if !defined(MAX)
26   #define MAX(x, y) ((x) > (y) ? (x) : (y))
27 #endif
28 
29 static double a[64][10000000], b[64][10000000], c[64][10000000];
30 static double mintime = FLT_MAX;
31 static double bytes   = 3 * sizeof(double) * N;
32 
33 int main()
34 {
35   const static double scalar = 3.0;
36 #pragma omp threadprivate(scalar)
37   double     times[NTIMES], rate;
38   int        size;
39   static int n;
40 #pragma omp threadprivate(n)
41   char *env;
42   FILE *fd;
43 
44   env = getenv("OMP_NUM_THREADS");
45   if (!env) env = (char *)"1";
46   sscanf(env, "%d", &size);
47 
48 #pragma omp parallel for schedule(static)
49   for (int j = 0; j < size; j++) {
50     n = (N / size + ((N % size) > omp_get_thread_num()));
51     for (int i = 0; i < n; i++) {
52       a[j][i] = 1.0;
53       b[j][i] = 2.0;
54       c[j][i] = 3.0;
55     }
56   }
57 
58   /*  --- MAIN LOOP --- repeat test cases NTIMES times --- */
59   for (int k = 0; k < NTIMES; k++) {
60     times[k] = MPI_Wtime();
61     // https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf
62 // #pragma omp parallel for  (same performance as below)
63 // #pragma omp parallel for simd schedule(static)  (same performance as below)
64 #pragma omp parallel for schedule(static)
65     for (int j = 0; j < size; j++) {
66       n                = (N / size + ((N % size) > omp_get_thread_num()));
67       double       *aa = a[j]; // these don't change the timings
68       const double *bb = b[j];
69       const double *cc = c[j];
70       for (int l = 0; l < NTIMESINNER; l++) {
71         for (register int i = 0; i < n; i++) aa[i] = bb[i] + scalar * cc[i];
72         if (size == 65) printf("never printed %g\n", a[0][11]);
73       }
74     }
75     times[k] = MPI_Wtime() - times[k];
76   }
77   for (int k = 1; k < NTIMES; k++) { /* note -- skip first iteration */
78     mintime = MIN(mintime, times[k]);
79   }
80   rate = 1.0E-06 * bytes * NTIMESINNER / mintime;
81 
82   if (size == 1) {
83     printf("%d %11.4f   Rate (MB/s) 1\n", size, rate);
84     fd = fopen("flops", "w");
85     fprintf(fd, "%g\n", rate);
86     fclose(fd);
87   } else {
88     double prate;
89     fd = fopen("flops", "r");
90     fscanf(fd, "%lg", &prate);
91     fclose(fd);
92     printf("%d %11.4f   Rate (MB/s) %g \n", size, rate, rate / prate);
93   }
94   return 0;
95 }
96