xref: /petsc/src/benchmarks/streams/OpenMPVersionLikeMPI.c (revision 3f02e49b19195914bf17f317a25cb39636853415)
1 /*
2   A simplification of the Stream benchmark for OpenMP
3   The array for each thread is a large distance from the array for the other threads
4   Original code developed by John D. McCalpin
5 */
6 #include <stdio.h>
7 #include <math.h>
8 #include <limits.h>
9 #include <float.h>
10 #include <sys/time.h>
11 #include <stdlib.h>
12 #include <omp.h>
13 #include <petscsys.h>
14 
15 #define NTIMESINNER 1
16 #define N 2*4*20000000
17 //#define N 1200000
18 //#define N 120000
19 #define NTIMES       50
20 #define OFFSET       0
21 
22 # if !defined(MIN)
23 # define MIN(x,y) ((x)<(y) ? (x) : (y))
24 # endif
25 # if !defined(MAX)
26 # define MAX(x,y) ((x)>(y) ? (x) : (y))
27 # endif
28 
29 static double a[64][10000000],b[64][10000000],c[64][10000000];
30 static double  mintime = FLT_MAX;
31 static double bytes =  3 * sizeof(double) * N;
32 
33 int main()
34 {
35   const static double scalar = 3.0;
36 #pragma omp threadprivate(scalar)
37   double       times[NTIMES],rate;
38   int          size;
39   static int   n;
40 #pragma omp threadprivate(n)
41   char         *env;
42   FILE         *fd;
43 
44   env = getenv("OMP_NUM_THREADS");
45   if (!env) env = (char *) "1";
46   sscanf(env,"%d",&size);
47 
48 #pragma omp parallel for schedule(static)
49   for (int j=0; j<size; j++) {
50     n = (N / size + ((N % size) > omp_get_thread_num()));
51     for (int i=0; i<n; i++){
52       a[j][i] = 1.0;
53       b[j][i] = 2.0;
54       c[j][i] = 3.0;
55     }
56   }
57 
58   /*  --- MAIN LOOP --- repeat test cases NTIMES times --- */
59   for (int k=0; k<NTIMES; k++)
60   {
61     times[k] = MPI_Wtime();
62     // https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf
63 // #pragma omp parallel for  (same performance as below)
64 // #pragma omp parallel for simd schedule(static)  (same performance as below)
65 #pragma omp parallel for schedule(static)
66     for (int j=0; j<size; j++) {
67       n = (N / size + ((N % size) > omp_get_thread_num()));
68       double *aa = a[j];  // these don't change the timings
69       const double *bb = b[j];
70       const double *cc = c[j];
71       for (int l=0; l<NTIMESINNER; l++) {
72         for (register int i=0; i<n; i++) aa[i] = bb[i]+scalar*cc[i];
73           if (size == 65) printf("never printed %g\n",a[0][11]);
74       }
75     }
76     times[k] = MPI_Wtime() - times[k];
77   }
78   for (int k=1; k<NTIMES; k++) {  /* note -- skip first iteration */
79       mintime = MIN(mintime, times[k]);
80   }
81   rate = 1.0E-06 * bytes*NTIMESINNER/mintime;
82 
83   if (size == 1) {
84     printf("%d %11.4f   Rate (MB/s) 1\n",size, rate);
85     fd = fopen("flops","w");
86     fprintf(fd,"%g\n",rate);
87     fclose(fd);
88   } else {
89     double prate;
90     fd = fopen("flops","r");
91     fscanf(fd,"%lg",&prate);
92     fclose(fd);
93     printf("%d %11.4f   Rate (MB/s) %g \n", size, rate,rate/prate);
94   }
95   return 0;
96 }
97