1 /*
2 A simplification of the Stream benchmark for OpenMP
3 The array for each thread is a large distance from the array for the other threads
4 Original code developed by John D. McCalpin
5 */
6 #include <stdio.h>
7 #include <math.h>
8 #include <limits.h>
9 #include <float.h>
10 #include <sys/time.h>
11 #include <stdlib.h>
12 #include <omp.h>
13 #include <petscsys.h>
14
15 #define NTIMESINNER 1
16 #define N 2 * 4 * 20000000
17 //#define N 1200000
18 //#define N 120000
19 #define NTIMES 50
20 #define OFFSET 0
21
22 #if !defined(MIN)
23 #define MIN(x, y) ((x) < (y) ? (x) : (y))
24 #endif
25 #if !defined(MAX)
26 #define MAX(x, y) ((x) > (y) ? (x) : (y))
27 #endif
28
29 static double a[64][10000000], b[64][10000000], c[64][10000000];
30 static double mintime = FLT_MAX;
31 static double bytes = 3 * sizeof(double) * N;
32
main()33 int main()
34 {
35 const static double scalar = 3.0;
36 #pragma omp threadprivate(scalar)
37 double times[NTIMES], rate;
38 int size;
39 static int n;
40 #pragma omp threadprivate(n)
41 char *env;
42 FILE *fd;
43
44 env = getenv("OMP_NUM_THREADS");
45 if (!env) env = (char *)"1";
46 sscanf(env, "%d", &size);
47
48 #pragma omp parallel for schedule(static)
49 for (int j = 0; j < size; j++) {
50 n = (N / size + ((N % size) > omp_get_thread_num()));
51 for (int i = 0; i < n; i++) {
52 a[j][i] = 1.0;
53 b[j][i] = 2.0;
54 c[j][i] = 3.0;
55 }
56 }
57
58 /* --- MAIN LOOP --- repeat test cases NTIMES times --- */
59 for (int k = 0; k < NTIMES; k++) {
60 times[k] = MPI_Wtime();
61 // https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf
62 // #pragma omp parallel for (same performance as below)
63 // #pragma omp parallel for simd schedule(static) (same performance as below)
64 #pragma omp parallel for schedule(static)
65 for (int j = 0; j < size; j++) {
66 n = (N / size + ((N % size) > omp_get_thread_num()));
67 double *aa = a[j]; // these don't change the timings
68 const double *bb = b[j];
69 const double *cc = c[j];
70 for (int l = 0; l < NTIMESINNER; l++) {
71 for (register int i = 0; i < n; i++) aa[i] = bb[i] + scalar * cc[i];
72 if (size == 65) printf("never printed %g\n", a[0][11]);
73 }
74 }
75 times[k] = MPI_Wtime() - times[k];
76 }
77 for (int k = 1; k < NTIMES; k++) { /* note -- skip first iteration */
78 mintime = MIN(mintime, times[k]);
79 }
80 rate = 1.0E-06 * bytes * NTIMESINNER / mintime;
81
82 if (size == 1) {
83 printf("%d %11.4f Rate (MB/s) 1\n", size, rate);
84 fd = fopen("flops", "w");
85 fprintf(fd, "%g\n", rate);
86 fclose(fd);
87 } else {
88 double prate;
89 fd = fopen("flops", "r");
90 fscanf(fd, "%lg", &prate);
91 fclose(fd);
92 printf("%d %11.4f Rate (MB/s) %g\n", size, rate, rate / prate);
93 }
94 return 0;
95 }
96