xref: /petsc/src/benchmarks/streams/BasicVersion.c (revision ccfb0f9f40a0131988d7995ed9679700dae2a75a)
#include <stddef.h>
#include <sys/time.h>
2 /* int gettimeofday(struct timeval *tp, struct timezone *tzp); */
3 
/*
  second - current wall-clock time in seconds.

  Reads the time of day with gettimeofday() and returns it as a double:
  whole seconds plus the microsecond field scaled by 1e-6.  Callers take
  the difference of two return values to obtain an elapsed time.

  Returns 0.0 if gettimeofday() reports failure, so that an unset
  struct timeval is never read.
*/
double second(void)
{
  struct timeval tp;

  /* The timezone argument is obsolete: POSIX leaves the behavior
     unspecified when it is non-null, so NULL is passed instead. */
  if (gettimeofday(&tp, NULL) != 0) return 0.0;

  return (double)tp.tv_sec + (double)tp.tv_usec * 1.e-6;
}
19 #include <stdio.h>
20 #include <math.h>
21 #include <limits.h>
22 #include <float.h>
23 #include <sys/time.h>
24 
25 /*
26   Program: Stream
27   Programmer: Joe R. Zagar
28   Revision: 4.0-BETA, October 24, 1995
29   Original code developed by John D. McCalpin
30 
31   This program measures memory transfer rates in MB/s for simple
32   computational kernels coded in C.  These numbers reveal the quality
33   of code generation for simple uncacheable kernels as well as showing
34   the cost of floating-point operations relative to memory accesses.
35 
36   INSTRUCTIONS:
37 
38         1) Stream requires a good bit of memory to run.  Adjust the
39            value of 'N' (below) to give a 'timing calibration' of
40            at least 20 clock-ticks.  This will provide rate estimates
41            that should be good to about 5% precision.
42  */
43 
/*
  Benchmark dimensions.  Each may be predefined on the compiler command
  line (for example -DN=2000000); the values below are the defaults.
  An override that is an expression must be parenthesized, because these
  names are used inside larger arithmetic expressions.
*/
#ifndef N
  #define N 200000 /* elements swept by every kernel loop */
#endif
#ifndef NTIMES
  #define NTIMES 50 /* repetitions of the timed kernel sequence */
#endif
#ifndef OFFSET
  #define OFFSET 0 /* extra elements added to each array's length */
#endif
47 
48 /*
49        3) Compile the code with full optimization.  Many compilers
50           generate unreasonably bad code before the optimizer tightens
51           things up.  If the results are unreasonably good, on the
52           other hand, the optimizer might be too smart for me!
53 
54           Try compiling with:
55                 cc -O stream_d.c second.c -o stream_d -lm
56 
57           This is known to work on Cray, SGI, IBM, and Sun machines.
58 
59        4) Mail the results to mccalpin@cs.virginia.edu
60           Be sure to include:
61                  a) computer hardware model number and software revision
62                  b) the compiler flags
63                  c) all of the output from the test case.
64   Thanks!
65 
66 */
67 
/* Separator line of dashes terminated by a newline. */
#define HLINE "-------------------------------------------------------------\n"

/* Smaller of two values.  Either argument may be evaluated twice, so do
   not pass expressions with side effects. */
#ifndef MIN
  #define MIN(lhs, rhs) ((lhs) < (rhs) ? (lhs) : (rhs))
#endif

/* Larger of two values.  Same double-evaluation caveat as MIN. */
#ifndef MAX
  #define MAX(lhs, rhs) ((lhs) > (rhs) ? (lhs) : (rhs))
#endif
76 
77 static double a[N + OFFSET], b[N + OFFSET], c[N + OFFSET];
78 /*double *a,*b,*c;*/
79 
80 static double mintime[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
81 
82 static const char *label[4] = {"Copy:      ", "Scale:     ", "Add:       ", "Triad:     "};
83 
84 static double bytes[4] = {2 * sizeof(double) * N, 2 * sizeof(double) * N, 3 * sizeof(double) * N, 3 * sizeof(double) * N};
85 
86 extern double second();
87 
88 int main(int argc, char **args)
89 {
90   int          checktick(void);
91   register int j, k;
92   double       scalar, t, times[4][NTIMES], irate[4];
93 
94   /* --- SETUP --- determine precision and check timing --- */
95 
96   for (j = 0; j < N; j++) {
97     a[j] = 1.0;
98     b[j] = 2.0;
99     c[j] = 0.0;
100   }
101 
102   t = second();
103   for (j = 0; j < N; j++) a[j] = 2.0E0 * a[j];
104   t = 1.0E6 * (second() - t);
105 
106   /*   --- MAIN LOOP --- repeat test cases NTIMES times --- */
107 
108   scalar = 3.0;
109   for (k = 0; k < NTIMES; k++) {
110     times[0][k] = second();
111     /* should all these barriers be pulled outside of the time call? */
112 
113     for (j = 0; j < N; j++) c[j] = a[j];
114     times[0][k] = second() - times[0][k];
115 
116     times[1][k] = second();
117 
118     for (j = 0; j < N; j++) b[j] = scalar * c[j];
119     times[1][k] = second() - times[1][k];
120 
121     times[2][k] = second();
122     for (j = 0; j < N; j++) c[j] = a[j] + b[j];
123     times[2][k] = second() - times[2][k];
124 
125     times[3][k] = second();
126     for (j = 0; j < N; j++) a[j] = b[j] + scalar * c[j];
127     times[3][k] = second() - times[3][k];
128   }
129 
130   /*   --- SUMMARY --- */
131 
132   for (k = 0; k < NTIMES; k++)
133     for (j = 0; j < 4; j++) mintime[j] = MIN(mintime[j], times[j][k]);
134 
135   for (j = 0; j < 4; j++) irate[j] = 1.0E-06 * bytes[j] / mintime[j];
136 
137   printf("Function      Rate (MB/s) \n");
138   for (j = 0; j < 4; j++) printf("%s%11.4f\n", label[j], irate[j]);
139   return 0;
140 }
141 
/* Number of timer samples collected by checktick(). */
#define M 20

/*
  Estimates the granularity of second() in microseconds.

  Collects M timer readings, each taken only once second() has advanced by
  at least one microsecond from a fresh starting reading, and returns the
  smallest gap between consecutive readings truncated to whole
  microseconds.  Negative gaps count as 0 and the result never exceeds
  1000000.
*/
int checktick(void)
{
  double samples[M], prev, now;
  int    idx, gap, smallest;

  /* Gather M readings, spinning until the clock visibly moves each time. */
  for (idx = 0; idx < M; idx++) {
    prev = second();
    do {
      now = second();
    } while (now - prev < 1.0E-6);
    samples[idx] = now;
  }

  /* Smallest gap between neighbouring readings, clamped at zero. */
  smallest = 1000000;
  for (idx = 1; idx < M; idx++) {
    gap = (int)(1.0E6 * (samples[idx] - samples[idx - 1]));
    if (gap < 0) { gap = 0; }
    if (gap < smallest) { smallest = gap; }
  }

  return smallest;
}
171