#include <stddef.h>
#include <sys/time.h>
2 /* int gettimeofday(struct timeval *tp, struct timezone *tzp); */
3
/**
 * Return the current wall-clock time in seconds.
 *
 * The value is built from the tv_sec and tv_usec fields filled in by
 * gettimeofday(): whole seconds plus microseconds scaled by 1e-6.
 *
 * The time-zone argument is passed as NULL. The previous code handed in a
 * struct timezone that was never read afterwards; that type is obsolete
 * and is not declared by every <sys/time.h> in strict-conformance modes.
 *
 * @return the time in seconds as a double, or 0.0 if gettimeofday()
 *         reports failure (so an indeterminate timeval is never read).
 */
double second(void)
{
    struct timeval tp;

    if (gettimeofday(&tp, NULL) != 0) {
        return 0.0;
    }

    return (double)tp.tv_sec + (double)tp.tv_usec * 1.e-6;
}
19 #include <stdio.h>
20 #include <math.h>
21 #include <limits.h>
22 #include <float.h>
23 #include <sys/time.h>
24
25 /*
26 Program: Stream
27 Programmer: Joe R. Zagar
28 Revision: 4.0-BETA, October 24, 1995
29 Original code developed by John D. McCalpin
30
31 This program measures memory transfer rates in MB/s for simple
32 computational kernels coded in C. These numbers reveal the quality
33 of code generation for simple uncacheable kernels as well as showing
34 the cost of floating-point operations relative to memory accesses.
35
36 INSTRUCTIONS:
37
38 1) Stream requires a good bit of memory to run. Adjust the
39 value of 'N' (below) to give a 'timing calibration' of
40 at least 20 clock-ticks. This will provide rate estimates
41 that should be good to about 5% precision.
42 */
43
/* Number of elements processed in each of the arrays a, b and c. */
#define N 200000
/* Number of times main() repeats the set of four timed kernels. */
#define NTIMES 50
/* Extra elements added to the declared size of each array (N + OFFSET). */
#define OFFSET 0
47
48 /*
49 3) Compile the code with full optimization. Many compilers
50 generate unreasonably bad code before the optimizer tightens
51 things up. If the results are unreasonably good, on the
52 other hand, the optimizer might be too smart for me!
53
54 Try compiling with:
55 cc -O stream_d.c second.c -o stream_d -lm
56
57 This is known to work on Cray, SGI, IBM, and Sun machines.
58
59 4) Mail the results to mccalpin@cs.virginia.edu
60 Be sure to include:
61 a) computer hardware model number and software revision
62 b) the compiler flags
63 c) all of the output from the test case.
64 Thanks!
65
66 */
67
/* Horizontal rule string for report output. */
#define HLINE "-------------------------------------------------------------\n"

/*
 * Two-argument minimum / maximum. Only defined here when no earlier header
 * has supplied them. Arguments may be evaluated more than once, so do not
 * pass expressions with side effects.
 */
#ifndef MIN
#define MIN(p, q) ((p) < (q) ? (p) : (q))
#endif

#ifndef MAX
#define MAX(p, q) ((p) > (q) ? (p) : (q))
#endif
76
77 static double a[N + OFFSET], b[N + OFFSET], c[N + OFFSET];
78 /*double *a,*b,*c;*/
79
80 static double mintime[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
81
82 static const char *label[4] = {"Copy: ", "Scale: ", "Add: ", "Triad: "};
83
84 static double bytes[4] = {2 * sizeof(double) * N, 2 * sizeof(double) * N, 3 * sizeof(double) * N, 3 * sizeof(double) * N};
85
86 extern double second();
87
main(int argc,char ** args)88 int main(int argc, char **args)
89 {
90 int checktick(void);
91 register int j, k;
92 double scalar, t, times[4][NTIMES], irate[4];
93
94 /* --- SETUP --- determine precision and check timing --- */
95
96 for (j = 0; j < N; j++) {
97 a[j] = 1.0;
98 b[j] = 2.0;
99 c[j] = 0.0;
100 }
101
102 t = second();
103 for (j = 0; j < N; j++) a[j] = 2.0E0 * a[j];
104 t = 1.0E6 * (second() - t);
105
106 /* --- MAIN LOOP --- repeat test cases NTIMES times --- */
107
108 scalar = 3.0;
109 for (k = 0; k < NTIMES; k++) {
110 times[0][k] = second();
111 /* should all these barriers be pulled outside of the time call? */
112
113 for (j = 0; j < N; j++) c[j] = a[j];
114 times[0][k] = second() - times[0][k];
115
116 times[1][k] = second();
117
118 for (j = 0; j < N; j++) b[j] = scalar * c[j];
119 times[1][k] = second() - times[1][k];
120
121 times[2][k] = second();
122 for (j = 0; j < N; j++) c[j] = a[j] + b[j];
123 times[2][k] = second() - times[2][k];
124
125 times[3][k] = second();
126 for (j = 0; j < N; j++) a[j] = b[j] + scalar * c[j];
127 times[3][k] = second() - times[3][k];
128 }
129
130 /* --- SUMMARY --- */
131
132 for (k = 0; k < NTIMES; k++)
133 for (j = 0; j < 4; j++) mintime[j] = MIN(mintime[j], times[j][k]);
134
135 for (j = 0; j < 4; j++) irate[j] = 1.0E-06 * bytes[j] / mintime[j];
136
137 printf("Function Rate (MB/s)\n");
138 for (j = 0; j < 4; j++) printf("%s%11.4f\n", label[j], irate[j]);
139 return 0;
140 }
141
#define M 20

/*
 * Estimate the timer granularity in microseconds.
 *
 * Takes M readings from second(), each one at least 1.0E-6 later than a
 * reading taken just before it, then returns the smallest gap between
 * consecutive readings. Each gap is truncated to a whole number of
 * microseconds and negative gaps count as 0. The search starts from
 * 1000000, so the result never exceeds that value.
 */
int checktick(void)
{
    double stamp[M];
    double prev, now;
    int idx, gap, smallest;

    /* Gather M readings, spinning until the timer value has advanced. */
    for (idx = 0; idx < M; idx++) {
        prev = second();
        do {
            now = second();
        } while ((now - prev) < 1.0E-6);
        stamp[idx] = now;
    }

    /* Find the smallest non-negative gap between neighbouring readings. */
    smallest = 1000000;
    for (idx = 1; idx < M; idx++) {
        gap = (int)(1.0E6 * (stamp[idx] - stamp[idx - 1]));
        if (gap < 0) {
            gap = 0;
        }
        if (gap < smallest) {
            smallest = gap;
        }
    }

    return smallest;
}
171