xref: /petsc/src/benchmarks/streams/BasicVersion.c (revision fbf9dbe564678ed6eff1806adbc4c4f01b9743f4)
1 
#include <stddef.h>
#include <sys/time.h>
3 /* int gettimeofday(struct timeval *tp, struct timezone *tzp); */
4 
/*
  second - current wall-clock time in seconds, as reported by gettimeofday().

  Returns tv_sec + tv_usec * 1e-6 as a double.  Should gettimeofday() report
  a failure, 0.0 is returned rather than a value built from an unset timeval.
 */
double second(void)
{
  struct timeval tp = {0, 0};

  /* The timezone argument is obsolete; POSIX leaves the behaviour
     unspecified when it is not a null pointer, so pass NULL. */
  if (gettimeofday(&tp, NULL) != 0) return 0.0;

  return ((double) tp.tv_sec + (double) tp.tv_usec * 1.e-6);
}
20 # include <stdio.h>
21 # include <math.h>
22 # include <limits.h>
23 # include <float.h>
24 # include <sys/time.h>
25 
26 /*
27   Program: Stream
28   Programmer: Joe R. Zagar
29   Revision: 4.0-BETA, October 24, 1995
30   Original code developed by John D. McCalpin
31 
32   This program measures memory transfer rates in MB/s for simple
33   computational kernels coded in C.  These numbers reveal the quality
34   of code generation for simple uncacheable kernels as well as showing
35   the cost of floating-point operations relative to memory accesses.
36 
37   INSTRUCTIONS:
38 
39         1) Stream requires a good bit of memory to run.  Adjust the
40            value of 'N' (below) to give a 'timing calibration' of
41            at least 20 clock-ticks.  This will provide rate estimates
42            that should be good to about 5% precision.
43  */
44 
45 # define N      200000
46 # define NTIMES     50
47 # define OFFSET      0
48 
49 /*
50        3) Compile the code with full optimization.  Many compilers
51           generate unreasonably bad code before the optimizer tightens
52           things up.  If the results are unreasonably good, on the
53           other hand, the optimizer might be too smart for me!
54 
55           Try compiling with:
56                 cc -O stream_d.c second.c -o stream_d -lm
57 
58           This is known to work on Cray, SGI, IBM, and Sun machines.
59 
60        4) Mail the results to mccalpin@cs.virginia.edu
61           Be sure to include:
62                  a) computer hardware model number and software revision
63                  b) the compiler flags
64                  c) all of the output from the test case.
65   Thanks!
66 
67 */
68 
69 # define HLINE "-------------------------------------------------------------\n"
70 
71 # ifndef MIN
72 # define MIN(x,y) ((x)<(y) ? (x) : (y))
73 # endif
74 # ifndef MAX
75 # define MAX(x,y) ((x)>(y) ? (x) : (y))
76 # endif
77 
78 static double a[N+OFFSET],
79               b[N+OFFSET],
80               c[N+OFFSET];
81 /*double *a,*b,*c;*/
82 
83 static double mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
84 
85 static const char *label[4] = {"Copy:      ", "Scale:     ", "Add:       ", "Triad:     "};
86 
87 static double bytes[4] = {
88   2 * sizeof(double) * N,
89   2 * sizeof(double) * N,
90   3 * sizeof(double) * N,
91   3 * sizeof(double) * N
92 };
93 
94 extern double second();
95 
96 int main(int argc,char **args)
97 {
98   int           checktick(void);
99   register int j, k;
100   double       scalar, t, times[4][NTIMES],irate[4];
101 
102   /* --- SETUP --- determine precision and check timing --- */
103 
104   for (j=0; j<N; j++) {
105     a[j] = 1.0;
106     b[j] = 2.0;
107     c[j] = 0.0;
108   }
109 
110   t = second();
111   for (j = 0; j < N; j++) a[j] = 2.0E0 * a[j];
112   t = 1.0E6 * (second() - t);
113 
114   /*   --- MAIN LOOP --- repeat test cases NTIMES times --- */
115 
116   scalar = 3.0;
117   for (k=0; k<NTIMES; k++)
118   {
119 
120     times[0][k] = second();
121 /* should all these barriers be pulled outside of the time call? */
122 
123     for (j=0; j<N; j++) c[j] = a[j];
124     times[0][k] = second() - times[0][k];
125 
126     times[1][k] = second();
127 
128     for (j=0; j<N; j++) b[j] = scalar*c[j];
129     times[1][k] = second() - times[1][k];
130 
131     times[2][k] = second();
132     for (j=0; j<N; j++) c[j] = a[j]+b[j];
133     times[2][k] = second() - times[2][k];
134 
135     times[3][k] = second();
136     for (j=0; j<N; j++) a[j] = b[j]+scalar*c[j];
137     times[3][k] = second() - times[3][k];
138   }
139 
140   /*   --- SUMMARY --- */
141 
142   for (k=0; k<NTIMES; k++)
143     for (j=0; j<4; j++) mintime[j] = MIN(mintime[j], times[j][k]);
144 
145   for (j=0; j<4; j++) irate[j] = 1.0E-06 * bytes[j]/mintime[j];
146 
147   printf("Function      Rate (MB/s) \n");
148   for (j=0; j<4; j++) printf("%s%11.4f\n", label[j],irate[j]);
149   return 0;
150 }
151 
# define        M        20

/*
  checktick - estimate the granularity of second(), in microseconds.

  Collects M time stamps.  For each one it takes a fresh reading of second()
  and then polls second() until the value has advanced by at least 1.0E-6;
  that later reading is the stamp.

  Returns the smallest difference between consecutive stamps, scaled by 1.0E6
  and truncated to int.  Negative differences count as 0, and the result
  never exceeds 1000000.
 */
int checktick(void)
{
  double stamp[M];
  double start, now;
  int    idx, gap, smallest;

  /* Gather the M stamps. */
  for (idx = 0; idx < M; idx++) {
    start = second();
    do {
      now = second();
    } while ((now - start) < 1.0E-6);
    stamp[idx] = now;
  }

  /* Scan neighbouring stamps for the smallest gap. */
  smallest = 1000000;
  for (idx = 1; idx < M; idx++) {
    gap = (int)(1.0E6 * (stamp[idx]-stamp[idx-1]));
    if (gap < 0) gap = 0;
    if (gap < smallest) smallest = gap;
  }

  return smallest;
}
181 
182