xref: /petsc/src/benchmarks/streams/BasicVersion.c (revision 66af8762ec03dbef0e079729eb2a1734a35ed7ff)
#include <stddef.h>
#include <sys/time.h>
2 /* int gettimeofday(struct timeval *tp, struct timezone *tzp); */
3 
/*
  second - Return the current wall-clock time in seconds.

  The value is the gettimeofday() reading converted to a double:
  whole seconds plus microseconds scaled by 1.e-6.  Callers subtract
  two readings to obtain an elapsed interval.

  Returns -1.0 if gettimeofday() reports failure, so that the contents
  of an unfilled struct timeval are never read.
*/
double second(void)
{
  struct timeval tp;

  /* POSIX leaves the behavior unspecified when the timezone argument is
     not a null pointer, so no struct timezone is passed. */
  if (gettimeofday(&tp, NULL) != 0) return -1.0;

  return ((double) tp.tv_sec + (double) tp.tv_usec * 1.e-6);
}
19 # include <stdio.h>
20 # include <math.h>
21 # include <limits.h>
22 # include <float.h>
23 # include <sys/time.h>
24 
25 /*
26   Program: Stream
27   Programmer: Joe R. Zagar
28   Revision: 4.0-BETA, October 24, 1995
29   Original code developed by John D. McCalpin
30 
31   This program measures memory transfer rates in MB/s for simple
32   computational kernels coded in C.  These numbers reveal the quality
33   of code generation for simple uncacheable kernels as well as showing
34   the cost of floating-point operations relative to memory accesses.
35 
36   INSTRUCTIONS:
37 
38         1) Stream requires a good bit of memory to run.  Adjust the
39            value of 'N' (below) to give a 'timing calibration' of
40            at least 20 clock-ticks.  This will provide rate estimates
41            that should be good to about 5% precision.
42  */
43 
/* Problem size: number of elements in each of the three work arrays. */
# define N      200000
/* Number of times the four kernels are repeated; the fastest repetition is reported. */
# define NTIMES     50
/* Extra elements added to the length of each work array. */
# define OFFSET      0

/*
       3) Compile the code with full optimization.  Many compilers
          generate unreasonably bad code before the optimizer tightens
          things up.  If the results are unreasonably good, on the
          other hand, the optimizer might be too smart for me!

          Try compiling with:
                cc -O stream_d.c second.c -o stream_d -lm

          This is known to work on Cray, SGI, IBM, and Sun machines.

       4) Mail the results to mccalpin@cs.virginia.edu
          Be sure to include:
                 a) computer hardware model number and software revision
                 b) the compiler flags
                 c) all of the output from the test case.
  Thanks!

*/

/* Horizontal separator line for report output. */
# define HLINE "-------------------------------------------------------------\n"

/* NOTE: both macros evaluate their arguments more than once; do not pass
   expressions with side effects. */
# ifndef MIN
# define MIN(x,y) ((x)<(y) ? (x) : (y))
# endif
# ifndef MAX
# define MAX(x,y) ((x)>(y) ? (x) : (y))
# endif

/* Work arrays read and written by the four kernels. */
static double a[N+OFFSET],
              b[N+OFFSET],
              c[N+OFFSET];
/*double *a,*b,*c;*/

/* Smallest time seen so far for each kernel.  The entries are doubles, so
   the "nothing measured yet" sentinel is the largest finite double. */
static double mintime[4] = {DBL_MAX,DBL_MAX,DBL_MAX,DBL_MAX};

/* Row labels for the report, indexed in the same order as mintime[] and bytes[]. */
static const char *label[4] = {"Copy:      ", "Scale:     ", "Add:       ", "Triad:     "};

/* Bytes moved by one pass of each kernel: Copy and Scale touch two arrays,
   Add and Triad touch three. */
static double bytes[4] = {
  2 * sizeof(double) * N,
  2 * sizeof(double) * N,
  3 * sizeof(double) * N,
  3 * sizeof(double) * N
};

/* Wall-clock timer, in seconds.  Declared with a full prototype so calls
   are type-checked. */
extern double second(void);
94 
95 int main(int argc,char **args)
96 {
97   int           checktick(void);
98   register int j, k;
99   double       scalar, t, times[4][NTIMES],irate[4];
100 
101   /* --- SETUP --- determine precision and check timing --- */
102 
103   for (j=0; j<N; j++) {
104     a[j] = 1.0;
105     b[j] = 2.0;
106     c[j] = 0.0;
107   }
108 
109   t = second();
110   for (j = 0; j < N; j++) a[j] = 2.0E0 * a[j];
111   t = 1.0E6 * (second() - t);
112 
113   /*   --- MAIN LOOP --- repeat test cases NTIMES times --- */
114 
115   scalar = 3.0;
116   for (k=0; k<NTIMES; k++)
117   {
118 
119     times[0][k] = second();
120 /* should all these barriers be pulled outside of the time call? */
121 
122     for (j=0; j<N; j++) c[j] = a[j];
123     times[0][k] = second() - times[0][k];
124 
125     times[1][k] = second();
126 
127     for (j=0; j<N; j++) b[j] = scalar*c[j];
128     times[1][k] = second() - times[1][k];
129 
130     times[2][k] = second();
131     for (j=0; j<N; j++) c[j] = a[j]+b[j];
132     times[2][k] = second() - times[2][k];
133 
134     times[3][k] = second();
135     for (j=0; j<N; j++) a[j] = b[j]+scalar*c[j];
136     times[3][k] = second() - times[3][k];
137   }
138 
139   /*   --- SUMMARY --- */
140 
141   for (k=0; k<NTIMES; k++)
142     for (j=0; j<4; j++) mintime[j] = MIN(mintime[j], times[j][k]);
143 
144   for (j=0; j<4; j++) irate[j] = 1.0E-06 * bytes[j]/mintime[j];
145 
146   printf("Function      Rate (MB/s) \n");
147   for (j=0; j<4; j++) printf("%s%11.4f\n", label[j],irate[j]);
148   return 0;
149 }
150 
# define        M        20

/*
  checktick - Estimate the timer granularity in microseconds.

  Takes M readings of second(), each at least one microsecond later than
  the reading that started its wait, and returns the smallest gap between
  consecutive recorded readings, converted to whole microseconds.
  Negative gaps count as zero and the result never exceeds 1000000.
*/
int checktick(void)
{
  double samples[M];
  double start, now;
  int    idx, gap, smallest;

  /* Record M time values, each one strictly later than its starting point. */
  for (idx = 0; idx < M; idx++) {
    start = second();
    do {
      now = second();
    } while ((now - start) < 1.0E-6);
    samples[idx] = now;
  }

  /* The smallest spacing between neighbouring samples is the estimate. */
  smallest = 1000000;
  for (idx = 1; idx < M; idx++) {
    gap = (int)(1.0E6 * (samples[idx] - samples[idx-1]));
    if (gap < 0) gap = 0;
    if (gap < smallest) smallest = gap;
  }

  return smallest;
}
180