xref: /petsc/src/benchmarks/streams/BasicVersion.c (revision abbcd2d45932c6dbad0ca42a290b3a741dfdc5ac)
1 
#include <stddef.h>
#include <sys/time.h>
3 /* int gettimeofday(struct timeval *tp, struct timezone *tzp); */
4 
/*
  second - wall-clock time in seconds, with microsecond resolution.

  Reads the time of day with gettimeofday() and returns tv_sec + tv_usec*1e-6
  as a double.  Callers subtract two readings to obtain an elapsed time.

  Returns 0.0 if gettimeofday() reports failure.
*/
double second(void)
{
  struct timeval tp = {0, 0};

  /* The timezone argument is obsolete: POSIX leaves the behaviour unspecified
     when it is non-null, so always pass a null pointer. */
  if (gettimeofday(&tp, NULL) != 0) return 0.0;

  return ((double) tp.tv_sec + (double) tp.tv_usec * 1.e-6);
}
20 # include <stdio.h>
21 # include <math.h>
22 # include <limits.h>
23 # include <float.h>
24 # include <sys/time.h>
25 
26 /*
27   Program: Stream
28   Programmer: Joe R. Zagar
29   Revision: 4.0-BETA, October 24, 1995
30   Original code developed by John D. McCalpin
31 
32   This program measures memory transfer rates in MB/s for simple
33   computational kernels coded in C.  These numbers reveal the quality
34   of code generation for simple uncacheable kernels as well as showing
35   the cost of floating-point operations relative to memory accesses.
36 
37   INSTRUCTIONS:
38 
39         1) Stream requires a good bit of memory to run.  Adjust the
40            value of 'N' (below) to give a 'timing calibration' of
41            at least 20 clock-ticks.  This will provide rate estimates
42            that should be good to about 5% precision.
43  */
44 
/* Number of elements each benchmark kernel processes per pass. */
#define N 200000
/* Number of timed repetitions of the full kernel sequence. */
#define NTIMES 50
/* Extra elements appended to each array beyond the N that are used. */
#define OFFSET 0
48 
49 /*
50        3) Compile the code with full optimization.  Many compilers
51           generate unreasonably bad code before the optimizer tightens
52           things up.  If the results are unreasonably good, on the
53           other hand, the optimizer might be too smart for me!
54 
55           Try compiling with:
56                 cc -O stream_d.c second.c -o stream_d -lm
57 
58           This is known to work on Cray, SGI, IBM, and Sun machines.
59 
60 
61        4) Mail the results to mccalpin@cs.virginia.edu
62           Be sure to include:
63                  a) computer hardware model number and software revision
64                  b) the compiler flags
65                  c) all of the output from the test case.
66   Thanks!
67 
68 */
69 
/* Separator line string (not referenced by the code visible in this file). */
#define HLINE "-------------------------------------------------------------\n"

/*
  Two-argument minimum and maximum, defined only if not already provided.
  Each argument may be evaluated twice, so do not pass expressions with
  side effects.
*/
#ifndef MIN
#define MIN(lhs,rhs) ((lhs)<(rhs) ? (lhs) : (rhs))
#endif
#ifndef MAX
#define MAX(lhs,rhs) ((lhs)>(rhs) ? (lhs) : (rhs))
#endif
78 
79 static double a[N+OFFSET],
80               b[N+OFFSET],
81               c[N+OFFSET];
82 /*double *a,*b,*c;*/
83 
84 static double mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
85 
86 static const char *label[4] = {"Copy:      ", "Scale:     ", "Add:       ", "Triad:     "};
87 
88 static double bytes[4] = {
89   2 * sizeof(double) * N,
90   2 * sizeof(double) * N,
91   3 * sizeof(double) * N,
92   3 * sizeof(double) * N
93 };
94 
95 extern double second();
96 
97 int main(int argc,char **args)
98 {
99   int           checktick(void);
100   register int j, k;
101   double       scalar, t, times[4][NTIMES],irate[4];
102 
103   /* --- SETUP --- determine precision and check timing --- */
104 
105   for (j=0; j<N; j++) {
106     a[j] = 1.0;
107     b[j] = 2.0;
108     c[j] = 0.0;
109   }
110 
111   t = second();
112   for (j = 0; j < N; j++) a[j] = 2.0E0 * a[j];
113   t = 1.0E6 * (second() - t);
114 
115   /*   --- MAIN LOOP --- repeat test cases NTIMES times --- */
116 
117   scalar = 3.0;
118   for (k=0; k<NTIMES; k++)
119   {
120 
121     times[0][k] = second();
122 /* should all these barriers be pulled outside of the time call? */
123 
124     for (j=0; j<N; j++) c[j] = a[j];
125     times[0][k] = second() - times[0][k];
126 
127     times[1][k] = second();
128 
129     for (j=0; j<N; j++) b[j] = scalar*c[j];
130     times[1][k] = second() - times[1][k];
131 
132     times[2][k] = second();
133     for (j=0; j<N; j++) c[j] = a[j]+b[j];
134     times[2][k] = second() - times[2][k];
135 
136     times[3][k] = second();
137     for (j=0; j<N; j++) a[j] = b[j]+scalar*c[j];
138     times[3][k] = second() - times[3][k];
139   }
140 
141   /*   --- SUMMARY --- */
142 
143   for (k=0; k<NTIMES; k++)
144     for (j=0; j<4; j++) mintime[j] = MIN(mintime[j], times[j][k]);
145 
146   for (j=0; j<4; j++) irate[j] = 1.0E-06 * bytes[j]/mintime[j];
147 
148   printf("Function      Rate (MB/s) \n");
149   for (j=0; j<4; j++) printf("%s%11.4f\n", label[j],irate[j]);
150   return 0;
151 }
152 
# define        M        20

/*
  checktick - estimate the granularity of the second() timer.

  Gathers M timer readings, each taken once the clock has moved at least one
  microsecond past the previous reading, and returns the smallest gap between
  consecutive readings in whole microseconds.  Negative gaps count as zero,
  and the result never exceeds 1000000.
*/
int checktick(void)
{
  double samples[M];
  double start, now;
  int    idx, gap, smallest;

  /* Collect a sequence of M distinct time values from the system. */
  for (idx = 0; idx < M; idx++) {
    start = second();
    do {
      now = second();
    } while ((now - start) < 1.0E-6);
    samples[idx] = now;
  }

  /* The minimum difference between neighbouring samples is the estimate
     (in microseconds) of the clock granularity. */
  smallest = 1000000;
  for (idx = 1; idx < M; idx++) {
    gap = (int)(1.0E6 * (samples[idx] - samples[idx-1]));
    if (gap < 0) gap = 0;
    if (gap < smallest) smallest = gap;
  }

  return smallest;
}
182 
183