xref: /petsc/src/benchmarks/streams/BasicVersion.c (revision 0e3d61c972ee8b0cd7b6ee2ab64f8543b0740577)
15d28107eSBarry Smith 
#include <stddef.h>
#include <sys/time.h>
35d28107eSBarry Smith /* int gettimeofday(struct timeval *tp, struct timezone *tzp); */
45d28107eSBarry Smith 
/*
  Returns the current time of day in seconds, as reported by gettimeofday(),
  with the microsecond field folded in as the fractional part.

  Returns 0.0 if gettimeofday() reports failure.
*/
double second(void)
{
  struct timeval tp = {0, 0};

  /* POSIX leaves the behavior unspecified for a non-null timezone argument, so pass NULL */
  if (gettimeofday(&tp, NULL) != 0) return 0.0;
  return ((double) tp.tv_sec + (double) tp.tv_usec * 1.e-6);
}
205d28107eSBarry Smith # include <stdio.h>
215d28107eSBarry Smith # include <math.h>
225d28107eSBarry Smith # include <limits.h>
230d04baf8SBarry Smith # include <float.h>
245d28107eSBarry Smith # include <sys/time.h>
255d28107eSBarry Smith 
/*
  Program: Stream
  Programmer: Joe R. Zagar
  Revision: 4.0-BETA, October 24, 1995
  Original code developed by John D. McCalpin

  This program measures memory transfer rates in MB/s for simple
  computational kernels coded in C.  These numbers reveal the quality
  of code generation for simple uncacheable kernels as well as showing
  the cost of floating-point operations relative to memory accesses.

  INSTRUCTIONS:

        1) Stream requires a good bit of memory to run.  Adjust the
           value of 'N' (below) to give a 'timing calibration' of
           at least 20 clock-ticks.  This will provide rate estimates
           that should be good to about 5% precision.
 */
445d28107eSBarry Smith 
/*
  Benchmark dimensions.  Each keeps its historical default but may be
  overridden at compile time, e.g. -DN=2000000.

  N      - number of elements each kernel loop processes
  NTIMES - number of timed repetitions of the kernel set
  OFFSET - extra elements added to the declared length of each array
*/
# ifndef N
# define N      200000
# endif
# ifndef NTIMES
# define NTIMES     50
# endif
# ifndef OFFSET
# define OFFSET      0
# endif
485d28107eSBarry Smith 
/*
       2) Compile the code with full optimization.  Many compilers
          generate unreasonably bad code before the optimizer tightens
          things up.  If the results are unreasonably good, on the
          other hand, the optimizer might be too smart for me!

          Try compiling with:
                cc -O stream_d.c second.c -o stream_d -lm

          This is known to work on Cray, SGI, IBM, and Sun machines.


       3) Mail the results to mccalpin@cs.virginia.edu
          Be sure to include:
                 a) computer hardware model number and software revision
                 b) the compiler flags
                 c) all of the output from the test case.
  Thanks!

*/
695d28107eSBarry Smith 
/* Newline-terminated separator line made of dashes */
# define HLINE "-------------------------------------------------------------\n"

/*
  Two-argument minimum and maximum, supplied only when no definition exists yet.
  The selected argument is evaluated twice, so do not pass expressions with
  side effects.
*/
# if !defined(MIN)
# define MIN(lhs,rhs) (((lhs) < (rhs)) ? (lhs) : (rhs))
# endif
# if !defined(MAX)
# define MAX(lhs,rhs) (((lhs) > (rhs)) ? (lhs) : (rhs))
# endif
785d28107eSBarry Smith 
795d28107eSBarry Smith static double a[N+OFFSET],
805d28107eSBarry Smith               b[N+OFFSET],
815d28107eSBarry Smith               c[N+OFFSET];
825d28107eSBarry Smith /*double *a,*b,*c;*/
835d28107eSBarry Smith 
84df4a11deSBarry Smith static double mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
855d28107eSBarry Smith 
86df4a11deSBarry Smith static const char *label[4] = {"Copy:      ", "Scale:     ", "Add:       ", "Triad:     "};
875d28107eSBarry Smith 
885d28107eSBarry Smith static double bytes[4] = {
895d28107eSBarry Smith   2 * sizeof(double) * N,
905d28107eSBarry Smith   2 * sizeof(double) * N,
915d28107eSBarry Smith   3 * sizeof(double) * N,
925d28107eSBarry Smith   3 * sizeof(double) * N
935d28107eSBarry Smith };
945d28107eSBarry Smith 
955d28107eSBarry Smith extern double second();
965d28107eSBarry Smith 
9701a79839SBarry Smith int main(int argc,char **args)
985d28107eSBarry Smith {
99d1d3a73cSBarry Smith   int           checktick(void);
1005d28107eSBarry Smith   register int j, k;
101d3ae85c4SBarry Smith   double       scalar, t, times[4][NTIMES],irate[4];
102df4a11deSBarry Smith 
1035d28107eSBarry Smith   /* --- SETUP --- determine precision and check timing --- */
1045d28107eSBarry Smith 
1055d28107eSBarry Smith   for (j=0; j<N; j++) {
1065d28107eSBarry Smith     a[j] = 1.0;
1075d28107eSBarry Smith     b[j] = 2.0;
1085d28107eSBarry Smith     c[j] = 0.0;
1095d28107eSBarry Smith   }
1105d28107eSBarry Smith 
1115d28107eSBarry Smith   t = second();
1126f2b61bcSKarl Rupp   for (j = 0; j < N; j++) a[j] = 2.0E0 * a[j];
1135d28107eSBarry Smith   t = 1.0E6 * (second() - t);
1145d28107eSBarry Smith 
1155d28107eSBarry Smith   /*   --- MAIN LOOP --- repeat test cases NTIMES times --- */
1165d28107eSBarry Smith 
1175d28107eSBarry Smith   scalar = 3.0;
1185d28107eSBarry Smith   for (k=0; k<NTIMES; k++)
1195d28107eSBarry Smith   {
120d3ae85c4SBarry Smith 
1215d28107eSBarry Smith     times[0][k] = second();
122df4a11deSBarry Smith /* should all these barriers be pulled outside of the time call? */
123d3ae85c4SBarry Smith 
1246f2b61bcSKarl Rupp     for (j=0; j<N; j++) c[j] = a[j];
1255d28107eSBarry Smith     times[0][k] = second() - times[0][k];
1265d28107eSBarry Smith 
1275d28107eSBarry Smith     times[1][k] = second();
128d3ae85c4SBarry Smith 
1296f2b61bcSKarl Rupp     for (j=0; j<N; j++) b[j] = scalar*c[j];
1305d28107eSBarry Smith     times[1][k] = second() - times[1][k];
1315d28107eSBarry Smith 
1325d28107eSBarry Smith     times[2][k] = second();
1336f2b61bcSKarl Rupp     for (j=0; j<N; j++) c[j] = a[j]+b[j];
1345d28107eSBarry Smith     times[2][k] = second() - times[2][k];
1355d28107eSBarry Smith 
1365d28107eSBarry Smith     times[3][k] = second();
1376f2b61bcSKarl Rupp     for (j=0; j<N; j++) a[j] = b[j]+scalar*c[j];
1385d28107eSBarry Smith     times[3][k] = second() - times[3][k];
1395d28107eSBarry Smith   }
1405d28107eSBarry Smith 
1415d28107eSBarry Smith   /*   --- SUMMARY --- */
1425d28107eSBarry Smith 
1436f2b61bcSKarl Rupp   for (k=0; k<NTIMES; k++)
1446f2b61bcSKarl Rupp     for (j=0; j<4; j++) mintime[j] = MIN(mintime[j], times[j][k]);
1455d28107eSBarry Smith 
1466f2b61bcSKarl Rupp   for (j=0; j<4; j++) irate[j] = 1.0E-06 * bytes[j]/mintime[j];
147df4a11deSBarry Smith 
148df4a11deSBarry Smith   printf("Function      Rate (MB/s) \n");
149d3ae85c4SBarry Smith   for (j=0; j<4; j++) printf("%s%11.4f\n", label[j],irate[j]);
1505d28107eSBarry Smith   return 0;
1515d28107eSBarry Smith }
1525d28107eSBarry Smith 
# define        M        20

/*
  Estimates the granularity of the second() timer.

  Takes M timer readings, each accepted only once the clock has moved at least
  one microsecond past the reading that opened its wait, then returns the
  smallest gap between consecutive readings in whole microseconds.  Negative
  gaps count as zero and the result never exceeds 1000000.
*/
int checktick(void)
{
  double stamp[M];
  double began, now;
  int    idx, gap;
  int    finest = 1000000;

  /* Gather M readings, spinning on the timer until it visibly advances */
  for (idx = 0; idx < M; idx++) {
    began = second();
    do {
      now = second();
    } while ((now - began) < 1.0E-6);
    stamp[idx] = now;
  }

  /* Smallest non-negative spacing between neighbouring readings, in microseconds */
  for (idx = 1; idx < M; idx++) {
    gap = (int)(1.0E6 * (stamp[idx]-stamp[idx-1]));
    if (gap < 0) gap = 0;
    if (gap < finest) finest = gap;
  }

  return finest;
}
1825d28107eSBarry Smith 
183