/* xref: /petsc/src/benchmarks/streams/BasicVersion.c (revision 11cc89d2bae31c43c795890e5d9b25a12b75f4f2) */
15d28107eSBarry Smith 
#include <stddef.h>
#include <sys/time.h>
/*
  second - return the current wall-clock time in seconds.

  The value is the time reported by gettimeofday() (whole seconds plus
  microseconds) converted to a double.  The callers in this file only
  use the difference between two return values.

  Returns 0.0 if gettimeofday() reports an error.
 */
double second(void)
{
  struct timeval tp = {0, 0};

  /* The timezone argument is obsolete: POSIX leaves the behavior
     unspecified when it is not a null pointer, so pass NULL. */
  if (gettimeofday(&tp, NULL) != 0) return 0.0;

  return ((double)tp.tv_sec + (double)tp.tv_usec * 1.e-6);
}
205d28107eSBarry Smith # include <stdio.h>
215d28107eSBarry Smith # include <math.h>
225d28107eSBarry Smith # include <limits.h>
230d04baf8SBarry Smith # include <float.h>
245d28107eSBarry Smith # include <sys/time.h>
255d28107eSBarry Smith 
/*
  Program: Stream
  Programmer: Joe R. Zagar
  Revision: 4.0-BETA, October 24, 1995
  Original code developed by John D. McCalpin

  This program measures memory transfer rates in MB/s for simple
  computational kernels coded in C.  These numbers reveal the quality
  of code generation for simple uncacheable kernels as well as showing
  the cost of floating-point operations relative to memory accesses.

  INSTRUCTIONS:

        1) Stream requires a good bit of memory to run.  Adjust the
           value of 'N' (below) to give a 'timing calibration' of
           at least 20 clock-ticks.  This will provide rate estimates
           that should be good to about 5% precision.
 */
445d28107eSBarry Smith 
/*
  Problem-size parameters.  Each one may be overridden on the compiler
  command line (for example -DN=2000000); the values below are the
  defaults used when nothing is supplied.

  N      - number of doubles initialized and streamed in each work array
  NTIMES - number of times the set of four kernels is repeated
  OFFSET - extra elements added to each work-array declaration
 */
# ifndef N
# define N      200000
# endif
# ifndef NTIMES
# define NTIMES     50
# endif
# ifndef OFFSET
# define OFFSET      0
# endif
485d28107eSBarry Smith 
/*
       3) Compile the code with full optimization.  Many compilers
          generate unreasonably bad code before the optimizer tightens
          things up.  If the results are unreasonably good, on the
          other hand, the optimizer might be too smart for me!

          Try compiling with:
                cc -O stream_d.c second.c -o stream_d -lm

          This is known to work on Cray, SGI, IBM, and Sun machines.

       4) Mail the results to mccalpin@cs.virginia.edu
          Be sure to include:
                 a) computer hardware model number and software revision
                 b) the compiler flags
                 c) all of the output from the test case.
  Thanks!

*/
685d28107eSBarry Smith 
/* Separator line string (not referenced by the code visible in this file). */
# define HLINE "-------------------------------------------------------------\n"

/*
  MIN / MAX - smaller / larger of two values.

  Both macros evaluate their arguments more than once, so do not pass
  expressions with side effects (e.g. i++).  They are only defined here
  when no earlier header has already provided them.
 */
# ifndef MIN
# define MIN(x,y) ((x)<(y) ? (x) : (y))
# endif
# ifndef MAX
# define MAX(x,y) ((x)>(y) ? (x) : (y))
# endif
775d28107eSBarry Smith 
785d28107eSBarry Smith static double a[N+OFFSET],
795d28107eSBarry Smith               b[N+OFFSET],
805d28107eSBarry Smith               c[N+OFFSET];
815d28107eSBarry Smith /*double *a,*b,*c;*/
825d28107eSBarry Smith 
83df4a11deSBarry Smith static double mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
845d28107eSBarry Smith 
85df4a11deSBarry Smith static const char *label[4] = {"Copy:      ", "Scale:     ", "Add:       ", "Triad:     "};
865d28107eSBarry Smith 
875d28107eSBarry Smith static double bytes[4] = {
885d28107eSBarry Smith   2 * sizeof(double) * N,
895d28107eSBarry Smith   2 * sizeof(double) * N,
905d28107eSBarry Smith   3 * sizeof(double) * N,
915d28107eSBarry Smith   3 * sizeof(double) * N
925d28107eSBarry Smith };
935d28107eSBarry Smith 
945d28107eSBarry Smith extern double second();
955d28107eSBarry Smith 
9601a79839SBarry Smith int main(int argc,char **args)
975d28107eSBarry Smith {
98d1d3a73cSBarry Smith   int           checktick(void);
995d28107eSBarry Smith   register int j, k;
100d3ae85c4SBarry Smith   double       scalar, t, times[4][NTIMES],irate[4];
101df4a11deSBarry Smith 
1025d28107eSBarry Smith   /* --- SETUP --- determine precision and check timing --- */
1035d28107eSBarry Smith 
1045d28107eSBarry Smith   for (j=0; j<N; j++) {
1055d28107eSBarry Smith     a[j] = 1.0;
1065d28107eSBarry Smith     b[j] = 2.0;
1075d28107eSBarry Smith     c[j] = 0.0;
1085d28107eSBarry Smith   }
1095d28107eSBarry Smith 
1105d28107eSBarry Smith   t = second();
1116f2b61bcSKarl Rupp   for (j = 0; j < N; j++) a[j] = 2.0E0 * a[j];
1125d28107eSBarry Smith   t = 1.0E6 * (second() - t);
1135d28107eSBarry Smith 
1145d28107eSBarry Smith   /*   --- MAIN LOOP --- repeat test cases NTIMES times --- */
1155d28107eSBarry Smith 
1165d28107eSBarry Smith   scalar = 3.0;
1175d28107eSBarry Smith   for (k=0; k<NTIMES; k++)
1185d28107eSBarry Smith   {
119d3ae85c4SBarry Smith 
1205d28107eSBarry Smith     times[0][k] = second();
121df4a11deSBarry Smith /* should all these barriers be pulled outside of the time call? */
122d3ae85c4SBarry Smith 
1236f2b61bcSKarl Rupp     for (j=0; j<N; j++) c[j] = a[j];
1245d28107eSBarry Smith     times[0][k] = second() - times[0][k];
1255d28107eSBarry Smith 
1265d28107eSBarry Smith     times[1][k] = second();
127d3ae85c4SBarry Smith 
1286f2b61bcSKarl Rupp     for (j=0; j<N; j++) b[j] = scalar*c[j];
1295d28107eSBarry Smith     times[1][k] = second() - times[1][k];
1305d28107eSBarry Smith 
1315d28107eSBarry Smith     times[2][k] = second();
1326f2b61bcSKarl Rupp     for (j=0; j<N; j++) c[j] = a[j]+b[j];
1335d28107eSBarry Smith     times[2][k] = second() - times[2][k];
1345d28107eSBarry Smith 
1355d28107eSBarry Smith     times[3][k] = second();
1366f2b61bcSKarl Rupp     for (j=0; j<N; j++) a[j] = b[j]+scalar*c[j];
1375d28107eSBarry Smith     times[3][k] = second() - times[3][k];
1385d28107eSBarry Smith   }
1395d28107eSBarry Smith 
1405d28107eSBarry Smith   /*   --- SUMMARY --- */
1415d28107eSBarry Smith 
1426f2b61bcSKarl Rupp   for (k=0; k<NTIMES; k++)
1436f2b61bcSKarl Rupp     for (j=0; j<4; j++) mintime[j] = MIN(mintime[j], times[j][k]);
1445d28107eSBarry Smith 
1456f2b61bcSKarl Rupp   for (j=0; j<4; j++) irate[j] = 1.0E-06 * bytes[j]/mintime[j];
146df4a11deSBarry Smith 
147df4a11deSBarry Smith   printf("Function      Rate (MB/s) \n");
148d3ae85c4SBarry Smith   for (j=0; j<4; j++) printf("%s%11.4f\n", label[j],irate[j]);
1495d28107eSBarry Smith   return 0;
1505d28107eSBarry Smith }
1515d28107eSBarry Smith 
/* Number of clock readings sampled by checktick(). */
# define        M        20

/*
  checktick - estimate the granularity of second(), in microseconds.

  Collects M readings of second(), each taken once the clock has advanced
  by at least one microsecond past a fresh starting reading, and returns
  the smallest gap between consecutive readings.  Negative gaps are
  counted as 0 and the result is never larger than 1000000.
 */
int checktick(void)
{
  int    i, minDelta, Delta;
  double t1, t2, timesfound[M];

  /*  Collect a sequence of M unique time values from the system. */

  for (i = 0; i < M; i++) {
    t1 = second();
    /* Spin until the reported time differs from t1 by >= 1 microsecond. */
    while (((t2=second()) - t1) < 1.0E-6) ;
    timesfound[i] = t2;
  }

  /*
    Determine the minimum difference between these M values.
    This result will be our estimate (in microseconds) for the
    clock granularity.
   */

  minDelta = 1000000;
  for (i = 1; i < M; i++) {
    Delta    = (int)(1.0E6 * (timesfound[i]-timesfound[i-1]));
    minDelta = MIN(minDelta, MAX(Delta,0));
  }

  return minDelta;
}
1815d28107eSBarry Smith 
182