#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <float.h>
#include <sys/time.h>

/*
  Program: Stream
  Programmer: Joe R. Zagar
  Revision: 4.0-BETA, October 24, 1995
  Original code developed by John D. McCalpin

  This program measures memory transfer rates in MB/s for simple
  computational kernels coded in C.  These numbers reveal the quality
  of code generation for simple uncacheable kernels as well as showing
  the cost of floating-point operations relative to memory accesses.

  INSTRUCTIONS:

  1) Stream requires a good bit of memory to run.  Adjust the
     value of 'N' (below) to give a 'timing calibration' of
     at least 20 clock-ticks.  This will provide rate estimates
     that should be good to about 5% precision.
*/

#define N      200000 /* number of elements in each work array */
#define NTIMES 50     /* number of repetitions of the four kernels */
#define OFFSET 0      /* extra elements appended to each work array */

/*
  3) Compile the code with full optimization.  Many compilers
     generate unreasonably bad code before the optimizer tightens
     things up.  If the results are unreasonably good, on the
     other hand, the optimizer might be too smart for me!

     Try compiling with:
         cc -O stream_d.c second.c -o stream_d -lm

     This is known to work on Cray, SGI, IBM, and Sun machines.

  4) Mail the results to mccalpin@cs.virginia.edu
     Be sure to include:
       a) computer hardware model number and software revision
       b) the compiler flags
       c) all of the output from the test case.
     Thanks!
*/

#define HLINE "-------------------------------------------------------------\n"

#ifndef MIN
#define MIN(x,y) ((x)<(y) ? (x) : (y))
#endif
#ifndef MAX
#define MAX(x,y) ((x)>(y) ? (x) : (y))
#endif

/* Work arrays streamed through by the four kernels. */
static double a[N+OFFSET],
              b[N+OFFSET],
              c[N+OFFSET];

/* Smallest elapsed time (seconds) observed for each kernel; seeded with the
   largest finite double so the first measurement always replaces it. */
static double mintime[4] = {DBL_MAX, DBL_MAX, DBL_MAX, DBL_MAX};

static const char *label[4] = {"Copy: ", "Scale: ", "Add: ", "Triad: "};

/* Bytes moved by one pass of each kernel: Copy and Scale touch two arrays,
   Add and Triad touch three. */
static double bytes[4] = {
  2 * sizeof(double) * N,
  2 * sizeof(double) * N,
  3 * sizeof(double) * N,
  3 * sizeof(double) * N
};

double second(void);
int    checktick(void);

/*
  second - Returns the current wall-clock time in seconds as a double,
  built from the seconds and microseconds reported by gettimeofday().

  The timezone argument is passed as NULL: POSIX leaves the behavior
  unspecified when it is non-NULL, and the value was never used here.
  The timeval is zero-initialized, so if gettimeofday() were to fail the
  function returns 0.0 instead of reading indeterminate memory.
*/
double second(void)
{
  struct timeval tp = {0, 0};

  (void)gettimeofday(&tp, NULL);
  return ((double) tp.tv_sec + (double) tp.tv_usec * 1.e-6);
}

/*
  main - Runs the Copy, Scale, Add and Triad kernels NTIMES times over the
  static arrays, keeps the minimum time seen for each kernel, and prints the
  corresponding rate in MB/s (1.0E-06 * bytes / seconds).

  The command-line arguments are not used.  Always returns 0.
*/
int main(int argc,char **args)
{
  int    j, k;
  double scalar, times[4][NTIMES], irate[4];

  (void)argc;
  (void)args;

  /* --- SETUP --- initialize the work arrays --- */

  for (j=0; j<N; j++) {
    a[j] = 1.0;
    b[j] = 2.0;
    c[j] = 0.0;
  }

  /* One extra pass over a[] before the measured loop.  The original code
     timed this pass but never used the measurement, so only the pass itself
     is kept. */
  for (j=0; j<N; j++) a[j] = 2.0E0 * a[j];

  /* --- MAIN LOOP --- repeat test cases NTIMES times --- */

  scalar = 3.0;
  for (k=0; k<NTIMES; k++) {
    /* Copy */
    times[0][k] = second();
    for (j=0; j<N; j++) c[j] = a[j];
    times[0][k] = second() - times[0][k];

    /* Scale */
    times[1][k] = second();
    for (j=0; j<N; j++) b[j] = scalar*c[j];
    times[1][k] = second() - times[1][k];

    /* Add */
    times[2][k] = second();
    for (j=0; j<N; j++) c[j] = a[j]+b[j];
    times[2][k] = second() - times[2][k];

    /* Triad */
    times[3][k] = second();
    for (j=0; j<N; j++) a[j] = b[j]+scalar*c[j];
    times[3][k] = second() - times[3][k];
  }

  /* --- SUMMARY --- */

  for (k=0; k<NTIMES; k++) {
    for (j=0; j<4; j++) mintime[j] = MIN(mintime[j], times[j][k]);
  }

  for (j=0; j<4; j++) irate[j] = 1.0E-06 * bytes[j]/mintime[j];

  printf("Function Rate (MB/s) \n");
  for (j=0; j<4; j++) printf("%s%11.4f\n", label[j],irate[j]);
  return 0;
}

#define M 20

/*
  checktick - Estimates the granularity of second() in microseconds.

  Collects M time stamps, each at least 1.0E-6 seconds after the previous
  one, and returns the smallest difference between consecutive stamps
  (truncated to an integer number of microseconds, never negative, and at
  most 1000000).
*/
int checktick(void)
{
  int    i, minDelta, Delta;
  double t1, t2, timesfound[M];

  /* Collect a sequence of M unique time values from the system. */

  for (i = 0; i < M; i++) {
    t1 = second();
    /* Spin until the clock has advanced by at least one microsecond. */
    while (((t2=second()) - t1) < 1.0E-6) ;
    timesfound[i] = t1 = t2;
  }

  /*
    Determine the minimum difference between these M values.
    This result will be our estimate (in microseconds) for the
    clock granularity.
  */

  minDelta = 1000000;
  for (i = 1; i < M; i++) {
    Delta    = (int)(1.0E6 * (timesfound[i]-timesfound[i-1]));
    minDelta = MIN(minDelta, MAX(Delta,0));
  }

  return minDelta;
}