#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <float.h>
#include <sys/time.h>

/*
  Program: Stream
  Programmer: Joe R. Zagar
  Revision: 4.0-BETA, October 24, 1995
  Original code developed by John D. McCalpin

  This program measures memory transfer rates in MB/s for simple
  computational kernels coded in C.  These numbers reveal the quality
  of code generation for simple uncacheable kernels as well as showing
  the cost of floating-point operations relative to memory accesses.

  INSTRUCTIONS:

    1) Stream requires a good bit of memory to run.  Adjust the
       value of 'N' (below) to give a 'timing calibration' of
       at least 20 clock-ticks.  This will provide rate estimates
       that should be good to about 5% precision.
       NOTE: main() in this file prints only the rate table; it does
       not report a calibration value.  checktick() is still provided
       for callers that want a clock-granularity estimate.
*/

# define N      200000
# define NTIMES 50
# define OFFSET 0

/*
    3) Compile the code with full optimization.  Many compilers
       generate unreasonably bad code before the optimizer tightens
       things up.  If the results are unreasonably good, on the
       other hand, the optimizer might be too smart for me!

       Try compiling with:
             cc -O stream_d.c second.c -o stream_d -lm
       (second() is defined in this file, so a separate second.c is
       not required when building this file on its own.)

       This is known to work on Cray, SGI, IBM, and Sun machines.

    4) Mail the results to mccalpin@cs.virginia.edu
       Be sure to include:
          a) computer hardware model number and software revision
          b) the compiler flags
          c) all of the output from the test case.
       Thanks!
*/

# define HLINE "-------------------------------------------------------------\n"

# ifndef MIN
# define MIN(x,y) ((x)<(y) ? (x) : (y))
# endif
# ifndef MAX
# define MAX(x,y) ((x)>(y) ? (x) : (y))
# endif

/* Number of distinct clock readings sampled by checktick(). */
# define M 20

double second(void);
int checktick(void);

/* Work arrays for the four kernels. */
static double a[N+OFFSET],
              b[N+OFFSET],
              c[N+OFFSET];

/* Best (smallest) elapsed time seen for each kernel, in seconds. */
static double mintime[4] = {DBL_MAX, DBL_MAX, DBL_MAX, DBL_MAX};

static const char *label[4] = {"Copy: ", "Scale: ", "Add: ", "Triad: "};

/*
 * Bytes moved per pass of each kernel: Copy and Scale read one array and
 * write one; Add and Triad read two arrays and write one.
 */
static double bytes[4] = {
    2 * sizeof(double) * N,
    2 * sizeof(double) * N,
    3 * sizeof(double) * N,
    3 * sizeof(double) * N
};

/*
 * Return the current wall-clock time in seconds (with microsecond
 * fraction) as reported by gettimeofday().
 *
 * The timezone argument is passed as NULL: POSIX leaves the behaviour
 * unspecified for a non-NULL pointer, and struct timezone is not declared
 * in strict ISO C compilation modes on some C libraries.
 *
 * Returns 0.0 if gettimeofday() reports failure, rather than reading an
 * unset struct timeval.
 */
double second(void)
{
    struct timeval tp;

    if (gettimeofday(&tp, NULL) != 0)
        return 0.0;

    return (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6;
}

/*
 * Run the Copy, Scale, Add and Triad kernels NTIMES times each over arrays
 * of N doubles, take the minimum elapsed time per kernel, and print the
 * resulting rate in MB/s (1e6 bytes per second).
 *
 * argc and args are not used.  Always returns 0.
 */
int main(int argc, char **args)
{
    int j, k;
    double scalar;
    double times[4][NTIMES];   /* elapsed seconds: [kernel][repetition] */
    double irate[4];           /* best rate per kernel, MB/s */

    (void) argc;
    (void) args;

    /* --- SETUP --- initialise the arrays --- */

    for (j = 0; j < N; j++) {
        a[j] = 1.0;
        b[j] = 2.0;
        c[j] = 0.0;
    }

    /*
     * One doubling pass over a[] before the measured loops.  Earlier
     * revisions timed this pass but never used the measurement; the pass
     * itself is kept so the array contents are unchanged.
     */
    for (j = 0; j < N; j++)
        a[j] = 2.0E0 * a[j];

    /* --- MAIN LOOP --- repeat test cases NTIMES times --- */

    scalar = 3.0;
    for (k = 0; k < NTIMES; k++) {
        /*
         * Original note: "should all these barriers be pulled outside of
         * the time call?"  (No barriers are present in this version.)
         */

        /* Copy */
        times[0][k] = second();
        for (j = 0; j < N; j++)
            c[j] = a[j];
        times[0][k] = second() - times[0][k];

        /* Scale */
        times[1][k] = second();
        for (j = 0; j < N; j++)
            b[j] = scalar * c[j];
        times[1][k] = second() - times[1][k];

        /* Add */
        times[2][k] = second();
        for (j = 0; j < N; j++)
            c[j] = a[j] + b[j];
        times[2][k] = second() - times[2][k];

        /* Triad */
        times[3][k] = second();
        for (j = 0; j < N; j++)
            a[j] = b[j] + scalar * c[j];
        times[3][k] = second() - times[3][k];
    }

    /* --- SUMMARY --- */

    for (k = 0; k < NTIMES; k++) {
        for (j = 0; j < 4; j++)
            mintime[j] = MIN(mintime[j], times[j][k]);
    }

    for (j = 0; j < 4; j++)
        irate[j] = 1.0E-06 * bytes[j] / mintime[j];

    printf("Function      Rate (MB/s)  \n");
    for (j = 0; j < 4; j++)
        printf("%s%11.4f\n", label[j], irate[j]);

    return 0;
}

/*
 * Estimate the granularity of second() in microseconds.
 *
 * Collects M clock readings, each at least one microsecond later than the
 * previous one (busy-waiting on second() until it advances), and returns
 * the smallest difference between consecutive readings.  Negative
 * differences are clamped to 0, and the result never exceeds 1000000.
 */
int checktick(void)
{
    int i, min_delta, delta;
    double t1, t2, timesfound[M];

    /* Collect a sequence of M unique time values from the system. */
    for (i = 0; i < M; i++) {
        t1 = second();
        while (((t2 = second()) - t1) < 1.0E-6)
            ;   /* spin until the clock has advanced by >= 1 microsecond */
        timesfound[i] = t2;
    }

    /*
     * Determine the minimum difference between these M values.
     * This result will be our estimate (in microseconds) for the
     * clock granularity.
     */
    min_delta = 1000000;
    for (i = 1; i < M; i++) {
        delta = (int) (1.0E6 * (timesfound[i] - timesfound[i - 1]));
        min_delta = MIN(min_delta, MAX(delta, 0));
    }

    return min_delta;
}