#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <sys/time.h>

/*
  Program: Stream
  Programmer: Joe R. Zagar
  Revision: 4.0-BETA, October 24, 1995
  Original code developed by John D. McCalpin

  This program measures memory transfer rates in MB/s for simple
  computational kernels coded in C.  These numbers reveal the quality
  of code generation for simple uncacheable kernels as well as showing
  the cost of floating-point operations relative to memory accesses.

  INSTRUCTIONS:

  1) Stream requires a good bit of memory to run.  Adjust the
     value of 'N' (below) to give a 'timing calibration' of
     at least 20 clock-ticks.  This will provide rate estimates
     that should be good to about 5% precision.
*/

#define N      200000   /* number of elements in each of the three arrays */
#define NTIMES 50       /* number of times every kernel is timed           */
#define OFFSET 0        /* extra elements appended to each array           */

/*
  3) Compile the code with full optimization.  Many compilers
     generate unreasonably bad code before the optimizer tightens
     things up.  If the results are unreasonably good, on the
     other hand, the optimizer might be too smart for me!

     Try compiling with:
         cc -O stream_d.c second.c -o stream_d -lm

     (In this file the timer second() is defined below.)

     This is known to work on Cray, SGI, IBM, and Sun machines.

  4) Mail the results to mccalpin@cs.virginia.edu
     Be sure to include:
         a) computer hardware model number and software revision
         b) the compiler flags
         c) all of the output from the test case.
     Thanks!
*/

#define HLINE "-------------------------------------------------------------\n"

#ifndef MIN
#define MIN(x,y) ((x)<(y) ? (x) : (y))
#endif
#ifndef MAX
#define MAX(x,y) ((x)>(y) ? (x) : (y))
#endif

/* Number of distinct time stamps sampled by checktick(). */
#define M 20

/* Row indices shared by the per-kernel tables below. */
enum {
    KERNEL_COPY,
    KERNEL_SCALE,
    KERNEL_ADD,
    KERNEL_TRIAD,
    KERNEL_COUNT
};

double second(void);
int checktick(void);

/* Work arrays read and written by the four kernels. */
static double a[N + OFFSET],
              b[N + OFFSET],
              c[N + OFFSET];

/* Fastest time (seconds) seen per kernel; starts at the largest double so
   that any measured time replaces it. */
static double mintime[KERNEL_COUNT] = {DBL_MAX, DBL_MAX, DBL_MAX, DBL_MAX};

/* Labels printed in front of each rate. */
static const char *label[KERNEL_COUNT] = {"Copy: ", "Scale: ", "Add: ", "Triad: "};

/* Bytes credited to one pass of each kernel: Copy and Scale touch two
   arrays per element, Add and Triad touch three. */
static double bytes[KERNEL_COUNT] = {
    2 * sizeof(double) * N,
    2 * sizeof(double) * N,
    3 * sizeof(double) * N,
    3 * sizeof(double) * N
};

/*
 * second - current time of day in seconds, as a double.
 *
 * Returns tv_sec plus tv_usec scaled to seconds, as reported by
 * gettimeofday().  Returns 0.0 if gettimeofday() reports failure, instead
 * of reading a time structure that was never filled in.
 *
 * The time-zone argument is passed as NULL: the result never used it, and
 * a build that restricts headers to ISO C may not declare struct timezone.
 */
double second(void)
{
    struct timeval tp = {0, 0};

    if (gettimeofday(&tp, NULL) != 0) {
        return 0.0;
    }
    return (double)tp.tv_sec + (double)tp.tv_usec * 1.e-6;
}

/*
 * main - time the Copy, Scale, Add and Triad kernels NTIMES each and print,
 * for every kernel, the rate derived from its fastest run.
 *
 * The rate is 1.0E-06 * bytes / seconds, i.e. units of 10^6 bytes per second.
 * argc and args are not used.  Always returns 0.
 */
int main(int argc, char **args)
{
    int j, k;
    double scalar;
    double times[KERNEL_COUNT][NTIMES];
    double irate[KERNEL_COUNT];

    (void)argc;
    (void)args;

    /* --- SETUP --- give every array element a known value --- */

    for (j = 0; j < N; j++) {
        a[j] = 1.0;
        b[j] = 2.0;
        c[j] = 0.0;
    }

    /* Calibration pass over a[].  Earlier code timed this loop but never
       used the measurement, so only the doubling of a[] is kept. */
    for (j = 0; j < N; j++) {
        a[j] = 2.0E0 * a[j];
    }

    /* --- MAIN LOOP --- repeat test cases NTIMES times --- */

    scalar = 3.0;
    for (k = 0; k < NTIMES; k++) {
        /* Each slot first holds the start stamp, then the elapsed time. */
        times[KERNEL_COPY][k] = second();
        for (j = 0; j < N; j++) {
            c[j] = a[j];
        }
        times[KERNEL_COPY][k] = second() - times[KERNEL_COPY][k];

        times[KERNEL_SCALE][k] = second();
        for (j = 0; j < N; j++) {
            b[j] = scalar * c[j];
        }
        times[KERNEL_SCALE][k] = second() - times[KERNEL_SCALE][k];

        times[KERNEL_ADD][k] = second();
        for (j = 0; j < N; j++) {
            c[j] = a[j] + b[j];
        }
        times[KERNEL_ADD][k] = second() - times[KERNEL_ADD][k];

        times[KERNEL_TRIAD][k] = second();
        for (j = 0; j < N; j++) {
            a[j] = b[j] + scalar * c[j];
        }
        times[KERNEL_TRIAD][k] = second() - times[KERNEL_TRIAD][k];
    }

    /* --- SUMMARY --- */

    for (k = 0; k < NTIMES; k++) {
        for (j = 0; j < KERNEL_COUNT; j++) {
            mintime[j] = MIN(mintime[j], times[j][k]);
        }
    }

    for (j = 0; j < KERNEL_COUNT; j++) {
        irate[j] = 1.0E-06 * bytes[j] / mintime[j];
    }

    printf("Function Rate (MB/s) \n");
    for (j = 0; j < KERNEL_COUNT; j++) {
        printf("%s%11.4f\n", label[j], irate[j]);
    }
    return 0;
}

/*
 * checktick - estimate the granularity of second() in microseconds.
 *
 * Collects M time stamps, each at least one microsecond later than the
 * reading taken just before it, and returns the smallest difference between
 * consecutive stamps truncated to whole microseconds.  Negative differences
 * count as 0, and the result is never larger than 1000000.
 *
 * Note: the collection loop only ends once second() advances.
 */
int checktick(void)
{
    int i, minDelta, Delta;
    double t1, t2, timesfound[M];

    /* Collect a sequence of M unique time values from the system. */
    for (i = 0; i < M; i++) {
        t1 = second();
        while (((t2 = second()) - t1) < 1.0E-6) {
            /* spin until the clock moves by at least a microsecond */
        }
        timesfound[i] = t2;
    }

    /*
      Determine the minimum difference between these M values.
      This result will be our estimate (in microseconds) for the
      clock granularity.
    */
    minDelta = 1000000;
    for (i = 1; i < M; i++) {
        Delta = (int)(1.0E6 * (timesfound[i] - timesfound[i - 1]));
        minDelta = MIN(minDelta, MAX(Delta, 0));
    }

    return minDelta;
}