#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <sys/time.h>

/*
   Program: Stream
   Programmer: Joe R. Zagar
   Revision: 4.0-BETA, October 24, 1995
   Original code developed by John D. McCalpin

   This program measures memory transfer rates in MB/s for simple
   computational kernels coded in C.  These numbers reveal the quality
   of code generation for simple uncacheable kernels as well as showing
   the cost of floating-point operations relative to memory accesses.

   INSTRUCTIONS:

   1) Stream requires a good bit of memory to run.  Adjust the
      value of 'N' (below) to give a 'timing calibration' of
      at least 20 clock-ticks.  This will provide rate estimates
      that should be good to about 5% precision.

   2) 'NTIMES' is the number of times each kernel is repeated; the
      smallest time observed for each kernel is the one reported.
      'OFFSET' is the number of extra elements added to each array.
*/

# define N      200000
# define NTIMES 50
# define OFFSET 0

/*
   3) Compile the code with full optimization.  Many compilers
      generate unreasonably bad code before the optimizer tightens
      things up.  If the results are unreasonably good, on the
      other hand, the optimizer might be too smart for me!

      Try compiling with:
          cc -O stream_d.c second.c -o stream_d -lm

      (second() is defined in this file, so a separate second.c is
      only needed if that definition is removed.)

      This is known to work on Cray, SGI, IBM, and Sun machines.

   4) Mail the results to mccalpin@cs.virginia.edu
      Be sure to include:
          a) computer hardware model number and software revision
          b) the compiler flags
          c) all of the output from the test case.
      Thanks!
*/

# define HLINE "-------------------------------------------------------------\n"

# ifndef MIN
# define MIN(x,y) ((x)<(y) ? (x) : (y))
# endif
# ifndef MAX
# define MAX(x,y) ((x)>(y) ? (x) : (y))
# endif

/* Number of clock samples collected by checktick(). */
# define M 20

/* Number of kernels timed by main(): Copy, Scale, Add, Triad. */
enum { KERNEL_COUNT = 4 };

double second(void);
int checktick(void);

/* Work arrays shared by all kernels. */
static double a[N+OFFSET],
              b[N+OFFSET],
              c[N+OFFSET];

/* Smallest time seen so far for each kernel.  Seeded with DBL_MAX (the
   array holds doubles) so that the first measurement always replaces it. */
static double mintime[KERNEL_COUNT] = {DBL_MAX, DBL_MAX, DBL_MAX, DBL_MAX};

static const char *label[KERNEL_COUNT] = {"Copy: ", "Scale: ", "Add: ", "Triad: "};

/* Bytes moved per pass of each kernel: Copy and Scale touch two arrays,
   Add and Triad touch three. */
static const double bytes[KERNEL_COUNT] = {
    2 * sizeof(double) * N,
    2 * sizeof(double) * N,
    3 * sizeof(double) * N,
    3 * sizeof(double) * N
};

/*
 * second -- current wall-clock time in seconds.
 *
 * Returns tv_sec + tv_usec * 1e-6 as reported by gettimeofday(), or 0.0
 * if gettimeofday() reports failure.
 *
 * The time-zone argument is passed as NULL: POSIX leaves the behaviour
 * unspecified when it is non-NULL, and struct timezone is not declared
 * by every C library in strict ISO C mode.
 */
double second(void)
{
    struct timeval tp = {0, 0};

    if (gettimeofday(&tp, NULL) != 0) {
        return 0.0;
    }
    return (double)tp.tv_sec + (double)tp.tv_usec * 1.e-6;
}

/*
 * main -- run the four kernels NTIMES times each and print, for every
 * kernel, the rate in MB/s derived from its smallest measured time.
 *
 * argc and args are not used.  Always returns 0.
 */
int main(int argc, char **args)
{
    double times[KERNEL_COUNT][NTIMES];
    double irate[KERNEL_COUNT];
    const double scalar = 3.0;
    int j, k;

    (void)argc;
    (void)args;

    /* --- SETUP --- initialise the arrays --- */

    for (j = 0; j < N; j++) {
        a[j] = 1.0;
        b[j] = 2.0;
        c[j] = 0.0;
    }

    /* One untimed pass over a[].  The kernels below read these doubled
       values, so the loop has to stay even though its duration is not
       reported. */
    for (j = 0; j < N; j++) {
        a[j] = 2.0E0 * a[j];
    }

    /* --- MAIN LOOP --- repeat test cases NTIMES times --- */

    for (k = 0; k < NTIMES; k++) {
        /* NOTE(review): an earlier comment here asked whether "barriers"
           should be pulled outside of the time call; no barriers are
           present in this version of the code. */

        /* Copy */
        times[0][k] = second();
        for (j = 0; j < N; j++) {
            c[j] = a[j];
        }
        times[0][k] = second() - times[0][k];

        /* Scale */
        times[1][k] = second();
        for (j = 0; j < N; j++) {
            b[j] = scalar * c[j];
        }
        times[1][k] = second() - times[1][k];

        /* Add */
        times[2][k] = second();
        for (j = 0; j < N; j++) {
            c[j] = a[j] + b[j];
        }
        times[2][k] = second() - times[2][k];

        /* Triad */
        times[3][k] = second();
        for (j = 0; j < N; j++) {
            a[j] = b[j] + scalar * c[j];
        }
        times[3][k] = second() - times[3][k];
    }

    /* --- SUMMARY --- */

    for (k = 0; k < NTIMES; k++) {
        for (j = 0; j < KERNEL_COUNT; j++) {
            mintime[j] = MIN(mintime[j], times[j][k]);
        }
    }

    /* A minimum time of zero (timer too coarse for this N) makes the
       division below produce a non-finite rate; increase N if that
       shows up in the output. */
    for (j = 0; j < KERNEL_COUNT; j++) {
        irate[j] = 1.0E-06 * bytes[j] / mintime[j];
    }

    printf("Function Rate (MB/s) \n");
    for (j = 0; j < KERNEL_COUNT; j++) {
        printf("%s%11.4f\n", label[j], irate[j]);
    }
    return 0;
}

/*
 * checktick -- estimate the granularity of second() in microseconds.
 *
 * Collects M time values, each at least one microsecond later than the
 * previous sample, and returns the smallest difference between
 * consecutive values, truncated to an integer number of microseconds.
 * Negative differences are counted as 0.  The result is at most 1000000.
 */
int checktick(void)
{
    int i, minDelta, Delta;
    double t1, t2, timesfound[M];

    /* Collect a sequence of M unique time values from the system. */

    for (i = 0; i < M; i++) {
        t1 = second();
        /* Spin until the clock has advanced by at least 1 microsecond. */
        while (((t2 = second()) - t1) < 1.0E-6) {
            ;
        }
        timesfound[i] = t1 = t2;
    }

    /*
       Determine the minimum difference between these M values.
       This result will be our estimate (in microseconds) for the
       clock granularity.
    */

    minDelta = 1000000;
    for (i = 1; i < M; i++) {
        Delta = (int)(1.0E6 * (timesfound[i] - timesfound[i-1]));
        minDelta = MIN(minDelta, MAX(Delta, 0));
    }

    return minDelta;
}