#include <sys/time.h>

#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

/*
        Program: Stream
        Programmer: Joe R. Zagar
        Revision: 4.0-BETA, October 24, 1995
        Original code developed by John D. McCalpin

        This program measures memory transfer rates in MB/s for simple
        computational kernels coded in C.  These numbers reveal the quality
        of code generation for simple uncacheable kernels as well as showing
        the cost of floating-point operations relative to memory accesses.

        INSTRUCTIONS:

        1) Stream requires a good bit of memory to run.  Adjust the
           value of 'N' (below) to give a 'timing calibration' of
           at least 20 clock-ticks.  This will provide rate estimates
           that should be good to about 5% precision.
*/

#define N 200000
#define NTIMES 50
#define OFFSET 0

/*
        3) Compile the code with full optimization.  Many compilers
           generate unreasonably bad code before the optimizer tightens
           things up.  If the results are unreasonably good, on the
           other hand, the optimizer might be too smart for me!

           Try compiling with:
                 cc -O stream_d.c second.c -o stream_d -lm

           (NOTE: second() is defined in this file, so a separate
           second.c must not be linked in as well unless the definition
           below is removed.)

           This is known to work on Cray, SGI, IBM, and Sun machines.

        4) Mail the results to mccalpin@cs.virginia.edu
           Be sure to include:
                a) computer hardware model number and software revision
                b) the compiler flags
                c) all of the output from the test case.
           Thanks!
*/

#define HLINE "-------------------------------------------------------------\n"

#ifndef MIN
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
#ifndef MAX
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif

/* Number of distinct clock readings sampled by checktick(). */
#define M 20

double second(void);
int checktick(void);

/* Working arrays for the four kernels. */
static double a[N + OFFSET], b[N + OFFSET], c[N + OFFSET];

/* Best (smallest) elapsed time seen per kernel; seeded with the largest
   double so that the first measurement always replaces it. */
static double mintime[4] = {DBL_MAX, DBL_MAX, DBL_MAX, DBL_MAX};

static const char *label[4] = {"Copy: ", "Scale: ", "Add: ", "Triad: "};

/* Bytes moved per pass of each kernel: Copy and Scale touch two arrays,
   Add and Triad touch three. */
static double bytes[4] = {2 * sizeof(double) * N, 2 * sizeof(double) * N,
                          3 * sizeof(double) * N, 3 * sizeof(double) * N};

/*
 * Return the current time of day in seconds, including the microsecond
 * fraction, as reported by gettimeofday().
 *
 * The timezone argument is passed as NULL: POSIX leaves the behaviour
 * unspecified for a non-null pointer, and struct timezone is not visible
 * in strict ISO C compilation modes.
 *
 * Returns 0.0 if gettimeofday() reports failure, rather than converting
 * an uninitialised timeval.
 */
double second(void)
{
    struct timeval tp = {0, 0};

    if (gettimeofday(&tp, NULL) != 0) {
        return 0.0;
    }
    return (double)tp.tv_sec + (double)tp.tv_usec * 1.e-6;
}

/*
 * Run the Copy, Scale, Add and Triad kernels NTIMES times each over the
 * static arrays, keep the minimum elapsed time per kernel, and print the
 * corresponding transfer rate in MB/s.
 *
 * The command-line arguments are not used.  Always returns 0.
 */
int main(int argc, char **args)
{
    double times[4][NTIMES];
    double irate[4];
    const double scalar = 3.0;
    int j, k;

    (void)argc;
    (void)args;

    /* --- SETUP --- */

    for (j = 0; j < N; j++) {
        a[j] = 1.0;
        b[j] = 2.0;
        c[j] = 0.0;
    }

    /* One untimed pass over a[].  It is kept because it changes the data
       the kernels start from; the elapsed time the code used to take
       around it was never read, so the timing calls are gone. */
    for (j = 0; j < N; j++) {
        a[j] = 2.0E0 * a[j];
    }

    /* --- MAIN LOOP --- repeat test cases NTIMES times --- */

    for (k = 0; k < NTIMES; k++) {
        /* Copy */
        times[0][k] = second();
        for (j = 0; j < N; j++) {
            c[j] = a[j];
        }
        times[0][k] = second() - times[0][k];

        /* Scale */
        times[1][k] = second();
        for (j = 0; j < N; j++) {
            b[j] = scalar * c[j];
        }
        times[1][k] = second() - times[1][k];

        /* Add */
        times[2][k] = second();
        for (j = 0; j < N; j++) {
            c[j] = a[j] + b[j];
        }
        times[2][k] = second() - times[2][k];

        /* Triad */
        times[3][k] = second();
        for (j = 0; j < N; j++) {
            a[j] = b[j] + scalar * c[j];
        }
        times[3][k] = second() - times[3][k];
    }

    /* --- SUMMARY --- */

    for (k = 0; k < NTIMES; k++) {
        for (j = 0; j < 4; j++) {
            mintime[j] = MIN(mintime[j], times[j][k]);
        }
    }

    /* bytes / seconds, scaled by 1e-6 to give MB/s. */
    for (j = 0; j < 4; j++) {
        irate[j] = 1.0E-06 * bytes[j] / mintime[j];
    }

    printf("Function Rate (MB/s) \n");
    for (j = 0; j < 4; j++) {
        printf("%s%11.4f\n", label[j], irate[j]);
    }
    return 0;
}

/*
 * Estimate the granularity of second() in microseconds.
 *
 * Collects M readings, each at least one microsecond later than the
 * reading that started its wait, and returns the smallest non-negative
 * difference between consecutive readings, capped at 1000000.
 */
int checktick(void)
{
    double timesfound[M];
    double t1, t2;
    int i;
    int min_delta = 1000000;

    /* Collect a sequence of M unique time values from the system. */
    for (i = 0; i < M; i++) {
        t1 = second();
        do {
            t2 = second();
        } while (t2 - t1 < 1.0E-6);
        timesfound[i] = t2;
    }

    /* Determine the minimum difference between these M values.  The
       comparison is done in double and only values already known to be
       in [0, min_delta) are converted, so the cast to int can never be
       out of range. */
    for (i = 1; i < M; i++) {
        double delta = 1.0E6 * (timesfound[i] - timesfound[i - 1]);

        if (delta < 0.0) {
            delta = 0.0;
        }
        if (delta < (double)min_delta) {
            min_delta = (int)delta;
        }
    }

    return min_delta;
}