#include /* int gettimeofday(struct timeval *tp, struct timezone *tzp); */ double second() { /* struct timeval { long tv_sec; long tv_usec; }; struct timezone { int tz_minuteswest; int tz_dsttime; }; */ struct timeval tp; struct timezone tzp; int i; i = gettimeofday(&tp,&tzp); return ((double) tp.tv_sec + (double) tp.tv_usec * 1.e-6); } # include # include # include # include # include /* * Program: Stream * Programmer: Joe R. Zagar * Revision: 4.0-BETA, October 24, 1995 * Original code developed by John D. McCalpin * * This program measures memory transfer rates in MB/s for simple * computational kernels coded in C. These numbers reveal the quality * of code generation for simple uncacheable kernels as well as showing * the cost of floating-point operations relative to memory accesses. * * INSTRUCTIONS: * * 1) Stream requires a good bit of memory to run. Adjust the * value of 'N' (below) to give a 'timing calibration' of * at least 20 clock-ticks. This will provide rate estimates * that should be good to about 5% precision. */ # define N 200000 # define NTIMES 50 # define OFFSET 0 /* * 3) Compile the code with full optimization. Many compilers * generate unreasonably bad code before the optimizer tightens * things up. If the results are unreasonably good, on the * other hand, the optimizer might be too smart for me! * * Try compiling with: * cc -O stream_d.c second.c -o stream_d -lm * * This is known to work on Cray, SGI, IBM, and Sun machines. * * * 4) Mail the results to mccalpin@cs.virginia.edu * Be sure to include: * a) computer hardware model number and software revision * b) the compiler flags * c) all of the output from the test case. * Thanks! * */ # define HLINE "-------------------------------------------------------------\n" # ifndef MIN # define MIN(x,y) ((x)<(y) ? (x) : (y)) # endif # ifndef MAX # define MAX(x,y) ((x)>(y) ? (x) : (y)) # endif static double a[N+OFFSET], b[N+OFFSET], c[N+OFFSET]; /*double *a,*b,*c;*/ static double mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; static const char *label[4] = {"Copy: ", "Scale: ", "Add: ", "Triad: "}; static double bytes[4] = { 2 * sizeof(double) * N, 2 * sizeof(double) * N, 3 * sizeof(double) * N, 3 * sizeof(double) * N }; extern double second(); int main(int argc,char **args) { int checktick(void); register int j, k; double scalar, t, times[4][NTIMES],irate[4]; /* --- SETUP --- determine precision and check timing --- */ for (j=0; j