/* A simplification of the Stream benchmark for OpenMP. The array for each thread is a large distance from the array for the other threads. Original code developed by John D. McCalpin. */ #include #include #include #include #include #include #include #include #define NTIMESINNER 1 #define N 2*4*20000000 //#define N 1200000 //#define N 120000 #define NTIMES 50 #define OFFSET 0 # if !defined(MIN) # define MIN(x,y) ((x)<(y) ? (x) : (y)) # endif # if !defined(MAX) # define MAX(x,y) ((x)>(y) ? (x) : (y)) # endif static double a[64][10000000],b[64][10000000],c[64][10000000]; static double mintime = FLT_MAX; static double bytes = 3 * sizeof(double) * N; int main() { const static double scalar = 3.0; #pragma omp threadprivate(scalar) double times[NTIMES],rate; int size; static int n; #pragma omp threadprivate(n) char *env; FILE *fd; env = getenv("OMP_NUM_THREADS"); if (!env) env = (char *) "1"; sscanf(env,"%d",&size); #pragma omp parallel for schedule(static) for (int j=0; j omp_get_thread_num())); for (int i=0; i omp_get_thread_num())); double *aa = a[j]; // these don't change the timings const double *bb = b[j]; const double *cc = c[j]; for (int l=0; l