/*-----------------------------------------------------------------------*/
/* Program: Stream                                                       */
/* Revision: $Id: stream.c,v 5.9 2009/04/11 16:35:00 mccalpin Exp mccalpin $ */
/* Original code developed by John D. McCalpin                           */
/* Programmers: John D. McCalpin                                         */
/*              Joe R. Zagar                                             */
/*                                                                       */
/* This program measures memory transfer rates in MB/s for simple        */
/* computational kernels coded in C.                                     */
/*-----------------------------------------------------------------------*/
/* Copyright 1991-2005: John D. McCalpin                                 */
/*-----------------------------------------------------------------------*/
/* License:                                                              */
/*  1. You are free to use this program and/or to redistribute           */
/*     this program.                                                     */
/*  2. You are free to modify this program for your own use,             */
/*     including commercial use, subject to the publication              */
/*     restrictions in item 3.                                           */
/*  3. You are free to publish results obtained from running this        */
/*     program, or from works that you derive from this program,         */
/*     with the following limitations:                                   */
/*     3a. In order to be referred to as "STREAM benchmark results",     */
/*         published results must be in conformance to the STREAM        */
/*         Run Rules, (briefly reviewed below) published at              */
/*         http://www.cs.virginia.edu/stream/ref.html                    */
/*         and incorporated herein by reference.                         */
/*         As the copyright holder, John McCalpin retains the            */
/*         right to determine conformity with the Run Rules.             */
/*     3b. Results based on modified source code or on runs not in       */
/*         accordance with the STREAM Run Rules must be clearly          */
/*         labelled whenever they are published.  Examples of            */
/*         proper labelling include:                                     */
/*         "tuned STREAM benchmark results"                              */
/*         "based on a variant of the STREAM benchmark code"             */
/*         Other comparable, clear and reasonable labelling is           */
/*         acceptable.                                                   */
/*     3c. Submission of results to the STREAM benchmark web site        */
/*         is encouraged, but not required.                              */
/*  4. Use of this program or creation of derived works based on this    */
/*     program constitutes acceptance of these licensing restrictions.   */
/*  5. Absolutely no warranty is expressed or implied.                   */
/*-----------------------------------------------------------------------*/
# include <stdio.h>
# include <math.h>
# include <float.h>
# include <limits.h>
# include <sys/time.h>

/* INSTRUCTIONS:
 *
 *      1) Stream requires a good bit of memory to run.  Adjust the
 *          value of 'N' (below) to give a 'timing calibration' of
 *          at least 20 clock-ticks.  This will provide rate estimates
 *          that should be good to about 5% precision.
 */

#ifndef N
#   define N        2000000
#endif
#ifndef NTIMES
#   define NTIMES   10
#endif
#ifndef OFFSET
#   define OFFSET   0
#endif

/*
 * The summary in main() discards the first iteration and divides by
 * (NTIMES-1), so fewer than two iterations would divide by zero.
 * Fall back to the default count in that case.
 */
#if NTIMES < 2
#   undef  NTIMES
#   define NTIMES   10
#endif

/*
 *      3) Compile the code with full optimization.  Many compilers
 *         generate unreasonably bad code before the optimizer tightens
 *         things up.  If the results are unreasonably good, on the
 *         other hand, the optimizer might be too smart for me!
 *
 *         Try compiling with:
 *               cc -O stream_omp.c -o stream_omp
 *
 *         This is known to work on Cray, SGI, IBM, and Sun machines.
 *
 *
 *      4) Mail the results to mccalpin@cs.virginia.edu
 *         Be sure to include:
 *              a) computer hardware model number and software revision
 *              b) the compiler flags
 *              c) all of the output from the test case.
 * Thanks!
 *
 */

# define HLINE "-------------------------------------------------------------\n"

# ifndef MIN
# define MIN(x,y) ((x)<(y)?(x):(y))
# endif
# ifndef MAX
# define MAX(x,y) ((x)>(y)?(x):(y))
# endif

/* The three work arrays streamed through by the four kernels. */
static double   a[N+OFFSET],
                b[N+OFFSET],
                c[N+OFFSET];

/* Per-kernel timing statistics, indexed 0..3 = Copy, Scale, Add, Triad. */
static double   avgtime[4] = {0}, maxtime[4] = {0},
                mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};

static const char *label[4] = {"Copy: ", "Scale: ",
    "Add: ", "Triad: "};

/* Bytes counted per kernel pass: Copy and Scale read one array and write
 * one; Add and Triad read two arrays and write one. */
static double   bytes[4] = {
    2 * sizeof(double) * N,
    2 * sizeof(double) * N,
    3 * sizeof(double) * N,
    3 * sizeof(double) * N
    };

extern double mysecond(void);
extern void checkSTREAMresults(void);
extern int checktick(void);
#ifdef TUNED
extern void tuned_STREAM_Copy(void);
extern void tuned_STREAM_Scale(double scalar);
extern void tuned_STREAM_Add(void);
extern void tuned_STREAM_Triad(double scalar);
#endif
#ifdef _OPENMP
extern int omp_get_num_threads(void);
#endif

/*
 * Benchmark driver: prints the configuration, estimates the timer
 * granularity, runs the four kernels NTIMES times each, reports the
 * best rate plus avg/min/max times, then validates the array contents.
 * Always returns 0.
 */
int
main(void)
{
    int         quantum;
    int         BytesPerWord;
    int         j, k;
    double      scalar, t, times[4][NTIMES];

    /* --- SETUP --- determine precision and check timing --- */

    printf(HLINE);
    printf("STREAM version $Revision: 5.9 $\n");
    printf(HLINE);
    BytesPerWord = (int) sizeof(double);
    printf("This system uses %d bytes per DOUBLE PRECISION word.\n",
        BytesPerWord);

    printf(HLINE);
#ifdef NO_LONG_LONG
    printf("Array size = %d, Offset = %d\n" , N, OFFSET);
#else
    printf("Array size = %llu, Offset = %d\n", (unsigned long long) N, OFFSET);
#endif

    printf("Total memory required = %.1f MB.\n",
        (3.0 * BytesPerWord) * ( (double) N / 1048576.0));
    printf("Each test is run %d times, but only\n", NTIMES);
    printf("the *best* time for each is used.\n");

#ifdef _OPENMP
    printf(HLINE);
#pragma omp parallel
    {
#pragma omp master
        {
            k = omp_get_num_threads();
            printf ("Number of Threads requested = %i\n",k);
        }
    }
#endif

    printf(HLINE);
#pragma omp parallel
    {
        printf ("Printing one line per active thread....\n");
    }

    /* Initialize the work arrays (checkSTREAMresults replays these values). */
#pragma omp parallel for
    for (j=0; j<N; j++) {
        a[j] = 1.0;
        b[j] = 2.0;
        c[j] = 0.0;
    }

    printf(HLINE);

    if  ( (quantum = checktick()) >= 1)
        printf("Your clock granularity/precision appears to be "
            "%d microseconds.\n", quantum);
    else {
        printf("Your clock granularity appears to be "
            "less than one microsecond.\n");
        quantum = 1;    /* avoid dividing by zero in the tick estimate below */
    }

    /* Time one pass over a[] as a rough estimate of the cost of each test.
     * This doubles a[]; checkSTREAMresults accounts for it. */
    t = mysecond();
#pragma omp parallel for
    for (j = 0; j < N; j++)
        a[j] = 2.0E0 * a[j];
    t = 1.0E6 * (mysecond() - t);

    printf("Each test below will take on the order"
        " of %d microseconds.\n", (int) t  );
    printf(" (= %d clock ticks)\n", (int) (t/quantum) );
    printf("Increase the size of the arrays if this shows that\n");
    printf("you are not getting at least 20 clock ticks per test.\n");

    printf(HLINE);

    printf("WARNING -- The above is only a rough guideline.\n");
    printf("For best results, please be sure you know the\n");
    printf("precision of your system timer.\n");
    printf(HLINE);

    /* --- MAIN LOOP --- repeat test cases NTIMES times --- */

    scalar = 3.0;
    for (k=0; k<NTIMES; k++)
        {
        /* Copy */
        times[0][k] = mysecond();
#ifdef TUNED
        tuned_STREAM_Copy();
#else
#pragma omp parallel for
        for (j=0; j<N; j++)
            c[j] = a[j];
#endif
        times[0][k] = mysecond() - times[0][k];

        /* Scale */
        times[1][k] = mysecond();
#ifdef TUNED
        tuned_STREAM_Scale(scalar);
#else
#pragma omp parallel for
        for (j=0; j<N; j++)
            b[j] = scalar*c[j];
#endif
        times[1][k] = mysecond() - times[1][k];

        /* Add */
        times[2][k] = mysecond();
#ifdef TUNED
        tuned_STREAM_Add();
#else
#pragma omp parallel for
        for (j=0; j<N; j++)
            c[j] = a[j]+b[j];
#endif
        times[2][k] = mysecond() - times[2][k];

        /* Triad */
        times[3][k] = mysecond();
#ifdef TUNED
        tuned_STREAM_Triad(scalar);
#else
#pragma omp parallel for
        for (j=0; j<N; j++)
            a[j] = b[j]+scalar*c[j];
#endif
        times[3][k] = mysecond() - times[3][k];
        }

    /* --- SUMMARY --- */

    for (k=1; k<NTIMES; k++) /* note -- skip first iteration */
        {
        for (j=0; j<4; j++)
            {
            avgtime[j] = avgtime[j] + times[j][k];
            mintime[j] = MIN(mintime[j], times[j][k]);
            maxtime[j] = MAX(maxtime[j], times[j][k]);
            }
        }

    printf("Function Rate (MB/s) Avg time Min time Max time\n");
    for (j=0; j<4; j++) {
        avgtime[j] = avgtime[j]/(double)(NTIMES-1);

        /* The reported rate is based on the best (minimum) time. */
        printf("%s%11.4f %11.4f %11.4f %11.4f\n", label[j],
               1.0E-06 * bytes[j]/mintime[j],
               avgtime[j],
               mintime[j],
               maxtime[j]);
    }
    printf(HLINE);

    /* --- Check Results --- */
    checkSTREAMresults();
    printf(HLINE);

    return 0;
}

# define        M       20

/*
 * Estimate the granularity of mysecond() in microseconds.
 * Returns the smallest non-negative difference between M consecutive
 * distinct timer readings, truncated to an integer (0 means the timer
 * resolves better than one microsecond).
 */
int
checktick(void)
    {
    int         i, minDelta, Delta;
    double      t1, t2, timesfound[M];

/*  Collect a sequence of M unique time values from the system. */

    for (i = 0; i < M; i++) {
        t1 = mysecond();
        /* spin until the clock has advanced by at least one microsecond */
        while( ((t2=mysecond()) - t1) < 1.0E-6 )
            ;
        timesfound[i] = t2;
        }

/*
 * Determine the minimum difference between these M values.
 * This result will be our estimate (in microseconds) for the
 * clock granularity.
 */

    minDelta = 1000000;
    for (i = 1; i < M; i++) {
        Delta = (int)( 1.0E6 * (timesfound[i]-timesfound[i-1]));
        minDelta = MIN(minDelta, MAX(Delta,0));
        }

    return(minDelta);
    }



/* A gettimeofday routine to give access to the wall
   clock timer on most UNIX-like systems.  */

#include <sys/time.h>

/*
 * Returns the current wall-clock time in seconds (with microsecond
 * resolution) as a double.
 */
double mysecond(void)
{
    /* Zero-initialised so the result is well defined even if the call fails. */
    struct timeval tp = {0, 0};

    /* The timezone argument is obsolete; NULL is the portable choice. */
    (void) gettimeofday(&tp, NULL);
    return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 );
}

/*
 * Validate the benchmark: replay the kernel sequence on scalars to get the
 * value every element of a[], b[] and c[] should hold, then compare N times
 * that value against the actual array sums using a relative tolerance.
 * Prints either the first failing array or "Solution Validates".
 */
void checkSTREAMresults (void)
{
    double aj,bj,cj,scalar;
    double asum,bsum,csum;
    double epsilon;
    int j,k;

    /* reproduce initialization */
    aj = 1.0;
    bj = 2.0;
    cj = 0.0;
    /* a[] is modified during timing check */
    aj = 2.0E0 * aj;
    /* now execute timing loop */
    scalar = 3.0;
    for (k=0; k<NTIMES; k++)
        {
            cj = aj;
            bj = scalar*cj;
            cj = aj+bj;
            aj = bj+scalar*cj;
        }
    /* expected sums: every element holds the same value */
    aj = aj * (double) (N);
    bj = bj * (double) (N);
    cj = cj * (double) (N);

    asum = 0.0;
    bsum = 0.0;
    csum = 0.0;
    for (j=0; j<N; j++) {
        asum += a[j];
        bsum += b[j];
        csum += c[j];
    }
#ifdef VERBOSE
    printf ("Results Comparison: \n");
    printf (" Expected : %f %f %f \n",aj,bj,cj);
    printf (" Observed : %f %f %f \n",asum,bsum,csum);
#endif

    epsilon = 1.e-8;

    if (fabs(aj-asum)/asum > epsilon) {
        printf ("Failed Validation on array a[]\n");
        printf (" Expected : %f \n",aj);
        printf (" Observed : %f \n",asum);
    }
    else if (fabs(bj-bsum)/bsum > epsilon) {
        printf ("Failed Validation on array b[]\n");
        printf (" Expected : %f \n",bj);
        printf (" Observed : %f \n",bsum);
    }
    else if (fabs(cj-csum)/csum > epsilon) {
        printf ("Failed Validation on array c[]\n");
        printf (" Expected : %f \n",cj);
        printf (" Observed : %f \n",csum);
    }
    else {
        printf ("Solution Validates\n");
    }
}

/* Copy kernel used when TUNED is defined: c[j] = a[j] for j in [0, N). */
void tuned_STREAM_Copy(void)
{
    int j;
#pragma omp parallel for
    for (j=0; j<N; j++)
        c[j] = a[j];
}

/* Scale kernel used when TUNED is defined: b[j] = scalar * c[j] for j in [0, N). */
void tuned_STREAM_Scale(double scalar)
{
    int j;
#pragma omp parallel for
    for (j=0; j<N; j++)
        b[j] = scalar*c[j];
}
416*5d28107eSBarry Smith 417*5d28107eSBarry Smith void tuned_STREAM_Add() 418*5d28107eSBarry Smith { 419*5d28107eSBarry Smith int j; 420*5d28107eSBarry Smith #pragma omp parallel for 421*5d28107eSBarry Smith for (j=0; j<N; j++) 422*5d28107eSBarry Smith c[j] = a[j]+b[j]; 423*5d28107eSBarry Smith } 424*5d28107eSBarry Smith 425*5d28107eSBarry Smith void tuned_STREAM_Triad(double scalar) 426*5d28107eSBarry Smith { 427*5d28107eSBarry Smith int j; 428*5d28107eSBarry Smith #pragma omp parallel for 429*5d28107eSBarry Smith for (j=0; j<N; j++) 430*5d28107eSBarry Smith a[j] = b[j]+scalar*c[j]; 431*5d28107eSBarry Smith } 432