15d28107eSBarry Smith /*-----------------------------------------------------------------------*/ 25d28107eSBarry Smith /* Program: Stream */ 35d28107eSBarry Smith /* Revision: $Id: stream.c,v 5.9 2009/04/11 16:35:00 mccalpin Exp mccalpin $ */ 45d28107eSBarry Smith /* Original code developed by John D. McCalpin */ 55d28107eSBarry Smith /* Programmers: John D. McCalpin */ 65d28107eSBarry Smith /* Joe R. Zagar */ 75d28107eSBarry Smith /* */ 85d28107eSBarry Smith /* This program measures memory transfer rates in MB/s for simple */ 95d28107eSBarry Smith /* computational kernels coded in C. */ 105d28107eSBarry Smith /*-----------------------------------------------------------------------*/ 115d28107eSBarry Smith /* Copyright 1991-2005: John D. McCalpin */ 125d28107eSBarry Smith /*-----------------------------------------------------------------------*/ 135d28107eSBarry Smith /* License: */ 145d28107eSBarry Smith /* 1. You are free to use this program and/or to redistribute */ 155d28107eSBarry Smith /* this program. */ 165d28107eSBarry Smith /* 2. You are free to modify this program for your own use, */ 175d28107eSBarry Smith /* including commercial use, subject to the publication */ 185d28107eSBarry Smith /* restrictions in item 3. */ 195d28107eSBarry Smith /* 3. You are free to publish results obtained from running this */ 205d28107eSBarry Smith /* program, or from works that you derive from this program, */ 215d28107eSBarry Smith /* with the following limitations: */ 225d28107eSBarry Smith /* 3a. In order to be referred to as "STREAM benchmark results", */ 235d28107eSBarry Smith /* published results must be in conformance to the STREAM */ 245d28107eSBarry Smith /* Run Rules, (briefly reviewed below) published at */ 255d28107eSBarry Smith /* http://www.cs.virginia.edu/stream/ref.html */ 265d28107eSBarry Smith /* and incorporated herein by reference. */ 275d28107eSBarry Smith /* As the copyright holder, John McCalpin retains the */ 285d28107eSBarry Smith /* right to determine conformity with the Run Rules. */ 295d28107eSBarry Smith /* 3b. Results based on modified source code or on runs not in */ 305d28107eSBarry Smith /* accordance with the STREAM Run Rules must be clearly */ 315d28107eSBarry Smith /* labelled whenever they are published. Examples of */ 325d28107eSBarry Smith /* proper labelling include: */ 335d28107eSBarry Smith /* "tuned STREAM benchmark results" */ 345d28107eSBarry Smith /* "based on a variant of the STREAM benchmark code" */ 355d28107eSBarry Smith /* Other comparable, clear and reasonable labelling is */ 365d28107eSBarry Smith /* acceptable. */ 375d28107eSBarry Smith /* 3c. Submission of results to the STREAM benchmark web site */ 385d28107eSBarry Smith /* is encouraged, but not required. */ 395d28107eSBarry Smith /* 4. Use of this program or creation of derived works based on this */ 405d28107eSBarry Smith /* program constitutes acceptance of these licensing restrictions. */ 415d28107eSBarry Smith /* 5. Absolutely no warranty is expressed or implied. */ 425d28107eSBarry Smith /*-----------------------------------------------------------------------*/ 435d28107eSBarry Smith # include <stdio.h> 445d28107eSBarry Smith # include <math.h> 455d28107eSBarry Smith # include <float.h> 465d28107eSBarry Smith # include <limits.h> 475d28107eSBarry Smith # include <sys/time.h> 485d28107eSBarry Smith 495d28107eSBarry Smith /* INSTRUCTIONS: 505d28107eSBarry Smith * 515d28107eSBarry Smith * 1) Stream requires a good bit of memory to run. Adjust the 525d28107eSBarry Smith * value of 'N' (below) to give a 'timing calibration' of 535d28107eSBarry Smith * at least 20 clock-ticks. This will provide rate estimates 545d28107eSBarry Smith * that should be good to about 5% precision. 555d28107eSBarry Smith */ 565d28107eSBarry Smith 575d28107eSBarry Smith #ifndef N 585d28107eSBarry Smith # define N 2000000 595d28107eSBarry Smith #endif 605d28107eSBarry Smith #ifndef NTIMES 615d28107eSBarry Smith # define NTIMES 10 625d28107eSBarry Smith #endif 635d28107eSBarry Smith #ifndef OFFSET 645d28107eSBarry Smith # define OFFSET 0 655d28107eSBarry Smith #endif 665d28107eSBarry Smith 675d28107eSBarry Smith /* 685d28107eSBarry Smith * 3) Compile the code with full optimization. Many compilers 695d28107eSBarry Smith * generate unreasonably bad code before the optimizer tightens 705d28107eSBarry Smith * things up. If the results are unreasonably good, on the 715d28107eSBarry Smith * other hand, the optimizer might be too smart for me! 725d28107eSBarry Smith * 735d28107eSBarry Smith * Try compiling with: 745d28107eSBarry Smith * cc -O stream_omp.c -o stream_omp 755d28107eSBarry Smith * 765d28107eSBarry Smith * This is known to work on Cray, SGI, IBM, and Sun machines. 775d28107eSBarry Smith * 785d28107eSBarry Smith * 795d28107eSBarry Smith * 4) Mail the results to mccalpin@cs.virginia.edu 805d28107eSBarry Smith * Be sure to include: 815d28107eSBarry Smith * a) computer hardware model number and software revision 825d28107eSBarry Smith * b) the compiler flags 835d28107eSBarry Smith * c) all of the output from the test case. 845d28107eSBarry Smith * Thanks! 855d28107eSBarry Smith * 865d28107eSBarry Smith */ 875d28107eSBarry Smith 885d28107eSBarry Smith # define HLINE "-------------------------------------------------------------\n" 895d28107eSBarry Smith 905d28107eSBarry Smith # ifndef MIN 915d28107eSBarry Smith # define MIN(x,y) ((x)<(y)?(x):(y)) 925d28107eSBarry Smith # endif 935d28107eSBarry Smith # ifndef MAX 945d28107eSBarry Smith # define MAX(x,y) ((x)>(y)?(x):(y)) 955d28107eSBarry Smith # endif 965d28107eSBarry Smith 975d28107eSBarry Smith static double a[N+OFFSET], 985d28107eSBarry Smith b[N+OFFSET], 995d28107eSBarry Smith c[N+OFFSET]; 1005d28107eSBarry Smith 1015d28107eSBarry Smith static double avgtime[4] = {0}, maxtime[4] = {0}, 1025d28107eSBarry Smith mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; 1035d28107eSBarry Smith 1045d28107eSBarry Smith static char *label[4] = {"Copy: ", "Scale: ", 1055d28107eSBarry Smith "Add: ", "Triad: "}; 1065d28107eSBarry Smith 1075d28107eSBarry Smith static double bytes[4] = { 1085d28107eSBarry Smith 2 * sizeof(double) * N, 1095d28107eSBarry Smith 2 * sizeof(double) * N, 1105d28107eSBarry Smith 3 * sizeof(double) * N, 1115d28107eSBarry Smith 3 * sizeof(double) * N 1125d28107eSBarry Smith }; 1135d28107eSBarry Smith 1145d28107eSBarry Smith extern double mysecond(); 1155d28107eSBarry Smith extern void checkSTREAMresults(); 1165d28107eSBarry Smith #ifdef TUNED 1175d28107eSBarry Smith extern void tuned_STREAM_Copy(); 1185d28107eSBarry Smith extern void tuned_STREAM_Scale(double scalar); 1195d28107eSBarry Smith extern void tuned_STREAM_Add(); 1205d28107eSBarry Smith extern void tuned_STREAM_Triad(double scalar); 1215d28107eSBarry Smith #endif 1225d28107eSBarry Smith extern int omp_get_num_threads(); 123*01a79839SBarry Smith int main() 1245d28107eSBarry Smith { 1255d28107eSBarry Smith int quantum, checktick(); 1265d28107eSBarry Smith int BytesPerWord; 1275d28107eSBarry Smith register int j, k; 1285d28107eSBarry Smith double scalar, t, times[4][NTIMES]; 1295d28107eSBarry Smith 1305d28107eSBarry Smith /* --- SETUP --- determine precision and check timing --- */ 1315d28107eSBarry Smith 1325d28107eSBarry Smith printf(HLINE); 1335d28107eSBarry Smith printf("STREAM version $Revision: 5.9 $\n"); 1345d28107eSBarry Smith printf(HLINE); 1355d28107eSBarry Smith BytesPerWord = sizeof(double); 1365d28107eSBarry Smith printf("This system uses %d bytes per DOUBLE PRECISION word.\n", 1375d28107eSBarry Smith BytesPerWord); 1385d28107eSBarry Smith 1395d28107eSBarry Smith printf(HLINE); 1405d28107eSBarry Smith #ifdef NO_LONG_LONG 1415d28107eSBarry Smith printf("Array size = %d, Offset = %d\n" , N, OFFSET); 1425d28107eSBarry Smith #else 1435d28107eSBarry Smith printf("Array size = %llu, Offset = %d\n", (unsigned long long) N, OFFSET); 1445d28107eSBarry Smith #endif 1455d28107eSBarry Smith 1465d28107eSBarry Smith printf("Total memory required = %.1f MB.\n", 1475d28107eSBarry Smith (3.0 * BytesPerWord) * ( (double) N / 1048576.0)); 1485d28107eSBarry Smith printf("Each test is run %d times, but only\n", NTIMES); 1495d28107eSBarry Smith printf("the *best* time for each is used.\n"); 1505d28107eSBarry Smith 1515d28107eSBarry Smith printf(HLINE); 1525d28107eSBarry Smith #pragma omp parallel 1535d28107eSBarry Smith { 1545d28107eSBarry Smith #pragma omp master 1555d28107eSBarry Smith { 1565d28107eSBarry Smith k = omp_get_num_threads(); 1575d28107eSBarry Smith printf ("Number of Threads requested = %i\n",k); 1585d28107eSBarry Smith } 1595d28107eSBarry Smith } 1605d28107eSBarry Smith 1615d28107eSBarry Smith printf(HLINE); 1625d28107eSBarry Smith #pragma omp parallel 1635d28107eSBarry Smith { 1645d28107eSBarry Smith printf ("Printing one line per active thread....\n"); 1655d28107eSBarry Smith } 1665d28107eSBarry Smith 1675d28107eSBarry Smith /* Get initial value for system clock. */ 1685d28107eSBarry Smith #pragma omp parallel for 1695d28107eSBarry Smith for (j=0; j<N; j++) { 1705d28107eSBarry Smith a[j] = 1.0; 1715d28107eSBarry Smith b[j] = 2.0; 1725d28107eSBarry Smith c[j] = 0.0; 1735d28107eSBarry Smith } 1745d28107eSBarry Smith 1755d28107eSBarry Smith printf(HLINE); 1765d28107eSBarry Smith 1775d28107eSBarry Smith if ( (quantum = checktick()) >= 1) 1785d28107eSBarry Smith printf("Your clock granularity/precision appears to be " 1795d28107eSBarry Smith "%d microseconds.\n", quantum); 1805d28107eSBarry Smith else { 1815d28107eSBarry Smith printf("Your clock granularity appears to be " 1825d28107eSBarry Smith "less than one microsecond.\n"); 1835d28107eSBarry Smith quantum = 1; 1845d28107eSBarry Smith } 1855d28107eSBarry Smith 1865d28107eSBarry Smith t = mysecond(); 1875d28107eSBarry Smith #pragma omp parallel for 1885d28107eSBarry Smith for (j = 0; j < N; j++) 1895d28107eSBarry Smith a[j] = 2.0E0 * a[j]; 1905d28107eSBarry Smith t = 1.0E6 * (mysecond() - t); 1915d28107eSBarry Smith 1925d28107eSBarry Smith printf("Each test below will take on the order" 1935d28107eSBarry Smith " of %d microseconds.\n", (int) t ); 1945d28107eSBarry Smith printf(" (= %d clock ticks)\n", (int) (t/quantum) ); 1955d28107eSBarry Smith printf("Increase the size of the arrays if this shows that\n"); 1965d28107eSBarry Smith printf("you are not getting at least 20 clock ticks per test.\n"); 1975d28107eSBarry Smith 1985d28107eSBarry Smith printf(HLINE); 1995d28107eSBarry Smith 2005d28107eSBarry Smith /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ 2015d28107eSBarry Smith 2025d28107eSBarry Smith scalar = 3.0; 2035d28107eSBarry Smith for (k=0; k<NTIMES; k++) 2045d28107eSBarry Smith { 2055d28107eSBarry Smith times[0][k] = mysecond(); 2065d28107eSBarry Smith #ifdef TUNED 2075d28107eSBarry Smith tuned_STREAM_Copy(); 2085d28107eSBarry Smith #else 2095d28107eSBarry Smith #pragma omp parallel for 2105d28107eSBarry Smith for (j=0; j<N; j++) 2115d28107eSBarry Smith c[j] = a[j]; 2125d28107eSBarry Smith #endif 2135d28107eSBarry Smith times[0][k] = mysecond() - times[0][k]; 2145d28107eSBarry Smith 2155d28107eSBarry Smith times[1][k] = mysecond(); 2165d28107eSBarry Smith #ifdef TUNED 2175d28107eSBarry Smith tuned_STREAM_Scale(scalar); 2185d28107eSBarry Smith #else 2195d28107eSBarry Smith #pragma omp parallel for 2205d28107eSBarry Smith for (j=0; j<N; j++) 2215d28107eSBarry Smith b[j] = scalar*c[j]; 2225d28107eSBarry Smith #endif 2235d28107eSBarry Smith times[1][k] = mysecond() - times[1][k]; 2245d28107eSBarry Smith 2255d28107eSBarry Smith times[2][k] = mysecond(); 2265d28107eSBarry Smith #ifdef TUNED 2275d28107eSBarry Smith tuned_STREAM_Add(); 2285d28107eSBarry Smith #else 2295d28107eSBarry Smith #pragma omp parallel for 2305d28107eSBarry Smith for (j=0; j<N; j++) 2315d28107eSBarry Smith c[j] = a[j]+b[j]; 2325d28107eSBarry Smith #endif 2335d28107eSBarry Smith times[2][k] = mysecond() - times[2][k]; 2345d28107eSBarry Smith 2355d28107eSBarry Smith times[3][k] = mysecond(); 2365d28107eSBarry Smith #ifdef TUNED 2375d28107eSBarry Smith tuned_STREAM_Triad(scalar); 2385d28107eSBarry Smith #else 2395d28107eSBarry Smith #pragma omp parallel for 2405d28107eSBarry Smith for (j=0; j<N; j++) 2415d28107eSBarry Smith a[j] = b[j]+scalar*c[j]; 2425d28107eSBarry Smith #endif 2435d28107eSBarry Smith times[3][k] = mysecond() - times[3][k]; 2445d28107eSBarry Smith } 2455d28107eSBarry Smith 2465d28107eSBarry Smith /* --- SUMMARY --- */ 2475d28107eSBarry Smith 2485d28107eSBarry Smith for (k=1; k<NTIMES; k++) /* note -- skip first iteration */ 2495d28107eSBarry Smith { 2505d28107eSBarry Smith for (j=0; j<4; j++) 2515d28107eSBarry Smith { 2525d28107eSBarry Smith avgtime[j] = avgtime[j] + times[j][k]; 2535d28107eSBarry Smith mintime[j] = MIN(mintime[j], times[j][k]); 2545d28107eSBarry Smith maxtime[j] = MAX(maxtime[j], times[j][k]); 2555d28107eSBarry Smith } 2565d28107eSBarry Smith } 2575d28107eSBarry Smith 2585d28107eSBarry Smith printf("Function Rate (MB/s) Avg time Min time Max time\n"); 2595d28107eSBarry Smith for (j=0; j<4; j++) { 2605d28107eSBarry Smith avgtime[j] = avgtime[j]/(double)(NTIMES-1); 2615d28107eSBarry Smith 2625d28107eSBarry Smith printf("%s%11.4f %11.4f %11.4f %11.4f\n", label[j], 2635d28107eSBarry Smith 1.0E-06 * bytes[j]/mintime[j], 2645d28107eSBarry Smith avgtime[j], 2655d28107eSBarry Smith mintime[j], 2665d28107eSBarry Smith maxtime[j]); 2675d28107eSBarry Smith } 2685d28107eSBarry Smith printf(HLINE); 2695d28107eSBarry Smith 2705d28107eSBarry Smith /* --- Check Results --- */ 2715d28107eSBarry Smith checkSTREAMresults(); 2725d28107eSBarry Smith printf(HLINE); 2735d28107eSBarry Smith 2745d28107eSBarry Smith return 0; 2755d28107eSBarry Smith } 2765d28107eSBarry Smith 2775d28107eSBarry Smith # define M 20 2785d28107eSBarry Smith 2795d28107eSBarry Smith int 2805d28107eSBarry Smith checktick() 2815d28107eSBarry Smith { 2825d28107eSBarry Smith int i, minDelta, Delta; 2835d28107eSBarry Smith double t1, t2, timesfound[M]; 2845d28107eSBarry Smith 2855d28107eSBarry Smith /* Collect a sequence of M unique time values from the system. */ 2865d28107eSBarry Smith 2875d28107eSBarry Smith for (i = 0; i < M; i++) { 2885d28107eSBarry Smith t1 = mysecond(); 2895d28107eSBarry Smith while( ((t2=mysecond()) - t1) < 1.0E-6 ) 2905d28107eSBarry Smith ; 2915d28107eSBarry Smith timesfound[i] = t1 = t2; 2925d28107eSBarry Smith } 2935d28107eSBarry Smith 2945d28107eSBarry Smith /* 2955d28107eSBarry Smith * Determine the minimum difference between these M values. 2965d28107eSBarry Smith * This result will be our estimate (in microseconds) for the 2975d28107eSBarry Smith * clock granularity. 2985d28107eSBarry Smith */ 2995d28107eSBarry Smith 3005d28107eSBarry Smith minDelta = 1000000; 3015d28107eSBarry Smith for (i = 1; i < M; i++) { 3025d28107eSBarry Smith Delta = (int)( 1.0E6 * (timesfound[i]-timesfound[i-1])); 3035d28107eSBarry Smith minDelta = MIN(minDelta, MAX(Delta,0)); 3045d28107eSBarry Smith } 3055d28107eSBarry Smith 3065d28107eSBarry Smith return(minDelta); 3075d28107eSBarry Smith } 3085d28107eSBarry Smith 3095d28107eSBarry Smith 3105d28107eSBarry Smith 3115d28107eSBarry Smith /* A gettimeofday routine to give access to the wall 3125d28107eSBarry Smith clock timer on most UNIX-like systems. */ 3135d28107eSBarry Smith 3145d28107eSBarry Smith #include <sys/time.h> 3155d28107eSBarry Smith 3165d28107eSBarry Smith double mysecond() 3175d28107eSBarry Smith { 3185d28107eSBarry Smith struct timeval tp; 3195d28107eSBarry Smith struct timezone tzp; 3205d28107eSBarry Smith int i; 3215d28107eSBarry Smith 3225d28107eSBarry Smith i = gettimeofday(&tp,&tzp); 3235d28107eSBarry Smith return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 ); 3245d28107eSBarry Smith } 3255d28107eSBarry Smith 3265d28107eSBarry Smith void checkSTREAMresults () 3275d28107eSBarry Smith { 3285d28107eSBarry Smith double aj,bj,cj,scalar; 3295d28107eSBarry Smith double asum,bsum,csum; 3305d28107eSBarry Smith double epsilon; 3315d28107eSBarry Smith int j,k; 3325d28107eSBarry Smith 3335d28107eSBarry Smith /* reproduce initialization */ 3345d28107eSBarry Smith aj = 1.0; 3355d28107eSBarry Smith bj = 2.0; 3365d28107eSBarry Smith cj = 0.0; 3375d28107eSBarry Smith /* a[] is modified during timing check */ 3385d28107eSBarry Smith aj = 2.0E0 * aj; 3395d28107eSBarry Smith /* now execute timing loop */ 3405d28107eSBarry Smith scalar = 3.0; 3415d28107eSBarry Smith for (k=0; k<NTIMES; k++) 3425d28107eSBarry Smith { 3435d28107eSBarry Smith cj = aj; 3445d28107eSBarry Smith bj = scalar*cj; 3455d28107eSBarry Smith cj = aj+bj; 3465d28107eSBarry Smith aj = bj+scalar*cj; 3475d28107eSBarry Smith } 3485d28107eSBarry Smith aj = aj * (double) (N); 3495d28107eSBarry Smith bj = bj * (double) (N); 3505d28107eSBarry Smith cj = cj * (double) (N); 3515d28107eSBarry Smith 3525d28107eSBarry Smith asum = 0.0; 3535d28107eSBarry Smith bsum = 0.0; 3545d28107eSBarry Smith csum = 0.0; 3555d28107eSBarry Smith for (j=0; j<N; j++) { 3565d28107eSBarry Smith asum += a[j]; 3575d28107eSBarry Smith bsum += b[j]; 3585d28107eSBarry Smith csum += c[j]; 3595d28107eSBarry Smith } 3605d28107eSBarry Smith #ifdef VERBOSE 3615d28107eSBarry Smith printf ("Results Comparison: \n"); 3625d28107eSBarry Smith printf (" Expected : %f %f %f \n",aj,bj,cj); 3635d28107eSBarry Smith printf (" Observed : %f %f %f \n",asum,bsum,csum); 3645d28107eSBarry Smith #endif 3655d28107eSBarry Smith 3665d28107eSBarry Smith #ifndef abs 3675d28107eSBarry Smith #define abs(a) ((a) >= 0 ? (a) : -(a)) 3685d28107eSBarry Smith #endif 3695d28107eSBarry Smith epsilon = 1.e-8; 3705d28107eSBarry Smith 3715d28107eSBarry Smith if (abs(aj-asum)/asum > epsilon) { 3725d28107eSBarry Smith printf ("Failed Validation on array a[]\n"); 3735d28107eSBarry Smith printf (" Expected : %f \n",aj); 3745d28107eSBarry Smith printf (" Observed : %f \n",asum); 3755d28107eSBarry Smith } 3765d28107eSBarry Smith else if (abs(bj-bsum)/bsum > epsilon) { 3775d28107eSBarry Smith printf ("Failed Validation on array b[]\n"); 3785d28107eSBarry Smith printf (" Expected : %f \n",bj); 3795d28107eSBarry Smith printf (" Observed : %f \n",bsum); 3805d28107eSBarry Smith } 3815d28107eSBarry Smith else if (abs(cj-csum)/csum > epsilon) { 3825d28107eSBarry Smith printf ("Failed Validation on array c[]\n"); 3835d28107eSBarry Smith printf (" Expected : %f \n",cj); 3845d28107eSBarry Smith printf (" Observed : %f \n",csum); 3855d28107eSBarry Smith } 3865d28107eSBarry Smith else { 3875d28107eSBarry Smith printf ("Solution Validates\n"); 3885d28107eSBarry Smith } 3895d28107eSBarry Smith } 3905d28107eSBarry Smith 3915d28107eSBarry Smith void tuned_STREAM_Copy() 3925d28107eSBarry Smith { 3935d28107eSBarry Smith int j; 3945d28107eSBarry Smith #pragma omp parallel for 3955d28107eSBarry Smith for (j=0; j<N; j++) 3965d28107eSBarry Smith c[j] = a[j]; 3975d28107eSBarry Smith } 3985d28107eSBarry Smith 3995d28107eSBarry Smith void tuned_STREAM_Scale(double scalar) 4005d28107eSBarry Smith { 4015d28107eSBarry Smith int j; 4025d28107eSBarry Smith #pragma omp parallel for 4035d28107eSBarry Smith for (j=0; j<N; j++) 4045d28107eSBarry Smith b[j] = scalar*c[j]; 4055d28107eSBarry Smith } 4065d28107eSBarry Smith 4075d28107eSBarry Smith void tuned_STREAM_Add() 4085d28107eSBarry Smith { 4095d28107eSBarry Smith int j; 4105d28107eSBarry Smith #pragma omp parallel for 4115d28107eSBarry Smith for (j=0; j<N; j++) 4125d28107eSBarry Smith c[j] = a[j]+b[j]; 4135d28107eSBarry Smith } 4145d28107eSBarry Smith 4155d28107eSBarry Smith void tuned_STREAM_Triad(double scalar) 4165d28107eSBarry Smith { 4175d28107eSBarry Smith int j; 4185d28107eSBarry Smith #pragma omp parallel for 4195d28107eSBarry Smith for (j=0; j<N; j++) 4205d28107eSBarry Smith a[j] = b[j]+scalar*c[j]; 4215d28107eSBarry Smith } 422