1*9f0612e4SBarry Smith /* 2*9f0612e4SBarry Smith A simplification of the Stream benchmark for OpenMP 3*9f0612e4SBarry Smith Original code developed by John D. McCalpin 4*9f0612e4SBarry Smith */ 55d28107eSBarry Smith #include <stdio.h> 65d28107eSBarry Smith #include <math.h> 75d28107eSBarry Smith #include <limits.h> 80d04baf8SBarry Smith #include <float.h> 95d28107eSBarry Smith #include <sys/time.h> 104198fb66SBarry Smith #include <stdlib.h> 11*9f0612e4SBarry Smith #include <petscsys.h> 125d28107eSBarry Smith 13*9f0612e4SBarry Smith //#define N 2*4*20000000 14*9f0612e4SBarry Smith #define N 80000000 15*9f0612e4SBarry Smith //#define N 1200000 16*9f0612e4SBarry Smith //#define N 120000 17511c7730SShri Abhyankar #define NTIMES 50 185d28107eSBarry Smith #define OFFSET 0 195d28107eSBarry Smith 20519f805aSKarl Rupp # if !defined(MIN) 215d28107eSBarry Smith # define MIN(x,y) ((x)<(y) ? (x) : (y)) 225d28107eSBarry Smith # endif 23519f805aSKarl Rupp # if !defined(MAX) 245d28107eSBarry Smith # define MAX(x,y) ((x)>(y) ? (x) : (y)) 255d28107eSBarry Smith # endif 265d28107eSBarry Smith 27*9f0612e4SBarry Smith static double a[N+OFFSET],b[N+OFFSET],c[N+OFFSET]; 28*9f0612e4SBarry Smith static double mintime = FLT_MAX; 29*9f0612e4SBarry Smith static double bytes = 3 * sizeof(double) * N; 305d28107eSBarry Smith 31*9f0612e4SBarry Smith int main(int argc,char **argv) 325d28107eSBarry Smith { 33*9f0612e4SBarry Smith MPI_Init(&argc,&argv); 34*9f0612e4SBarry Smith const static double scalar = 3.0; 35*9f0612e4SBarry Smith #pragma omp threadprivate(scalar) 36*9f0612e4SBarry Smith double times[NTIMES],rate; 374198fb66SBarry Smith int size; 384198fb66SBarry Smith char *env; 394198fb66SBarry Smith FILE *fd; 405d28107eSBarry Smith 414198fb66SBarry Smith env = getenv("OMP_NUM_THREADS"); 42*9f0612e4SBarry Smith if (!env) env = (char *) "1"; 434198fb66SBarry Smith sscanf(env,"%d",&size); 445d28107eSBarry Smith 45*9f0612e4SBarry Smith #pragma omp parallel for schedule(static) 46*9f0612e4SBarry Smith for (int j=0; j<N; j++) { 475d28107eSBarry Smith a[j] = 1.0; 485d28107eSBarry Smith b[j] = 2.0; 49*9f0612e4SBarry Smith c[j] = 3.0; 505d28107eSBarry Smith } 515d28107eSBarry Smith 525d28107eSBarry Smith /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ 53*9f0612e4SBarry Smith for (int k=0; k<NTIMES; k++) 545d28107eSBarry Smith { 55*9f0612e4SBarry Smith times[k] = MPI_Wtime(); 56*9f0612e4SBarry Smith // https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf 57*9f0612e4SBarry Smith // #pragma omp parallel for (same performance as below) 58*9f0612e4SBarry Smith // #pragma omp parallel for simd schedule(static) (same performance as below) 59*9f0612e4SBarry Smith #pragma omp parallel for schedule(static) 60*9f0612e4SBarry Smith for (register int j=0; j<N; j++) a[j] = b[j]+scalar*c[j]; 61*9f0612e4SBarry Smith times[k] = MPI_Wtime() - times[k]; 62*9f0612e4SBarry Smith } 63*9f0612e4SBarry Smith for (int k=1; k<NTIMES; k++) { /* note -- skip first iteration */ 64*9f0612e4SBarry Smith mintime = MIN(mintime, times[k]); 655d28107eSBarry Smith } 665d28107eSBarry Smith 67*9f0612e4SBarry Smith if (size == 65) printf("Never printed %g\n",a[11]); 68*9f0612e4SBarry Smith rate = 1.0E-06 * bytes/mintime; 694198fb66SBarry Smith 704198fb66SBarry Smith if (size == 1) { 71*9f0612e4SBarry Smith printf("%d %11.4f Rate (MB/s) 1\n",size, rate); 724198fb66SBarry Smith fd = fopen("flops","w"); 734198fb66SBarry Smith fprintf(fd,"%g\n",rate); 744198fb66SBarry Smith fclose(fd); 754198fb66SBarry Smith } else { 764198fb66SBarry Smith double prate; 774198fb66SBarry Smith fd = fopen("flops","r"); 784198fb66SBarry Smith fscanf(fd,"%lg",&prate); 794198fb66SBarry Smith fclose(fd); 804198fb66SBarry Smith printf("%d %11.4f Rate (MB/s) %g \n", size, rate,rate/prate); 814198fb66SBarry Smith } 82*9f0612e4SBarry Smith MPI_Finalize(); 835d28107eSBarry Smith return 0; 845d28107eSBarry Smith } 85