1*9f0612e4SBarry Smith /* 2*9f0612e4SBarry Smith A simplification of the Stream benchmark for OpenMP 3*9f0612e4SBarry Smith The array for each thread is a large distance from the array for the other threads 4*9f0612e4SBarry Smith Original code developed by John D. McCalpin 5*9f0612e4SBarry Smith */ 6*9f0612e4SBarry Smith #include <stdio.h> 7*9f0612e4SBarry Smith #include <math.h> 8*9f0612e4SBarry Smith #include <limits.h> 9*9f0612e4SBarry Smith #include <float.h> 10*9f0612e4SBarry Smith #include <sys/time.h> 11*9f0612e4SBarry Smith #include <stdlib.h> 12*9f0612e4SBarry Smith #include <omp.h> 13*9f0612e4SBarry Smith #include <petscsys.h> 14*9f0612e4SBarry Smith 15*9f0612e4SBarry Smith #define NTIMESINNER 1 16*9f0612e4SBarry Smith #define N 2*4*20000000 17*9f0612e4SBarry Smith //#define N 1200000 18*9f0612e4SBarry Smith //#define N 120000 19*9f0612e4SBarry Smith #define NTIMES 50 20*9f0612e4SBarry Smith #define OFFSET 0 21*9f0612e4SBarry Smith 22*9f0612e4SBarry Smith # if !defined(MIN) 23*9f0612e4SBarry Smith # define MIN(x,y) ((x)<(y) ? (x) : (y)) 24*9f0612e4SBarry Smith # endif 25*9f0612e4SBarry Smith # if !defined(MAX) 26*9f0612e4SBarry Smith # define MAX(x,y) ((x)>(y) ? (x) : (y)) 27*9f0612e4SBarry Smith # endif 28*9f0612e4SBarry Smith 29*9f0612e4SBarry Smith static double a[64][10000000],b[64][10000000],c[64][10000000]; 30*9f0612e4SBarry Smith static double mintime = FLT_MAX; 31*9f0612e4SBarry Smith static double bytes = 3 * sizeof(double) * N; 32*9f0612e4SBarry Smith 33*9f0612e4SBarry Smith int main() 34*9f0612e4SBarry Smith { 35*9f0612e4SBarry Smith const static double scalar = 3.0; 36*9f0612e4SBarry Smith #pragma omp threadprivate(scalar) 37*9f0612e4SBarry Smith double times[NTIMES],rate; 38*9f0612e4SBarry Smith int size; 39*9f0612e4SBarry Smith static int n; 40*9f0612e4SBarry Smith #pragma omp threadprivate(n) 41*9f0612e4SBarry Smith char *env; 42*9f0612e4SBarry Smith FILE *fd; 43*9f0612e4SBarry Smith 44*9f0612e4SBarry Smith env = getenv("OMP_NUM_THREADS"); 45*9f0612e4SBarry Smith if (!env) env = (char *) "1"; 46*9f0612e4SBarry Smith sscanf(env,"%d",&size); 47*9f0612e4SBarry Smith 48*9f0612e4SBarry Smith #pragma omp parallel for schedule(static) 49*9f0612e4SBarry Smith for (int j=0; j<size; j++) { 50*9f0612e4SBarry Smith n = (N / size + ((N % size) > omp_get_thread_num())); 51*9f0612e4SBarry Smith for (int i=0; i<n; i++){ 52*9f0612e4SBarry Smith a[j][i] = 1.0; 53*9f0612e4SBarry Smith b[j][i] = 2.0; 54*9f0612e4SBarry Smith c[j][i] = 3.0; 55*9f0612e4SBarry Smith } 56*9f0612e4SBarry Smith } 57*9f0612e4SBarry Smith 58*9f0612e4SBarry Smith /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ 59*9f0612e4SBarry Smith for (int k=0; k<NTIMES; k++) 60*9f0612e4SBarry Smith { 61*9f0612e4SBarry Smith times[k] = MPI_Wtime(); 62*9f0612e4SBarry Smith // https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf 63*9f0612e4SBarry Smith // #pragma omp parallel for (same performance as below) 64*9f0612e4SBarry Smith // #pragma omp parallel for simd schedule(static) (same performance as below) 65*9f0612e4SBarry Smith #pragma omp parallel for schedule(static) 66*9f0612e4SBarry Smith for (int j=0; j<size; j++) { 67*9f0612e4SBarry Smith n = (N / size + ((N % size) > omp_get_thread_num())); 68*9f0612e4SBarry Smith double *aa = a[j]; // these don't change the timings 69*9f0612e4SBarry Smith const double *bb = b[j]; 70*9f0612e4SBarry Smith const double *cc = c[j]; 71*9f0612e4SBarry Smith for (int l=0; l<NTIMESINNER; l++) { 72*9f0612e4SBarry Smith for (register int i=0; i<n; i++) aa[i] = bb[i]+scalar*cc[i]; 73*9f0612e4SBarry Smith if (size == 65) printf("never printed %g\n",a[0][11]); 74*9f0612e4SBarry Smith } 75*9f0612e4SBarry Smith } 76*9f0612e4SBarry Smith times[k] = MPI_Wtime() - times[k]; 77*9f0612e4SBarry Smith } 78*9f0612e4SBarry Smith for (int k=1; k<NTIMES; k++) { /* note -- skip first iteration */ 79*9f0612e4SBarry Smith mintime = MIN(mintime, times[k]); 80*9f0612e4SBarry Smith } 81*9f0612e4SBarry Smith rate = 1.0E-06 * bytes*NTIMESINNER/mintime; 82*9f0612e4SBarry Smith 83*9f0612e4SBarry Smith if (size == 1) { 84*9f0612e4SBarry Smith printf("%d %11.4f Rate (MB/s) 1\n",size, rate); 85*9f0612e4SBarry Smith fd = fopen("flops","w"); 86*9f0612e4SBarry Smith fprintf(fd,"%g\n",rate); 87*9f0612e4SBarry Smith fclose(fd); 88*9f0612e4SBarry Smith } else { 89*9f0612e4SBarry Smith double prate; 90*9f0612e4SBarry Smith fd = fopen("flops","r"); 91*9f0612e4SBarry Smith fscanf(fd,"%lg",&prate); 92*9f0612e4SBarry Smith fclose(fd); 93*9f0612e4SBarry Smith printf("%d %11.4f Rate (MB/s) %g \n", size, rate,rate/prate); 94*9f0612e4SBarry Smith } 95*9f0612e4SBarry Smith return 0; 96*9f0612e4SBarry Smith } 97