1 /* 2 A simplification of the Stream benchmark for OpenMP 3 The array for each thread is a large distance from the array for the other threads 4 Original code developed by John D. McCalpin 5 */ 6 #include <stdio.h> 7 #include <math.h> 8 #include <limits.h> 9 #include <float.h> 10 #include <sys/time.h> 11 #include <stdlib.h> 12 #include <omp.h> 13 #include <petscsys.h> 14 15 #define NTIMESINNER 1 16 #define N 2*4*20000000 17 //#define N 1200000 18 //#define N 120000 19 #define NTIMES 50 20 #define OFFSET 0 21 22 # if !defined(MIN) 23 # define MIN(x,y) ((x)<(y) ? (x) : (y)) 24 # endif 25 # if !defined(MAX) 26 # define MAX(x,y) ((x)>(y) ? (x) : (y)) 27 # endif 28 29 static double a[64][10000000],b[64][10000000],c[64][10000000]; 30 static double mintime = FLT_MAX; 31 static double bytes = 3 * sizeof(double) * N; 32 33 int main() 34 { 35 const static double scalar = 3.0; 36 #pragma omp threadprivate(scalar) 37 double times[NTIMES],rate; 38 int size; 39 static int n; 40 #pragma omp threadprivate(n) 41 char *env; 42 FILE *fd; 43 44 env = getenv("OMP_NUM_THREADS"); 45 if (!env) env = (char *) "1"; 46 sscanf(env,"%d",&size); 47 48 #pragma omp parallel for schedule(static) 49 for (int j=0; j<size; j++) { 50 n = (N / size + ((N % size) > omp_get_thread_num())); 51 for (int i=0; i<n; i++){ 52 a[j][i] = 1.0; 53 b[j][i] = 2.0; 54 c[j][i] = 3.0; 55 } 56 } 57 58 /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ 59 for (int k=0; k<NTIMES; k++) 60 { 61 times[k] = MPI_Wtime(); 62 // https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf 63 // #pragma omp parallel for (same performance as below) 64 // #pragma omp parallel for simd schedule(static) (same performance as below) 65 #pragma omp parallel for schedule(static) 66 for (int j=0; j<size; j++) { 67 n = (N / size + ((N % size) > omp_get_thread_num())); 68 double *aa = a[j]; // these don't change the timings 69 const double *bb = b[j]; 70 const double *cc = c[j]; 71 for (int l=0; l<NTIMESINNER; l++) { 72 for (register int i=0; i<n; i++) aa[i] = bb[i]+scalar*cc[i]; 73 if (size == 65) printf("never printed %g\n",a[0][11]); 74 } 75 } 76 times[k] = MPI_Wtime() - times[k]; 77 } 78 for (int k=1; k<NTIMES; k++) { /* note -- skip first iteration */ 79 mintime = MIN(mintime, times[k]); 80 } 81 rate = 1.0E-06 * bytes*NTIMESINNER/mintime; 82 83 if (size == 1) { 84 printf("%d %11.4f Rate (MB/s) 1\n",size, rate); 85 fd = fopen("flops","w"); 86 fprintf(fd,"%g\n",rate); 87 fclose(fd); 88 } else { 89 double prate; 90 fd = fopen("flops","r"); 91 fscanf(fd,"%lg",&prate); 92 fclose(fd); 93 printf("%d %11.4f Rate (MB/s) %g \n", size, rate,rate/prate); 94 } 95 return 0; 96 } 97