19f0612e4SBarry Smith /* 29f0612e4SBarry Smith A simplification of the Stream benchmark for OpenMP 39f0612e4SBarry Smith The array for each thread is a large distance from the array for the other threads 49f0612e4SBarry Smith Original code developed by John D. McCalpin 59f0612e4SBarry Smith */ 69f0612e4SBarry Smith #include <stdio.h> 79f0612e4SBarry Smith #include <math.h> 89f0612e4SBarry Smith #include <limits.h> 99f0612e4SBarry Smith #include <float.h> 109f0612e4SBarry Smith #include <sys/time.h> 119f0612e4SBarry Smith #include <stdlib.h> 129f0612e4SBarry Smith #include <omp.h> 139f0612e4SBarry Smith #include <petscsys.h> 149f0612e4SBarry Smith 159f0612e4SBarry Smith #define NTIMESINNER 1 169f0612e4SBarry Smith #define N 2 * 4 * 20000000 179f0612e4SBarry Smith //#define N 1200000 189f0612e4SBarry Smith //#define N 120000 199f0612e4SBarry Smith #define NTIMES 50 209f0612e4SBarry Smith #define OFFSET 0 219f0612e4SBarry Smith 229f0612e4SBarry Smith #if !defined(MIN) 239f0612e4SBarry Smith #define MIN(x, y) ((x) < (y) ? (x) : (y)) 249f0612e4SBarry Smith #endif 259f0612e4SBarry Smith #if !defined(MAX) 269f0612e4SBarry Smith #define MAX(x, y) ((x) > (y) ? (x) : (y)) 279f0612e4SBarry Smith #endif 289f0612e4SBarry Smith 299f0612e4SBarry Smith static double a[64][10000000], b[64][10000000], c[64][10000000]; 309f0612e4SBarry Smith static double mintime = FLT_MAX; 319f0612e4SBarry Smith static double bytes = 3 * sizeof(double) * N; 329f0612e4SBarry Smith 339f0612e4SBarry Smith int main() 349f0612e4SBarry Smith { 359f0612e4SBarry Smith const static double scalar = 3.0; 369f0612e4SBarry Smith #pragma omp threadprivate(scalar) 379f0612e4SBarry Smith double times[NTIMES], rate; 389f0612e4SBarry Smith int size; 399f0612e4SBarry Smith static int n; 409f0612e4SBarry Smith #pragma omp threadprivate(n) 419f0612e4SBarry Smith char *env; 429f0612e4SBarry Smith FILE *fd; 439f0612e4SBarry Smith 449f0612e4SBarry Smith env = getenv("OMP_NUM_THREADS"); 459f0612e4SBarry Smith if (!env) env = (char *)"1"; 469f0612e4SBarry Smith sscanf(env, "%d", &size); 479f0612e4SBarry Smith 489f0612e4SBarry Smith #pragma omp parallel for schedule(static) 499f0612e4SBarry Smith for (int j = 0; j < size; j++) { 509f0612e4SBarry Smith n = (N / size + ((N % size) > omp_get_thread_num())); 519f0612e4SBarry Smith for (int i = 0; i < n; i++) { 529f0612e4SBarry Smith a[j][i] = 1.0; 539f0612e4SBarry Smith b[j][i] = 2.0; 549f0612e4SBarry Smith c[j][i] = 3.0; 559f0612e4SBarry Smith } 569f0612e4SBarry Smith } 579f0612e4SBarry Smith 589f0612e4SBarry Smith /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ 59*67595998SJunchao Zhang for (int k = 0; k < NTIMES; k++) { 609f0612e4SBarry Smith times[k] = MPI_Wtime(); 619f0612e4SBarry Smith // https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf 629f0612e4SBarry Smith // #pragma omp parallel for (same performance as below) 639f0612e4SBarry Smith // #pragma omp parallel for simd schedule(static) (same performance as below) 649f0612e4SBarry Smith #pragma omp parallel for schedule(static) 659f0612e4SBarry Smith for (int j = 0; j < size; j++) { 669f0612e4SBarry Smith n = (N / size + ((N % size) > omp_get_thread_num())); 679f0612e4SBarry Smith double *aa = a[j]; // these don't change the timings 689f0612e4SBarry Smith const double *bb = b[j]; 699f0612e4SBarry Smith const double *cc = c[j]; 709f0612e4SBarry Smith for (int l = 0; l < NTIMESINNER; l++) { 719f0612e4SBarry Smith for (register int i = 0; i < n; i++) aa[i] = bb[i] + scalar * cc[i]; 729f0612e4SBarry Smith if (size == 65) printf("never printed %g\n", a[0][11]); 739f0612e4SBarry Smith } 749f0612e4SBarry Smith } 759f0612e4SBarry Smith times[k] = MPI_Wtime() - times[k]; 769f0612e4SBarry Smith } 779f0612e4SBarry Smith for (int k = 1; k < NTIMES; k++) { /* note -- skip first iteration */ 789f0612e4SBarry Smith mintime = MIN(mintime, times[k]); 799f0612e4SBarry Smith } 809f0612e4SBarry Smith rate = 1.0E-06 * bytes * NTIMESINNER / mintime; 819f0612e4SBarry Smith 829f0612e4SBarry Smith if (size == 1) { 839f0612e4SBarry Smith printf("%d %11.4f Rate (MB/s) 1\n", size, rate); 849f0612e4SBarry Smith fd = fopen("flops", "w"); 859f0612e4SBarry Smith fprintf(fd, "%g\n", rate); 869f0612e4SBarry Smith fclose(fd); 879f0612e4SBarry Smith } else { 889f0612e4SBarry Smith double prate; 899f0612e4SBarry Smith fd = fopen("flops", "r"); 909f0612e4SBarry Smith fscanf(fd, "%lg", &prate); 919f0612e4SBarry Smith fclose(fd); 929f0612e4SBarry Smith printf("%d %11.4f Rate (MB/s) %g\n", size, rate, rate / prate); 939f0612e4SBarry Smith } 949f0612e4SBarry Smith return 0; 959f0612e4SBarry Smith } 96