19f0612e4SBarry Smith /* 29f0612e4SBarry Smith A simplification of the Stream benchmark for OpenMP 39f0612e4SBarry Smith Original code developed by John D. McCalpin 49f0612e4SBarry Smith */ 55d28107eSBarry Smith #include <stdio.h> 65d28107eSBarry Smith #include <math.h> 75d28107eSBarry Smith #include <limits.h> 80d04baf8SBarry Smith #include <float.h> 95d28107eSBarry Smith #include <sys/time.h> 104198fb66SBarry Smith #include <stdlib.h> 119f0612e4SBarry Smith #include <petscsys.h> 125d28107eSBarry Smith 139f0612e4SBarry Smith //#define N 2*4*20000000 149f0612e4SBarry Smith #define N 80000000 159f0612e4SBarry Smith //#define N 1200000 169f0612e4SBarry Smith //#define N 120000 17511c7730SShri Abhyankar #define NTIMES 50 185d28107eSBarry Smith #define OFFSET 0 195d28107eSBarry Smith 20519f805aSKarl Rupp #if !defined(MIN) 215d28107eSBarry Smith #define MIN(x, y) ((x) < (y) ? (x) : (y)) 225d28107eSBarry Smith #endif 23519f805aSKarl Rupp #if !defined(MAX) 245d28107eSBarry Smith #define MAX(x, y) ((x) > (y) ? (x) : (y)) 255d28107eSBarry Smith #endif 265d28107eSBarry Smith 279f0612e4SBarry Smith static double a[N + OFFSET], b[N + OFFSET], c[N + OFFSET]; 289f0612e4SBarry Smith static double mintime = FLT_MAX; 299f0612e4SBarry Smith static double bytes = 3 * sizeof(double) * N; 305d28107eSBarry Smith 319f0612e4SBarry Smith int main(int argc, char **argv) 325d28107eSBarry Smith { 339f0612e4SBarry Smith MPI_Init(&argc, &argv); 349f0612e4SBarry Smith const static double scalar = 3.0; 359f0612e4SBarry Smith #pragma omp threadprivate(scalar) 369f0612e4SBarry Smith double times[NTIMES], rate; 374198fb66SBarry Smith int size; 384198fb66SBarry Smith char *env; 394198fb66SBarry Smith FILE *fd; 405d28107eSBarry Smith 414198fb66SBarry Smith env = getenv("OMP_NUM_THREADS"); 429f0612e4SBarry Smith if (!env) env = (char *)"1"; 434198fb66SBarry Smith sscanf(env, "%d", &size); 445d28107eSBarry Smith 459f0612e4SBarry Smith #pragma omp parallel for schedule(static) 469f0612e4SBarry Smith for (int j = 0; j < N; j++) { 475d28107eSBarry Smith a[j] = 1.0; 485d28107eSBarry Smith b[j] = 2.0; 499f0612e4SBarry Smith c[j] = 3.0; 505d28107eSBarry Smith } 515d28107eSBarry Smith 525d28107eSBarry Smith /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ 53*67595998SJunchao Zhang for (int k = 0; k < NTIMES; k++) { 549f0612e4SBarry Smith times[k] = MPI_Wtime(); 559f0612e4SBarry Smith // https://www.openmp.org/wp-content/uploads/OpenMP-API-Specification-5-2.pdf 569f0612e4SBarry Smith // #pragma omp parallel for (same performance as below) 579f0612e4SBarry Smith // #pragma omp parallel for simd schedule(static) (same performance as below) 589f0612e4SBarry Smith #pragma omp parallel for schedule(static) 599f0612e4SBarry Smith for (register int j = 0; j < N; j++) a[j] = b[j] + scalar * c[j]; 609f0612e4SBarry Smith times[k] = MPI_Wtime() - times[k]; 619f0612e4SBarry Smith } 629f0612e4SBarry Smith for (int k = 1; k < NTIMES; k++) { /* note -- skip first iteration */ 639f0612e4SBarry Smith mintime = MIN(mintime, times[k]); 645d28107eSBarry Smith } 655d28107eSBarry Smith 669f0612e4SBarry Smith if (size == 65) printf("Never printed %g\n", a[11]); 679f0612e4SBarry Smith rate = 1.0E-06 * bytes / mintime; 684198fb66SBarry Smith 694198fb66SBarry Smith if (size == 1) { 709f0612e4SBarry Smith printf("%d %11.4f Rate (MB/s) 1\n", size, rate); 714198fb66SBarry Smith fd = fopen("flops", "w"); 724198fb66SBarry Smith fprintf(fd, "%g\n", rate); 734198fb66SBarry Smith fclose(fd); 744198fb66SBarry Smith } else { 754198fb66SBarry Smith double prate; 764198fb66SBarry Smith fd = fopen("flops", "r"); 774198fb66SBarry Smith fscanf(fd, "%lg", &prate); 784198fb66SBarry Smith fclose(fd); 794198fb66SBarry Smith printf("%d %11.4f Rate (MB/s) %g\n", size, rate, rate / prate); 804198fb66SBarry Smith } 819f0612e4SBarry Smith MPI_Finalize(); 825d28107eSBarry Smith return 0; 835d28107eSBarry Smith } 84