#include <petsc/finclude/petscsys.h>
program ex69F90

! Demonstrates two issues
!
! A) How using mpiexec to start up a program can dramatically change
!    the OpenMP thread binding/mapping resulting in poor performance
!
!    Set the environmental variable with, for example,
!      export OMP_NUM_THREADS=4
!    Run this example on one MPI process three ways
!      ./ex69f
!      mpiexec -n 1 ./ex69f
!      mpiexec --bind-to numa -n 1 ./ex69f
!
!    You may get very different wall clock times
!    It seems some mpiexec implementations change the thread binding/mapping that results with
!    OpenMP so all the threads are run on a single core
!
!    The same differences occur without the PetscInitialize() call indicating
!    the binding change is done by the mpiexec, not the MPI_Init()
!
! B) How cpu_time() may give unexpected results, much larger than expected,
!    even for code portions with no OpenMP
!
!    Note the CPU time for output of the second loop, it should equal the wallclock time
!    since the loop is not run in parallel (with OpenMP) but instead it may be listed as
!    many times higher
!
!    $ OMP_NUM_THREADS=8 ./ex69f (ifort compiler)
!      CPU time reported by cpu_time() 1.66649300000000
!      Wall clock time reported by system_clock() 0.273980000000000
!      Wall clock time reported by omp_get_wtime() 0.273979902267456
!
  use petsc
  implicit none

  PetscErrorCode ierr
  ! Start/end stamps for the three clocks that are compared; omp_get_wtime and
  ! omp_get_max_threads are declared here as external functions
  double precision cputime_start, cputime_end, wtime_start, wtime_end, omp_get_wtime
  integer(kind=8) systime_start, systime_end, systime_rate
  double precision x(100)
  integer i, maxthreads, omp_get_max_threads

  PetscCallA(PetscInitialize(ierr))

  ! First measurement: the loop carries an OpenMP PARALLEL DO directive
  call system_clock(systime_start, systime_rate)
  wtime_start = omp_get_wtime()
  call cpu_time(cputime_start)
!$OMP PARALLEL DO
  do i = 1, 100
    x(i) = exp(3.0d0*i)
  end do
  call cpu_time(cputime_end)
  call system_clock(systime_end, systime_rate)
  wtime_end = omp_get_wtime()
  print *, 'CPU time reported by cpu_time() ', cputime_end - cputime_start
  print *, 'Wall clock time reported by system_clock() ', &
    real(systime_end - systime_start, kind=8)/real(systime_rate, kind=8)
  print *, 'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start
  ! Printing an element keeps the result of the loop in use
  print *, 'Value of x(22)', x(22)

  ! The assignment on the '!$' line is only compiled when OpenMP conditional
  ! compilation is active; give maxthreads a defined value for the other case
  ! so the print below never reads an undefined variable
  maxthreads = 1
!$ maxthreads = omp_get_max_threads()
  print *, 'Number of threads set', maxthreads

  ! Second measurement: the same loop with no OpenMP directive
  call system_clock(systime_start, systime_rate)
  wtime_start = omp_get_wtime()
  call cpu_time(cputime_start)
  do i = 1, 100
    x(i) = exp(3.0d0*i)
  end do
  call cpu_time(cputime_end)
  call system_clock(systime_end, systime_rate)
  wtime_end = omp_get_wtime()
  print *, 'CPU time reported by cpu_time() ', cputime_end - cputime_start
  print *, 'Wall clock time reported by system_clock() ', &
    real(systime_end - systime_start, kind=8)/real(systime_rate, kind=8)
  print *, 'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start
  print *, 'Value of x(22)', x(22)
  PetscCallA(PetscFinalize(ierr))
end program ex69F90

!/*TEST
!
!   build:
!     requires: openmp
!
!   test:
!     filter: grep -v "Number of threads"
!
!TEST*/