xref: /petsc/src/sys/tests/ex69f.F90 (revision 9b88ac225e01f016352a5f4cd90e158abe5f5675)
1*c5e229c2SMartin Diehl#include <petsc/finclude/petscsys.h>
program ex69F90

!   Demonstrates two issues
!
!   A) How using mpiexec to start up a program can dramatically change
!      the OpenMP thread binding/mapping resulting in poor performance
!
!      Set the environment variable with, for example,
!        export OMP_NUM_THREADS=4
!      Run this example on one MPI process three ways
!        ./ex69f
!        mpiexec -n 1 ./ex69f
!        mpiexec --bind-to numa -n 1 ./ex69f
!
!      You may get very different wall clock times
!      It seems some mpiexec implementations change the thread binding/mapping that results with
!      OpenMP so all the threads are run on a single core
!
!      The same differences occur without the PetscInitialize() call indicating
!      the binding change is done by the mpiexec, not the MPI_Init()
!
!   B) How cpu_time() may give unexpected results, much larger than expected,
!      even for code portions with no OpenMP
!
!      Note the CPU time printed for the second loop: it should equal the wall clock time
!      since that loop is not run in parallel (with OpenMP), but instead it may be listed as
!      many times higher
!
!     $ OMP_NUM_THREADS=8 ./ex69f (ifort compiler)
!       CPU time reported by cpu_time()              1.66649300000000
!       Wall clock time reported by system_clock()   0.273980000000000
!       Wall clock time reported by omp_get_wtime()  0.273979902267456
!
!   Program layout
!     1) time an OpenMP-parallel loop with cpu_time(), system_clock() and omp_get_wtime()
!     2) print the thread count returned by omp_get_max_threads()
!     3) time the same loop again, this time without the OpenMP directive
!
  use petsc
  implicit none

  PetscErrorCode ierr
  ! omp_get_wtime and omp_get_max_threads are declared here as external functions;
  ! the internal procedures below see them (and all timers) through host association
  double precision cputime_start, cputime_end, wtime_start, wtime_end, omp_get_wtime
  integer(kind=8) systime_start, systime_end, systime_rate
  double precision x(100)
  integer i, maxthreads, omp_get_max_threads

  PetscCallA(PetscInitialize(ierr))

  ! First measurement: loop iterations are distributed over the OpenMP threads
  call start_timers()
!$OMP PARALLEL DO
  do i = 1, 100
    x(i) = exp(3.0d0*i)
  end do
!$OMP END PARALLEL DO
  call stop_timers_and_report()

  ! Default to 1 so the value printed below is defined even when the
  ! conditionally compiled "!$" line is not active
  maxthreads = 1
!$ maxthreads = omp_get_max_threads()
  print *, 'Number of threads set', maxthreads

  ! Second measurement: identical loop, no OpenMP directive
  call start_timers()
  do i = 1, 100
    x(i) = exp(3.0d0*i)
  end do
  call stop_timers_and_report()

  PetscCallA(PetscFinalize(ierr))

contains

  ! Record the starting value of all three clocks.
  ! cpu_time() is sampled last so it is the start sample closest to the timed code.
  subroutine start_timers()
    call system_clock(systime_start, systime_rate)
    wtime_start = omp_get_wtime()
    call cpu_time(cputime_start)
  end subroutine start_timers

  ! Record the ending value of all three clocks (cpu_time() first, mirroring
  ! start_timers), then print the three elapsed times followed by x(22).
  subroutine stop_timers_and_report()
    call cpu_time(cputime_end)
    call system_clock(systime_end, systime_rate)
    wtime_end = omp_get_wtime()
    print *, 'CPU time reported by cpu_time()            ', cputime_end - cputime_start
    ! system_clock() counts ticks; dividing by the tick rate converts to seconds
    print *, 'Wall clock time reported by system_clock() ', &
      real(systime_end - systime_start, kind=8)/real(systime_rate, kind=8)
    print *, 'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start
    print *, 'Value of x(22)', x(22)
  end subroutine stop_timers_and_report

end program ex69F90
779f0612e4SBarry Smith
789f0612e4SBarry Smith!/*TEST
799f0612e4SBarry Smith!
809f0612e4SBarry Smith!   build:
819f0612e4SBarry Smith!     requires: openmp
829f0612e4SBarry Smith!
839f0612e4SBarry Smith!   test:
849f0612e4SBarry Smith!     filter: grep -v "Number of threads"
859f0612e4SBarry Smith!
869f0612e4SBarry Smith!TEST*/
87