xref: /petsc/src/sys/tests/ex69f.F90 (revision 5c7eeb11becdfeb7b242fcc1fa72a9500cb0aba8)
1program ex69F90
2
3!   Demonstrates two issues
4!
5!   A) How using mpiexec to start up a program can dramatically change
6!      the OpenMP thread binding/mapping resulting in poor performance
7!
8!      Set the environmental variable with, for example,
9!        export OMP_NUM_THREADS=4
10!      Run this example on one MPI process three ways
11!        ./ex69f
12!        mpiexec -n 1 ./ex69f
13!        mpiexec --bind-to numa -n 1 ./ex69f
14!
15!      You may get very different wall clock times
16!      It seems some mpiexec implementations change the thread binding/mapping that results with
17!      OpenMP so all the threads are run on a single core
18!
19!      The same differences occur without the PetscInitialize() call indicating
20!      the binding change is done by the mpiexec, not the MPI_Init()
21!
22!   B) How cpu_time() may give unexpected results, much larger than expected,
23!      even for code portions with no OpenMP
24!
25!      Note the CPU time for output of the second loop, it should equal the wallclock time
26!      since the loop is not run in parallel (with OpenMP) but instead it may be listed as
27!      many times higher
28!
29!     $ OMP_NUM_THREADS=8 ./ex69f (ifort compiler)
30!       CPU time reported by cpu_time()              1.66649300000000
31!       Wall clock time reported by system_clock()   0.273980000000000
32!       Wall clock time reported by omp_get_wtime()  0.273979902267456
33!
34#include <petsc/finclude/petscsys.h>
35  use petsc
36  implicit none
37
38  PetscErrorCode ierr
39  double precision cputime_start, cputime_end, wtime_start, wtime_end, omp_get_wtime
40  integer(kind=8) systime_start, systime_end, systime_rate
41  double precision x(100)
42  integer i, maxthreads, omp_get_max_threads
43
44  PetscCallA(PetscInitialize(ierr))
45  call system_clock(systime_start, systime_rate)
46  wtime_start = omp_get_wtime()
47  call cpu_time(cputime_start)
48!$OMP PARALLEL DO
49  do i = 1, 100
50    x(i) = exp(3.0d0*i)
51  end do
52  call cpu_time(cputime_end)
53  call system_clock(systime_end, systime_rate)
54  wtime_end = omp_get_wtime()
55  print *, 'CPU time reported by cpu_time()            ', cputime_end - cputime_start
56  print *, 'Wall clock time reported by system_clock() ', real(systime_end - systime_start, kind=8)/real(systime_rate, kind=8)
57  print *, 'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start
58  print *, 'Value of x(22)', x(22)
59!$ maxthreads = omp_get_max_threads()
60  print *, 'Number of threads set', maxthreads
61
62  call system_clock(systime_start, systime_rate)
63  wtime_start = omp_get_wtime()
64  call cpu_time(cputime_start)
65  do i = 1, 100
66    x(i) = exp(3.0d0*i)
67  end do
68  call cpu_time(cputime_end)
69  call system_clock(systime_end, systime_rate)
70  wtime_end = omp_get_wtime()
71  print *, 'CPU time reported by cpu_time()            ', cputime_end - cputime_start
72  print *, 'Wall clock time reported by system_clock() ', real(systime_end - systime_start, kind=8)/real(systime_rate, kind=8)
73  print *, 'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start
74  print *, 'Value of x(22)', x(22)
75  PetscCallA(PetscFinalize(ierr))
76end program ex69F90
77
78!/*TEST
79!
80!   build:
81!     requires: openmp
82!
83!   test:
84!     filter: grep -v "Number of threads"
85!
86!TEST*/
87