xref: /petsc/src/sys/tests/ex69f.F90 (revision a336c15037c72f93cd561f5a5e11e93175f2efd9)
1#include <petsc/finclude/petscsys.h>
2program ex69F90
3
4!   Demonstrates two issues
5!
6!   A) How using mpiexec to start up a program can dramatically change
7!      the OpenMP thread binding/mapping resulting in poor performance
8!
9!      Set the environmental variable with, for example,
10!        export OMP_NUM_THREADS=4
11!      Run this example on one MPI process three ways
12!        ./ex69f
13!        mpiexec -n 1 ./ex69f
14!        mpiexec --bind-to numa -n 1 ./ex69f
15!
16!      You may get very different wall clock times
17!      It seems some mpiexec implementations change the thread binding/mapping that results with
18!      OpenMP so all the threads are run on a single core
19!
20!      The same differences occur without the PetscInitialize() call indicating
21!      the binding change is done by the mpiexec, not the MPI_Init()
22!
23!   B) How cpu_time() may give unexpected results, much larger than expected,
24!      even for code portions with no OpenMP
25!
26!      Note the CPU time for output of the second loop, it should equal the wallclock time
27!      since the loop is not run in parallel (with OpenMP) but instead it may be listed as
28!      many times higher
29!
30!     $ OMP_NUM_THREADS=8 ./ex69f (ifort compiler)
31!       CPU time reported by cpu_time()              1.66649300000000
32!       Wall clock time reported by system_clock()   0.273980000000000
33!       Wall clock time reported by omp_get_wtime()  0.273979902267456
34!
35  use petsc
36  implicit none
37
38  PetscErrorCode ierr
39  double precision cputime_start, cputime_end, wtime_start, wtime_end, omp_get_wtime
40  integer(kind=8) systime_start, systime_end, systime_rate
41  double precision x(100)
42  integer i, maxthreads, omp_get_max_threads
43
44  PetscCallA(PetscInitialize(ierr))
45  call system_clock(systime_start, systime_rate)
46  wtime_start = omp_get_wtime()
47  call cpu_time(cputime_start)
48!$OMP PARALLEL DO
49  do i = 1, 100
50    x(i) = exp(3.0d0*i)
51  end do
52  call cpu_time(cputime_end)
53  call system_clock(systime_end, systime_rate)
54  wtime_end = omp_get_wtime()
55  print *, 'CPU time reported by cpu_time()            ', cputime_end - cputime_start
56  print *, 'Wall clock time reported by system_clock() ', real(systime_end - systime_start, kind=8)/real(systime_rate, kind=8)
57  print *, 'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start
58  print *, 'Value of x(22)', x(22)
59!$ maxthreads = omp_get_max_threads()
60  print *, 'Number of threads set', maxthreads
61
62  call system_clock(systime_start, systime_rate)
63  wtime_start = omp_get_wtime()
64  call cpu_time(cputime_start)
65  do i = 1, 100
66    x(i) = exp(3.0d0*i)
67  end do
68  call cpu_time(cputime_end)
69  call system_clock(systime_end, systime_rate)
70  wtime_end = omp_get_wtime()
71  print *, 'CPU time reported by cpu_time()            ', cputime_end - cputime_start
72  print *, 'Wall clock time reported by system_clock() ', real(systime_end - systime_start, kind=8)/real(systime_rate, kind=8)
73  print *, 'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start
74  print *, 'Value of x(22)', x(22)
75  PetscCallA(PetscFinalize(ierr))
76end program ex69F90
77
78!/*TEST
79!
80!   build:
81!     requires: openmp
82!
83!   test:
84!     filter: grep -v "Number of threads"
85!
86!TEST*/
87