xref: /petsc/src/sys/tests/ex69f.F90 (revision b63c620b66da884efdf690e70d6f9f5511a4e974)
1    program ex69F90
2
3!   Demonstrates two issues
4!
5!   A) How using mpiexec to start up a program can dramatically change
6!      the OpenMP thread binding/mapping resulting in poor performance
7!
8!      Set the environmental variable with, for example,
9!        export OMP_NUM_THREADS=4
10!      Run this example on one MPI process three ways
11!        ./ex69f
12!        mpiexec -n 1 ./ex69f
13!        mpiexec --bind-to numa -n 1 ./ex69f
14!
15!      You may get very different wall clock times
16!      It seems some mpiexec implementations change the thread binding/mapping that results with
17!      OpenMP so all the threads are run on a single core
18!
19!      The same differences occur without the PetscInitialize() call indicating
20!      the binding change is done by the mpiexec, not the MPI_Init()
21!
22!   B) How cpu_time() may give unexpected results, much larger than expected,
23!      even for code portions with no OpenMP
24!
25!      Note the CPU time for output of the second loop, it should equal the wallclock time
26!      since the loop is not run in parallel (with OpenMP) but instead it may be listed as
27!      many times higher
28!
29!     $ OMP_NUM_THREADS=8 ./ex69f (ifort compiler)
30!       CPU time reported by cpu_time()              1.66649300000000
31!       Wall clock time reported by system_clock()   0.273980000000000
32!       Wall clock time reported by omp_get_wtime()  0.273979902267456
33!
34#include <petsc/finclude/petscsys.h>
35    use petsc
36    implicit none
37
38    PetscErrorCode ierr
39    double precision cputime_start,cputime_end,wtime_start,wtime_end,omp_get_wtime
40    integer(kind = 8) systime_start,systime_end,systime_rate
41    double precision x(100)
42    integer i,maxthreads,omp_get_max_threads
43
44    PetscCallA(PetscInitialize(ierr))
45    call system_clock(systime_start,systime_rate)
46    wtime_start = omp_get_wtime()
47    call cpu_time(cputime_start)
48!$OMP PARALLEL DO
49    do i=1,100
50      x(i) = exp(3.0d0*i)
51    enddo
52    call cpu_time(cputime_end)
53    call system_clock(systime_end,systime_rate)
54    wtime_end = omp_get_wtime()
55    print*,'CPU time reported by cpu_time()            ', cputime_end - cputime_start
56    print*,'Wall clock time reported by system_clock() ',real(systime_end - systime_start,kind=8)/real(systime_rate,kind=8)
57    print*,'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start
58    print*,'Value of x(22)',x(22)
59!$  maxthreads = omp_get_max_threads()
60    print*,'Number of threads set',maxthreads
61
62    call system_clock(systime_start,systime_rate)
63    wtime_start = omp_get_wtime()
64    call cpu_time(cputime_start)
65    do i=1,100
66      x(i) = exp(3.0d0*i)
67    enddo
68    call cpu_time(cputime_end)
69    call system_clock(systime_end,systime_rate)
70    wtime_end = omp_get_wtime()
71    print*,'CPU time reported by cpu_time()            ', cputime_end - cputime_start
72    print*,'Wall clock time reported by system_clock() ',real(systime_end - systime_start,kind=8)/real(systime_rate,kind=8)
73    print*,'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start
74    print*,'Value of x(22)',x(22)
75    PetscCallA(PetscFinalize(ierr))
76end program ex69F90
77
78!/*TEST
79!
80!   build:
81!     requires: openmp
82!
83!   test:
84!     filter: grep -v "Number of threads"
85!
86!TEST*/
87