1#include <petsc/finclude/petscsys.h> 2program ex69F90 3 4! Demonstrates two issues 5! 6! A) How using mpiexec to start up a program can dramatically change 7! the OpenMP thread binding/mapping resulting in poor performance 8! 9! Set the environmental variable with, for example, 10! export OMP_NUM_THREADS=4 11! Run this example on one MPI process three ways 12! ./ex69f 13! mpiexec -n 1 ./ex69f 14! mpiexec --bind-to numa -n 1 ./ex69f 15! 16! You may get very different wall clock times 17! It seems some mpiexec implementations change the thread binding/mapping that results with 18! OpenMP so all the threads are run on a single core 19! 20! The same differences occur without the PetscInitialize() call indicating 21! the binding change is done by the mpiexec, not the MPI_Init() 22! 23! B) How cpu_time() may give unexpected results, much larger than expected, 24! even for code portions with no OpenMP 25! 26! Note the CPU time for output of the second loop, it should equal the wallclock time 27! since the loop is not run in parallel (with OpenMP) but instead it may be listed as 28! many times higher 29! 30! $ OMP_NUM_THREADS=8 ./ex69f (ifort compiler) 31! CPU time reported by cpu_time() 1.66649300000000 32! Wall clock time reported by system_clock() 0.273980000000000 33! Wall clock time reported by omp_get_wtime() 0.273979902267456 34! 35 use petsc 36 implicit none 37 38 PetscErrorCode ierr 39 double precision cputime_start, cputime_end, wtime_start, wtime_end, omp_get_wtime 40 integer(kind=8) systime_start, systime_end, systime_rate 41 double precision x(100) 42 integer i, maxthreads, omp_get_max_threads 43 44 PetscCallA(PetscInitialize(ierr)) 45 call system_clock(systime_start, systime_rate) 46 wtime_start = omp_get_wtime() 47 call cpu_time(cputime_start) 48!$OMP PARALLEL DO 49 do i = 1, 100 50 x(i) = exp(3.0d0*i) 51 end do 52 call cpu_time(cputime_end) 53 call system_clock(systime_end, systime_rate) 54 wtime_end = omp_get_wtime() 55 print *, 'CPU time reported by cpu_time() ', cputime_end - cputime_start 56 print *, 'Wall clock time reported by system_clock() ', real(systime_end - systime_start, kind=8)/real(systime_rate, kind=8) 57 print *, 'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start 58 print *, 'Value of x(22)', x(22) 59!$ maxthreads = omp_get_max_threads() 60 print *, 'Number of threads set', maxthreads 61 62 call system_clock(systime_start, systime_rate) 63 wtime_start = omp_get_wtime() 64 call cpu_time(cputime_start) 65 do i = 1, 100 66 x(i) = exp(3.0d0*i) 67 end do 68 call cpu_time(cputime_end) 69 call system_clock(systime_end, systime_rate) 70 wtime_end = omp_get_wtime() 71 print *, 'CPU time reported by cpu_time() ', cputime_end - cputime_start 72 print *, 'Wall clock time reported by system_clock() ', real(systime_end - systime_start, kind=8)/real(systime_rate, kind=8) 73 print *, 'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start 74 print *, 'Value of x(22)', x(22) 75 PetscCallA(PetscFinalize(ierr)) 76end program ex69F90 77 78!/*TEST 79! 80! build: 81! requires: openmp 82! 83! test: 84! filter: grep -v "Number of threads" 85! 86!TEST*/ 87