! Program usage: mpiexec -n 1 chwirut1f [-help] [all TAO options] ! ! Description: This example demonstrates use of the TAO package to solve a ! nonlinear least-squares problem on a single processor. We minimize the ! Chwirut function: ! sum_{i=0}^{n/2-1} ( alpha*(x_{2i+1}-x_{2i}^2)^2 + (1-x_{2i})^2) ! ! The C version of this code is chwirut1.c ! #include module chwirut2fmodule use petsctao implicit none PetscReal t(0:213) PetscReal y(0:213) PetscInt, parameter :: m = 214, n = 3 PetscMPIInt, parameter :: nn = n PetscMPIInt rank PetscMPIInt size PetscMPIInt, parameter :: idle_tag = 2000, die_tag = 3000 PetscMPIInt, parameter :: zero = 0, one = 1 contains subroutine InitializeData() PetscInt i i = 0 y(i) = 92.9000; t(i) = 0.5000; i = i + 1 y(i) = 78.7000; t(i) = 0.6250; i = i + 1 y(i) = 64.2000; t(i) = 0.7500; i = i + 1 y(i) = 64.9000; t(i) = 0.8750; i = i + 1 y(i) = 57.1000; t(i) = 1.0000; i = i + 1 y(i) = 43.3000; t(i) = 1.2500; i = i + 1 y(i) = 31.1000; t(i) = 1.7500; i = i + 1 y(i) = 23.6000; t(i) = 2.2500; i = i + 1 y(i) = 31.0500; t(i) = 1.7500; i = i + 1 y(i) = 23.7750; t(i) = 2.2500; i = i + 1 y(i) = 17.7375; t(i) = 2.7500; i = i + 1 y(i) = 13.8000; t(i) = 3.2500; i = i + 1 y(i) = 11.5875; t(i) = 3.7500; i = i + 1 y(i) = 9.4125; t(i) = 4.2500; i = i + 1 y(i) = 7.7250; t(i) = 4.7500; i = i + 1 y(i) = 7.3500; t(i) = 5.2500; i = i + 1 y(i) = 8.0250; t(i) = 5.7500; i = i + 1 y(i) = 90.6000; t(i) = 0.5000; i = i + 1 y(i) = 76.9000; t(i) = 0.6250; i = i + 1 y(i) = 71.6000; t(i) = 0.7500; i = i + 1 y(i) = 63.6000; t(i) = 0.8750; i = i + 1 y(i) = 54.0000; t(i) = 1.0000; i = i + 1 y(i) = 39.2000; t(i) = 1.2500; i = i + 1 y(i) = 29.3000; t(i) = 1.7500; i = i + 1 y(i) = 21.4000; t(i) = 2.2500; i = i + 1 y(i) = 29.1750; t(i) = 1.7500; i = i + 1 y(i) = 22.1250; t(i) = 2.2500; i = i + 1 y(i) = 17.5125; t(i) = 2.7500; i = i + 1 y(i) = 14.2500; t(i) = 3.2500; i = i + 1 y(i) = 9.4500; t(i) = 3.7500; i = i + 1 y(i) = 9.1500; t(i) = 4.2500; i = i + 1 y(i) = 7.9125; t(i) = 4.7500; i = i + 1 y(i) = 8.4750; t(i) = 5.2500; i = i + 1 y(i) = 6.1125; t(i) = 5.7500; i = i + 1 y(i) = 80.0000; t(i) = 0.5000; i = i + 1 y(i) = 79.0000; t(i) = 0.6250; i = i + 1 y(i) = 63.8000; t(i) = 0.7500; i = i + 1 y(i) = 57.2000; t(i) = 0.8750; i = i + 1 y(i) = 53.2000; t(i) = 1.0000; i = i + 1 y(i) = 42.5000; t(i) = 1.2500; i = i + 1 y(i) = 26.8000; t(i) = 1.7500; i = i + 1 y(i) = 20.4000; t(i) = 2.2500; i = i + 1 y(i) = 26.8500; t(i) = 1.7500; i = i + 1 y(i) = 21.0000; t(i) = 2.2500; i = i + 1 y(i) = 16.4625; t(i) = 2.7500; i = i + 1 y(i) = 12.5250; t(i) = 3.2500; i = i + 1 y(i) = 10.5375; t(i) = 3.7500; i = i + 1 y(i) = 8.5875; t(i) = 4.2500; i = i + 1 y(i) = 7.1250; t(i) = 4.7500; i = i + 1 y(i) = 6.1125; t(i) = 5.2500; i = i + 1 y(i) = 5.9625; t(i) = 5.7500; i = i + 1 y(i) = 74.1000; t(i) = 0.5000; i = i + 1 y(i) = 67.3000; t(i) = 0.6250; i = i + 1 y(i) = 60.8000; t(i) = 0.7500; i = i + 1 y(i) = 55.5000; t(i) = 0.8750; i = i + 1 y(i) = 50.3000; t(i) = 1.0000; i = i + 1 y(i) = 41.0000; t(i) = 1.2500; i = i + 1 y(i) = 29.4000; t(i) = 1.7500; i = i + 1 y(i) = 20.4000; t(i) = 2.2500; i = i + 1 y(i) = 29.3625; t(i) = 1.7500; i = i + 1 y(i) = 21.1500; t(i) = 2.2500; i = i + 1 y(i) = 16.7625; t(i) = 2.7500; i = i + 1 y(i) = 13.2000; t(i) = 3.2500; i = i + 1 y(i) = 10.8750; t(i) = 3.7500; i = i + 1 y(i) = 8.1750; t(i) = 4.2500; i = i + 1 y(i) = 7.3500; t(i) = 4.7500; i = i + 1 y(i) = 5.9625; t(i) = 5.2500; i = i + 1 y(i) = 5.6250; t(i) = 5.7500; i = i + 1 y(i) = 81.5000; t(i) = .5000; i = i + 1 y(i) = 62.4000; t(i) = .7500; i = i + 1 y(i) = 32.5000; t(i) = 1.5000; i = i + 1 y(i) = 12.4100; t(i) = 3.0000; i = i + 1 y(i) = 13.1200; t(i) = 3.0000; i = i + 1 y(i) = 15.5600; t(i) = 3.0000; i = i + 1 y(i) = 5.6300; t(i) = 6.0000; i = i + 1 y(i) = 78.0000; t(i) = .5000; i = i + 1 y(i) = 59.9000; t(i) = .7500; i = i + 1 y(i) = 33.2000; t(i) = 1.5000; i = i + 1 y(i) = 13.8400; t(i) = 3.0000; i = i + 1 y(i) = 12.7500; t(i) = 3.0000; i = i + 1 y(i) = 14.6200; t(i) = 3.0000; i = i + 1 y(i) = 3.9400; t(i) = 6.0000; i = i + 1 y(i) = 76.8000; t(i) = .5000; i = i + 1 y(i) = 61.0000; t(i) = .7500; i = i + 1 y(i) = 32.9000; t(i) = 1.5000; i = i + 1 y(i) = 13.8700; t(i) = 3.0000; i = i + 1 y(i) = 11.8100; t(i) = 3.0000; i = i + 1 y(i) = 13.3100; t(i) = 3.0000; i = i + 1 y(i) = 5.4400; t(i) = 6.0000; i = i + 1 y(i) = 78.0000; t(i) = .5000; i = i + 1 y(i) = 63.5000; t(i) = .7500; i = i + 1 y(i) = 33.8000; t(i) = 1.5000; i = i + 1 y(i) = 12.5600; t(i) = 3.0000; i = i + 1 y(i) = 5.6300; t(i) = 6.0000; i = i + 1 y(i) = 12.7500; t(i) = 3.0000; i = i + 1 y(i) = 13.1200; t(i) = 3.0000; i = i + 1 y(i) = 5.4400; t(i) = 6.0000; i = i + 1 y(i) = 76.8000; t(i) = .5000; i = i + 1 y(i) = 60.0000; t(i) = .7500; i = i + 1 y(i) = 47.8000; t(i) = 1.0000; i = i + 1 y(i) = 32.0000; t(i) = 1.5000; i = i + 1 y(i) = 22.2000; t(i) = 2.0000; i = i + 1 y(i) = 22.5700; t(i) = 2.0000; i = i + 1 y(i) = 18.8200; t(i) = 2.5000; i = i + 1 y(i) = 13.9500; t(i) = 3.0000; i = i + 1 y(i) = 11.2500; t(i) = 4.0000; i = i + 1 y(i) = 9.0000; t(i) = 5.0000; i = i + 1 y(i) = 6.6700; t(i) = 6.0000; i = i + 1 y(i) = 75.8000; t(i) = .5000; i = i + 1 y(i) = 62.0000; t(i) = .7500; i = i + 1 y(i) = 48.8000; t(i) = 1.0000; i = i + 1 y(i) = 35.2000; t(i) = 1.5000; i = i + 1 y(i) = 20.0000; t(i) = 2.0000; i = i + 1 y(i) = 20.3200; t(i) = 2.0000; i = i + 1 y(i) = 19.3100; t(i) = 2.5000; i = i + 1 y(i) = 12.7500; t(i) = 3.0000; i = i + 1 y(i) = 10.4200; t(i) = 4.0000; i = i + 1 y(i) = 7.3100; t(i) = 5.0000; i = i + 1 y(i) = 7.4200; t(i) = 6.0000; i = i + 1 y(i) = 70.5000; t(i) = .5000; i = i + 1 y(i) = 59.5000; t(i) = .7500; i = i + 1 y(i) = 48.5000; t(i) = 1.0000; i = i + 1 y(i) = 35.8000; t(i) = 1.5000; i = i + 1 y(i) = 21.0000; t(i) = 2.0000; i = i + 1 y(i) = 21.6700; t(i) = 2.0000; i = i + 1 y(i) = 21.0000; t(i) = 2.5000; i = i + 1 y(i) = 15.6400; t(i) = 3.0000; i = i + 1 y(i) = 8.1700; t(i) = 4.0000; i = i + 1 y(i) = 8.5500; t(i) = 5.0000; i = i + 1 y(i) = 10.1200; t(i) = 6.0000; i = i + 1 y(i) = 78.0000; t(i) = .5000; i = i + 1 y(i) = 66.0000; t(i) = .6250; i = i + 1 y(i) = 62.0000; t(i) = .7500; i = i + 1 y(i) = 58.0000; t(i) = .8750; i = i + 1 y(i) = 47.7000; t(i) = 1.0000; i = i + 1 y(i) = 37.8000; t(i) = 1.2500; i = i + 1 y(i) = 20.2000; t(i) = 2.2500; i = i + 1 y(i) = 21.0700; t(i) = 2.2500; i = i + 1 y(i) = 13.8700; t(i) = 2.7500; i = i + 1 y(i) = 9.6700; t(i) = 3.2500; i = i + 1 y(i) = 7.7600; t(i) = 3.7500; i = i + 1 y(i) = 5.4400; t(i) = 4.2500; i = i + 1 y(i) = 4.8700; t(i) = 4.7500; i = i + 1 y(i) = 4.0100; t(i) = 5.2500; i = i + 1 y(i) = 3.7500; t(i) = 5.7500; i = i + 1 y(i) = 24.1900; t(i) = 3.0000; i = i + 1 y(i) = 25.7600; t(i) = 3.0000; i = i + 1 y(i) = 18.0700; t(i) = 3.0000; i = i + 1 y(i) = 11.8100; t(i) = 3.0000; i = i + 1 y(i) = 12.0700; t(i) = 3.0000; i = i + 1 y(i) = 16.1200; t(i) = 3.0000; i = i + 1 y(i) = 70.8000; t(i) = .5000; i = i + 1 y(i) = 54.7000; t(i) = .7500; i = i + 1 y(i) = 48.0000; t(i) = 1.0000; i = i + 1 y(i) = 39.8000; t(i) = 1.5000; i = i + 1 y(i) = 29.8000; t(i) = 2.0000; i = i + 1 y(i) = 23.7000; t(i) = 2.5000; i = i + 1 y(i) = 29.6200; t(i) = 2.0000; i = i + 1 y(i) = 23.8100; t(i) = 2.5000; i = i + 1 y(i) = 17.7000; t(i) = 3.0000; i = i + 1 y(i) = 11.5500; t(i) = 4.0000; i = i + 1 y(i) = 12.0700; t(i) = 5.0000; i = i + 1 y(i) = 8.7400; t(i) = 6.0000; i = i + 1 y(i) = 80.7000; t(i) = .5000; i = i + 1 y(i) = 61.3000; t(i) = .7500; i = i + 1 y(i) = 47.5000; t(i) = 1.0000; i = i + 1 y(i) = 29.0000; t(i) = 1.5000; i = i + 1 y(i) = 24.0000; t(i) = 2.0000; i = i + 1 y(i) = 17.7000; t(i) = 2.5000; i = i + 1 y(i) = 24.5600; t(i) = 2.0000; i = i + 1 y(i) = 18.6700; t(i) = 2.5000; i = i + 1 y(i) = 16.2400; t(i) = 3.0000; i = i + 1 y(i) = 8.7400; t(i) = 4.0000; i = i + 1 y(i) = 7.8700; t(i) = 5.0000; i = i + 1 y(i) = 8.5100; t(i) = 6.0000; i = i + 1 y(i) = 66.7000; t(i) = .5000; i = i + 1 y(i) = 59.2000; t(i) = .7500; i = i + 1 y(i) = 40.8000; t(i) = 1.0000; i = i + 1 y(i) = 30.7000; t(i) = 1.5000; i = i + 1 y(i) = 25.7000; t(i) = 2.0000; i = i + 1 y(i) = 16.3000; t(i) = 2.5000; i = i + 1 y(i) = 25.9900; t(i) = 2.0000; i = i + 1 y(i) = 16.9500; t(i) = 2.5000; i = i + 1 y(i) = 13.3500; t(i) = 3.0000; i = i + 1 y(i) = 8.6200; t(i) = 4.0000; i = i + 1 y(i) = 7.2000; t(i) = 5.0000; i = i + 1 y(i) = 6.6400; t(i) = 6.0000; i = i + 1 y(i) = 13.6900; t(i) = 3.0000; i = i + 1 y(i) = 81.0000; t(i) = .5000; i = i + 1 y(i) = 64.5000; t(i) = .7500; i = i + 1 y(i) = 35.5000; t(i) = 1.5000; i = i + 1 y(i) = 13.3100; t(i) = 3.0000; i = i + 1 y(i) = 4.8700; t(i) = 6.0000; i = i + 1 y(i) = 12.9400; t(i) = 3.0000; i = i + 1 y(i) = 5.0600; t(i) = 6.0000; i = i + 1 y(i) = 15.1900; t(i) = 3.0000; i = i + 1 y(i) = 14.6200; t(i) = 3.0000; i = i + 1 y(i) = 15.6400; t(i) = 3.0000; i = i + 1 y(i) = 25.5000; t(i) = 1.7500; i = i + 1 y(i) = 25.9500; t(i) = 1.7500; i = i + 1 y(i) = 81.7000; t(i) = .5000; i = i + 1 y(i) = 61.6000; t(i) = .7500; i = i + 1 y(i) = 29.8000; t(i) = 1.7500; i = i + 1 y(i) = 29.8100; t(i) = 1.7500; i = i + 1 y(i) = 17.1700; t(i) = 2.7500; i = i + 1 y(i) = 10.3900; t(i) = 3.7500; i = i + 1 y(i) = 28.4000; t(i) = 1.7500; i = i + 1 y(i) = 28.6900; t(i) = 1.7500; i = i + 1 y(i) = 81.3000; t(i) = .5000; i = i + 1 y(i) = 60.9000; t(i) = .7500; i = i + 1 y(i) = 16.6500; t(i) = 2.7500; i = i + 1 y(i) = 10.0500; t(i) = 3.7500; i = i + 1 y(i) = 28.9000; t(i) = 1.7500; i = i + 1 y(i) = 28.9500; t(i) = 1.7500; i = i + 1 end subroutine TaskWorker(ierr) PetscErrorCode ierr PetscReal x(n), f(1) PetscMPIInt tag PetscInt index #if defined(PETSC_USE_MPI_F08) MPIU_Status status #else MPIU_Status status(MPI_STATUS_SIZE) #endif tag = IDLE_TAG f = 0.0 ! Send check-in message to rank-0 PetscCallMPI(MPI_Send(f, one, MPIU_SCALAR, zero, IDLE_TAG, PETSC_COMM_WORLD, ierr)) do while (tag /= DIE_TAG) PetscCallMPI(MPI_Recv(x, nn, MPIU_SCALAR, zero, MPI_ANY_TAG, PETSC_COMM_WORLD, status, ierr)) #if defined(PETSC_USE_MPI_F08) tag = status%MPI_TAG #else tag = status(MPI_TAG) #endif if (tag == IDLE_TAG) then PetscCallMPI(MPI_Send(f, one, MPIU_SCALAR, zero, IDLE_TAG, PETSC_COMM_WORLD, ierr)) else if (tag /= DIE_TAG) then index = tag ! Compute local part of residual PetscCall(RunSimulation(x, index, f(1), ierr)) ! Return residual to rank-0 PetscCallMPI(MPI_Send(f, one, MPIU_SCALAR, zero, tag, PETSC_COMM_WORLD, ierr)) end if end do ierr = 0 end subroutine RunSimulation(x, i, f, ierr) PetscReal x(n), f PetscInt i PetscErrorCode ierr f = y(i) - exp(-x(1)*t(i))/(x(2) + x(3)*t(i)) ierr = 0 end subroutine StopWorkers(ierr) integer checkedin #if defined(PETSC_USE_MPI_F08) MPIU_Status status #else MPIU_Status status(MPI_STATUS_SIZE) #endif PetscMPIInt source PetscReal f(1), x(n) PetscErrorCode ierr PetscInt i checkedin = 0 do while (checkedin < size - 1) PetscCallMPI(MPI_Recv(f, one, MPIU_SCALAR, MPI_ANY_SOURCE, MPI_ANY_TAG, PETSC_COMM_WORLD, status, ierr)) checkedin = checkedin + 1 #if defined(PETSC_USE_MPI_F08) source = status%MPI_SOURCE #else source = status(MPI_SOURCE) #endif do i = 1, n x(i) = 0.0 end do PetscCallMPI(MPI_Send(x, nn, MPIU_SCALAR, source, DIE_TAG, PETSC_COMM_WORLD, ierr)) end do ierr = 0 end ! -------------------------------------------------------------------- ! FormFunction - Evaluates the function f(X) and gradient G(X) ! ! Input Parameters: ! tao - the Tao context ! X - input vector ! dummy - not used ! ! Output Parameters: ! f - function vector subroutine FormFunction(ta, x, f, dummy, ierr) Tao ta Vec x, f PetscErrorCode ierr PetscInt i, checkedin PetscInt finished_tasks PetscMPIInt next_task PetscMPIInt tag, source #if defined(PETSC_USE_MPI_F08) MPIU_Status status #else MPIU_Status status(MPI_STATUS_SIZE) #endif PetscInt dummy PetscReal, pointer :: f_v(:), x_v(:) PetscReal fval(1) ierr = 0 ! Get pointers to vector data PetscCall(VecGetArrayRead(x, x_v, ierr)) PetscCall(VecGetArray(f, f_v, ierr)) ! Compute F(X) if (size == 1) then ! Single processor do i = 1, m PetscCall(RunSimulation(x_v, i, f_v(i), ierr)) end do else ! Multiprocessor main next_task = zero finished_tasks = 0 checkedin = 0 do while (finished_tasks < m .or. checkedin < size - 1) PetscCallMPI(MPI_Recv(fval, one, MPIU_SCALAR, MPI_ANY_SOURCE, MPI_ANY_TAG, PETSC_COMM_WORLD, status, ierr)) #if defined(PETSC_USE_MPI_F08) tag = status%MPI_TAG source = status%MPI_SOURCE #else tag = status(MPI_TAG) source = status(MPI_SOURCE) #endif if (tag == IDLE_TAG) then checkedin = checkedin + 1 else f_v(tag + 1) = fval(1) finished_tasks = finished_tasks + 1 end if if (next_task < m) then ! Send task to worker PetscCallMPI(MPI_Send(x_v, nn, MPIU_SCALAR, source, next_task, PETSC_COMM_WORLD, ierr)) next_task = next_task + one else ! Send idle message to worker PetscCallMPI(MPI_Send(x_v, nn, MPIU_SCALAR, source, IDLE_TAG, PETSC_COMM_WORLD, ierr)) end if end do end if ! Restore vectors PetscCall(VecRestoreArrayRead(x, x_v, ierr)) PetscCall(VecRestoreArray(F, f_v, ierr)) end subroutine FormStartingPoint(x) Vec x PetscReal, pointer :: x_v(:) PetscErrorCode ierr PetscCall(VecGetArray(x, x_v, ierr)) x_v(1) = 0.15 x_v(2) = 0.008 x_v(3) = 0.01 PetscCall(VecRestoreArray(x, x_v, ierr)) end end module chwirut2fmodule program main use chwirut2fmodule implicit none ! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ! Variable declarations ! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ! ! See additional variable declarations in the file chwirut2f.h PetscErrorCode ierr ! used to check for functions returning nonzeros Vec x ! solution vector Vec f ! vector of functions Tao ta ! Tao context ! Initialize TAO and PETSc PetscCallA(PetscInitialize(ierr)) PetscCallMPIA(MPI_Comm_size(PETSC_COMM_WORLD, size, ierr)) PetscCallMPIA(MPI_Comm_rank(PETSC_COMM_WORLD, rank, ierr)) ! Initialize problem parameters call InitializeData() if (rank == 0) then ! Allocate vectors for the solution and gradient PetscCallA(VecCreateSeq(PETSC_COMM_SELF, n, x, ierr)) PetscCallA(VecCreateSeq(PETSC_COMM_SELF, m, f, ierr)) ! The TAO code begins here ! Create TAO solver PetscCallA(TaoCreate(PETSC_COMM_SELF, ta, ierr)) PetscCallA(TaoSetType(ta, TAOPOUNDERS, ierr)) ! Set routines for function, gradient, and hessian evaluation PetscCallA(TaoSetResidualRoutine(ta, f, FormFunction, 0, ierr)) ! Optional: Set initial guess call FormStartingPoint(x) PetscCallA(TaoSetSolution(ta, x, ierr)) ! Check for TAO command line options PetscCallA(TaoSetFromOptions(ta, ierr)) ! SOLVE THE APPLICATION PetscCallA(TaoSolve(ta, ierr)) ! Free TAO data structures PetscCallA(TaoDestroy(ta, ierr)) ! Free PETSc data structures PetscCallA(VecDestroy(x, ierr)) PetscCallA(VecDestroy(f, ierr)) PetscCallA(StopWorkers(ierr)) else PetscCallA(TaskWorker(ierr)) end if PetscCallA(PetscFinalize(ierr)) end !/*TEST ! ! build: ! requires: !complex ! ! test: ! nsize: 3 ! args: -tao_monitor_short -tao_max_it 100 -tao_type pounders -tao_gatol 1.e-5 ! requires: !single ! ! !TEST*/