C*******************************************************************************
C       Start of the header for a fortran source file for a subroutine
C       of the Free_EOS stellar interior equation of state code
C       Copyright (C) 2006 Alan W. Irwin
C
C       $Id: bfgs.f,v 1.4 2006/04/18 16:53:34 airwin Exp $
C
C       For the latest version of this source code, please contact
C       Alan W. Irwin
C       Department of Physics and Astronomy
C       University of Victoria,
C       Box 3055
C       Victoria, B.C., Canada
C       V8W 3P6
C       e-mail: address@hidden
C
C    This program is free software; you can redistribute it and/or modify
C    it under the terms of the GNU General Public License as published by
C    the Free Software Foundation; either version 2 of the License, or
C    (at your option) any later version.
C
C    This program is distributed in the hope that it will be useful,
C    but WITHOUT ANY WARRANTY; without even the implied warranty of
C    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
C    GNU General Public License for more details.
C
C    You should have received a copy of the GNU General Public License
C    along with this program; if not, write to the Free Software
C    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
C
C       End of the header for a fortran source file for a subroutine
C       of the Free_EOS stellar interior equation of state code
C*******************************************************************************
C BFGS algorithm based on
C R. Fletcher, "Practical Methods of Optimization" 2nd ed.
C John Wiley and Sons, 1987.
C N.B. in the following comments this reference is referred to as PMOO.
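C N.B. the include file 'bfgs.h' is not reproduced in this file.
C Judging only from the way its symbols are used below (so this is
C an assumption about its contents, and nmax_bfgs = 100 is merely
C an illustrative value), a minimal sketch of it would be
C       integer*4 nmax_bfgs
C       parameter(nmax_bfgs = 100)
C       integer*4 iter, function_count, gradient_count
C       common /bfgs_counters/ iter, function_count, gradient_count
C where nmax_bfgs bounds the problem dimension n of the local work
C arrays and the counters accumulate the number of iterations and
C function/gradient evaluations across calls.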
      subroutine bfgs_constant_vector(n, constant, x)
C set vector x(n) to constant
      implicit none
      integer*4 n, i
      real*8 constant, x(n)
      do i = 1,n
        x(i) = constant
      enddo
      end
      subroutine bfgs_set(ffunction, dfsubroutine,
     &  n, x, f, gradient, p)
C initialize line search
      implicit none
      include 'bfgs.h'
      integer*4 n
      real*8 ffunction, p(n), x(n), f, gradient(n)
      iter = 0
      f = ffunction(n, x)
      function_count = 1
      call dfsubroutine(n, x, gradient)
      gradient_count = 1
C The initial preferred direction for the line search is in the
C direction of the gradient, i.e., steepest ascent.  (The negative
C sign of the direction, i.e., initial steepest descent, is
C figured out later.)
      call dcopy(n, gradient, 1, p, 1)
      end
      subroutine bfgs_take_step(n, x, p, step, lambda, x1, dx)
C take a step of size step in the -lambda*p direction.
C unchanged input:
C   x(n) is the current position vector
C   lambda*p(n) is the unit vector in the currently desired uphill
C     (not necessarily steepest ascent) direction.
C output results:
C   dx(n) = -step*lambda*p
C   x1(n) = x + dx
      implicit none
      integer*4 n
      real*8 x(n), p(n), step, lambda, x1(n), dx(n)
      call bfgs_constant_vector(n, 0.d0, dx)
      call daxpy(n, -step*lambda, p, 1, dx, 1)
      call dcopy(n, x, 1, x1, 1)
      call daxpy(n, 1.d0, dx, 1, x1, 1)
      end
      real*8 function bfgs_minimum(a, fa, fprimea, b, fb, x_lo, x_hi)
C Use quadratic interpolation or extrapolation to locate the
C minimum within a specified range.
C Note, PMOO, p. 37 recommends cubics for this, but Numerical Recipes
C is much more conservative and does not even use derivative
C information.  I will adopt a middle course and use a quadratic
C which will have smaller errors than a cubic when far from the solution
C and which should not compromise final rapid convergence (I think).
C input variables:
C   a is the first defined point.
C   fa and fprimea are the function and derivative values at that point.
C   b is the second defined point.
C   fb is the function value at that point.
C   x_lo through x_hi specify the range where the minimum
C     is to be located.
C The returned function value is the abscissa within [x_lo, x_hi]
C at which the interpolated/extrapolated quadratic is minimized.
      implicit none
      real*8 a, fa, fprimea, b, fb, x_lo, x_hi
      real*8 dx, q0, q1, q2, z_lo, z_hi, q2_lo, q2_hi,
     &  q_lo, q_hi, z_stationary, q_stationary, x_min
      dx = b - a
C Find coefficients of the interpolating quadratic in z, where z
C ranges from 0 to 1 within the defined interval [a, b], which agrees
C with fa and fprimea*dx at 0 and which agrees with fb at 1.
C z = (x - a)/dx or x = a + z*dx
C f(z) = q0 + z*(q1 + z*q2)
C f'(z) = q1 + 2.d0*q2*z with stationary point z = -q1*0.5/q2
      q0 = fa
      q1 = fprimea*dx
      q2 = fb - fa - q1
C z_lo and z_hi correspond to x_lo and x_hi.
      z_lo = (x_lo - a)/dx
      z_hi = (x_hi - a)/dx
C q2 corresponding to z_stationary = z_lo
      q2_lo = -q1*0.5d0/z_lo
C q2 corresponding to z_stationary = z_hi
      q2_hi = -q1*0.5d0/z_hi
      q_lo = q0 + z_lo*(q1 + z_lo*q2)
      q_hi = q0 + z_hi*(q1 + z_hi*q2)
      if((q2_lo.le.q2.and.q2.le.q2_hi).or.
     &  (q2_hi.le.q2.and.q2.le.q2_lo)) then
C z_stationary within range from z_lo to z_hi and can be
C calculated without fear of overflow problems at least
C for the usual calls of bfgs_minimum from bfgs_linesearch
        z_stationary = -q1*0.5d0/q2
        q_stationary = q0 + z_stationary*(q1 + z_stationary*q2)
        if(q_stationary.le.min(q_lo, q_hi)) then
          x_min = a + dx*z_stationary
        elseif(q_lo.le.min(q_stationary, q_hi)) then
          x_min = a + dx*z_lo
        elseif(q_hi.le.min(q_lo, q_stationary)) then
          x_min = a + dx*z_hi
        else
          stop 'bfgs_minimum: internal logic error'
        endif
      else
C z_stationary outside of range from z_lo to z_hi
        if(q_lo.le.q_hi) then
          x_min = a + dx*z_lo
        else
          x_min = a + dx*z_hi
        endif
      endif
      bfgs_minimum = x_min
      end
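C A minimal check of bfgs_minimum on a worked example (a hypothetical
C test routine added for illustration; bfgs_minimum_check is not part
C of the original code).  The data below describe the quadratic
C f(x) = 1 - x + x**2 through f(0) = 1, f'(0) = -1, f(1) = 1, whose
C stationary point x = 0.5 lies inside the requested range
C [0.1, 0.9], so the returned abscissa should be 0.5.
      subroutine bfgs_minimum_check
      implicit none
      real*8 bfgs_minimum, x_min
      x_min = bfgs_minimum(0.d0, 1.d0, -1.d0, 1.d0, 1.d0,
     &  0.1d0, 0.9d0)
      write(0,*) 'bfgs_minimum_check: expect 0.5, got', x_min
      end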
      subroutine bfgs_linesearch(ffunction, dfsubroutine,
     &  fbar, epsilon, n, p, alpha1, x, f, gradient, dx)
C Do a line search using the algorithm presented in PMOO, pp. 34-39.
C passed routine names:
C   ffunction is the name of the real*8 function ffunction(n, x) that
C     provides the function value.
C   dfsubroutine is the name of the subroutine dfsubroutine(n, x, gradient)
C     that provides the gradient.
C input quantities:
C   fbar is a user-specified minimum possible f used as per
C     PMOO, 2.6.1 to control the bracketing step size.  If actual f
C     values are <= fbar, then this routine stops with an error, so
C     be realistic in how you specify fbar, making it small enough
C     to avoid the error, but large enough to provide some control
C     over the maximum size of the bracketing step.
C   epsilon is the user-specified roundoff error tolerance on f
C     discussed on p. 38 of PMOO.  This quantity is not used to test
C     for convergence, but if the user uses a convergence test on f, it
C     should be larger than epsilon to avoid "lack of
C     convergence" error messages.
C   p(n) is the unscaled line search direction.  The PMOO scaled s vector
C     is defined by
C       s = -(dir/pnorm)*p
C     where dir = 1 if p has an acute angle with the input gradient and
C     dir = -1 otherwise.  pnorm is the norm of p.  dir and pnorm are
C     calculated internally.
C input and output quantities:
C   alpha1 is the initial step size used for the line search on input
C     and on output is the estimate of the initial step size
C     for the next line search.
C   x(n) is the starting point of the line search on input and on output
C     is the ending point of the line search.
C   N.B. both f and gradient must be precalculated on input and calculated
C   on output.  That is:
C   f is f(x(n)) on both input and output.
C   gradient(n) is the gradient(x(n)) on both input and output.
C output quantity:
C   dx(n) is the vector of differences between the initial x and final x.
      implicit none
      include 'bfgs.h'
      integer*4 n
      real*8 ffunction, fbar, epsilon,
     &  p(n), alpha1, x(n), f, gradient(n), dx(n)
C dimensionless line search constants recommended by PMOO, p. 37
C (superseded by PMOO, p. 69 for tau2 = 0.05 rather than tau2 = 0.10).
      real*8 sigma, rho, tau1, tau2, tau3
C use a fairly accurate line search.
      parameter(sigma = 0.1d0)
C rho must be less than or equal to sigma
      parameter(rho = 0.01d0)
C jump size factor increase used in bracketing phase.
      parameter(tau1 = 9.d0)
C 0 < tau2 < tau3 <= 0.5 and tau2 <= sigma is advisable
      parameter(tau2 = 0.05d0)
      parameter(tau3 = 0.5d0)
      logical bracket
      real*8 pnorm, dnrm2, pg, ddot, dir,
     &  f0, fprime0, mu,
     &  alphaim, fim, fprimeim, alphai, fi, fprimei,
     &  xi(nmax_bfgs),
     &  ai, fai, fprimeai, bi, fbi,
     &  dalpha, alphaip, bfgs_minimum
      logical acceptable
      real*8 aip, bip, fmin
      logical debug
      parameter(debug = .false.)
C sanity check
      if(n.gt.nmax_bfgs) stop 'bfgs_linesearch: n too large'
      iter = iter + 1
C parameters of the PMOO scaled s vector where s = -(dir/pnorm)*p
      pnorm = dnrm2(n, p, 1)
C pg is subsequently used to calculate fprime0
      pg = ddot(n, p, 1, gradient, 1)
      dir = sign(1.d0, pg)
C early convergence test to avoid a divide by zero.
C Note, pnorm can be zero if p is initialized to the gradient and
C the gradient is zero.
      if(pnorm.eq.0.d0) then
        call bfgs_constant_vector(n, 0.d0, dx)
        alpha1 = 0.d0
        return
      endif
C Initialize "0" variables:
      f0 = f
C fprime = s dot gradient (PMOO, 1.2.6)
      fprime0 = -(dir/pnorm)*pg
C early convergence test on fprime0 to avoid a divide by zero
      if(fprime0.ge.0.d0) then
        call bfgs_constant_vector(n, 0.d0, dx)
        alpha1 = 0.d0
        return
      endif
      if(f0.le.fbar) then
        write(0,*) 'bfgs_linesearch: ERROR f0 = f(alpha=0) '//
     &    '<= fbar, the minimum possible function value.'
        write(0,*) 'bfgs_linesearch: respecify fbar and try again.'
        stop
      else
C maximum line search range from PMOO, 2.6.1
C Note from the above test this must always be positive
C (unless underflow zeroes it)
        mu = (fbar - f0)/(rho*fprime0)
      endif
C Initialize "i-1" iteration variables (which have "im" suffix).
      alphaim = 0.d0
      fim = f0
      fprimeim = fprime0
C note the condition that fim <= f0 + alphaim*rho*fprime0 is
C automatically satisfied for alphaim = 0.
      fmin = fim
      if(debug) then
        write(0,*) "alpha, f(alpha) = ", alphaim, fim
        write(0,*) "alpha, f'(alpha) =", alphaim, fprimeim
      endif
C Initialize "i" iteration variables (which have "i" suffix).
      alphai = alpha1
C force at least one bracketing attempt.
      bracket = .false.
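C As a concrete restatement (added for illustration) of the
C acceptance conditions with the parameter values above
C (rho = 0.01, sigma = 0.1), the bracketing and sectioning phases
C below terminate at a step length alpha satisfying both
C   f(alpha) <= f(0) + 0.01*alpha*f'(0)   (PMOO 2.5.1)
C   |f'(alpha)| <= 0.1*|f'(0)|            (PMOO 2.5.6, two-sided)
C i.e., the slope must be reduced by a factor of ten, but only a
C modest decrease in f is demanded.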
      do while(.not.bracket)
C All code in this loop follows the pseudo-code in PMOO, 2.6.2.
C Compute new trial point at alphai corresponding to
C xi = x + alphai * s
        call bfgs_take_step(n, x, p, alphai, dir/pnorm, xi, dx)
C EVALUATE f(alphai) = function at xi
        fi = ffunction(n, xi)
        function_count = function_count + 1
        if(fi.le.f0+alphai*rho*fprime0) fmin = min(fi, fmin)
        if(debug) then
          write(0,*) "alpha, f(alpha) = ", alphai, fi
        endif
        if(fi.le.fbar) then
          write(0,*) 'bfgs_linesearch: ERROR fi = f(alphai) '//
     &      '<= fbar, the minimum possible function value.'
          write(0,*) 'bfgs_linesearch: respecify fbar and try again.'
          stop
        endif
C N.B. in my copy of PMOO, there is a misprint in the 2.6.2 pseudo-code
C where the rho factor in the next uncommented line is ignored.  But
C the rho factor must be there if the conditions given by 2.6.3
C are to be satisfied at the end of the bracket loop.
        if(fi.gt.f0+alphai*rho*fprime0.or.fi.ge.fim) then
          ai = alphaim
          fai = fim
          fprimeai = fprimeim
          bi = alphai
          fbi = fi
          bracket = .true.
        else
C EVALUATE gradient of function at xi
          call dfsubroutine(n, xi, gradient)
          gradient_count = gradient_count + 1
C from PMOO, 1.2.6
C f'(alphai) = s dot gradient
          fprimei = (-dir/pnorm)*ddot(n, p, 1, gradient, 1)
C good converged solution to the line search if xi satisfies the
C two-sided test (PMOO, equation 2.5.6)
          if(debug) then
            write(0,*) "alpha, f'(alpha) =", alphai, fprimei
          endif
          if(abs(fprimei).le.-sigma*fprime0) then
C acceptable point found.
C output x = xi
            call dcopy(n, xi, 1, x, 1)
C output f = f(xi)
            f = fi
C gradient at xi already stored ready for output.
C PMOO, 2.6.7, 2.6.8.  Note in the final stages of quadratic
C convergence, 2*(f0-fi)/(-fprimei) should approach unity,
C PMOO, Lemma 2.5.3.
            alpha1 = 2.d0*max((f0 - fi), 10.d0*epsilon)
            if(alpha1.ge.-fprimei) then
              alpha1 = 1.d0
            else
              alpha1 = alpha1/(-fprimei)
            endif
            return
          endif
          if(fprimei.ge.0.d0) then
            ai = alphai
            fai = fi
            fprimeai = fprimei
            bi = alphaim
            fbi = fim
            bracket = .true.
          else
            dalpha = alphai - alphaim
            if(mu.le.alphai + dalpha) then
              alphaip = mu
            else
C extrapolation to find a possible bracket in the range
C [alphai + dalpha, min(mu, alphai + tau1*dalpha)]
              alphaip = bfgs_minimum(
     &          alphaim, fim, fprimeim, alphai, fi,
     &          alphai + dalpha, min(mu, alphai + tau1*dalpha))
            endif
C prepare for the next bracket attempt
            alphaim = alphai
            fim = fi
            fprimeim = fprimei
            alphai = alphaip
          endif
        endif
      enddo
C found a bracket where the following conditions occur:
C (i) ai is the current best trial point (least f) that
C satisfies PMOO, 2.5.1, i.e., fai = f(ai) <= f(0) + ai*rho*f'(0)
C (ii) fprimeai = f'(ai) has been evaluated and satisfies
C (bi-ai)*f'(ai) < 0 but |f'(ai)| > -sigma*f'(0)
C (iii) bi satisfies either fbi = f(bi) > f(0) + bi*rho*f'(0) or
C fbi = f(bi) >= f(ai) or both conditions are true.
C PMOO, Lemma 2.6.1:
C if sigma >= rho then such [ai, bi] brackets contain an interval of
C acceptable alpha points such that
C (i) f(alpha) <= f(0) + alpha*rho*f'(0), PMOO, 2.5.1.
C (ii) |f'(alpha)| <= -sigma*f'(0), PMOO, 2.5.6 (two-sided condition).
C Use sectioning (while preserving the above properties) to find
C an acceptable alpha.
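C N.B. (a sketch of why the sectioning loop below terminates, added
C here rather than taken verbatim from PMOO): each trial point is
C confined to [ai + tau2*dalpha, bi - tau3*dalpha], so whichever
C sub-interval is retained as the new bracket has length at most
C max(1-tau2, 1-tau3)*|bi-ai| = 0.95*|bi-ai| for the tau2 = 0.05,
C tau3 = 0.5 values above.  The bracket length therefore shrinks
C geometrically, and since the bracket always contains an interval
C of acceptable points (PMOO, Lemma 2.6.1), the loop must
C eventually land on one.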
      acceptable = .false.
      do while(.not.acceptable)
        if(debug) then
          write(0,*) 'ai, fai, fprimeai = ', ai, fai, fprimeai
          write(0,*) 'bi, fbi = ', bi, fbi
          write(0,*) 'fai.eq.fmin', fai.eq.fmin
          write(0,*) 'fai.le.f0+ai*rho*fprime0',
     &      fai.le.f0+ai*rho*fprime0
          write(0,*) '(bi-ai)*fprimeai.lt.0.d0.and.'//
     &      'abs(fprimeai).gt.-sigma*fprime0 =',
     &      (bi-ai)*fprimeai.lt.0.d0.and.
     &      abs(fprimeai).gt.-sigma*fprime0
          write(0,*) 'fbi.gt.f0+bi*rho*fprime0.or.fbi.ge.fai = ',
     &      fbi.gt.f0+bi*rho*fprime0.or.fbi.ge.fai
        endif
        if(.not.(
     &    (fai.eq.fmin).and.
     &    (fai.le.f0+ai*rho*fprime0).and.
     &    ((bi-ai)*fprimeai.lt.0.d0).and.
     &    (abs(fprimeai).gt.-sigma*fprime0).and.
     &    (fbi.gt.f0+bi*rho*fprime0.or.fbi.ge.fai)))
     &    stop 'bfgs_linesearch: internal bracketing logic error'
        dalpha = bi - ai
C interpolation to find an acceptable point in the
C range [ai + tau2*dalpha, bi - tau3*dalpha].
        alphai = bfgs_minimum(
     &    ai, fai, fprimeai, bi, fbi,
     &    ai + tau2*dalpha, bi - tau3*dalpha)
C Compute new trial point at alphai corresponding to
C xi = x + alphai * s
        call bfgs_take_step(n, x, p, alphai, dir/pnorm, xi, dx)
C EVALUATE f(alphai) = function at xi
        fi = ffunction(n, xi)
        function_count = function_count + 1
        if(fi.le.f0+alphai*rho*fprime0) fmin = min(fi, fmin)
        if(debug) then
          write(0,*) "alpha, f(alpha) =", alphai, fi
        endif
        if((fi.gt.f0 + rho*alphai*fprime0).or.(fi.ge.fai)) then
          aip = ai
C fai and fprimeai unchanged.
          bip = alphai
          fbi = fi
        else
C EVALUATE gradient of function at xi
          call dfsubroutine(n, xi, gradient)
          gradient_count = gradient_count + 1
C from PMOO, 1.2.6
C f'(alphai) = s dot gradient
          fprimei = (-dir/pnorm)*ddot(n, p, 1, gradient, 1)
          if(debug) then
            write(0,*) "alpha, f'(alpha) =", alphai, fprimei
          endif
          if(abs(fprimei).le.-sigma*fprime0) then
C acceptable point found.
C output x = xi
            call dcopy(n, xi, 1, x, 1)
C output f = f(xi)
            f = fi
C gradient at xi already stored ready for output.
C PMOO, 2.6.7, 2.6.8.  Note in the final stages of quadratic
C convergence, 2*(f0-fi)/(-fprimei) should approach unity,
C PMOO, Lemma 2.5.3.
            alpha1 = 2.d0*max((f0 - fi), 10.d0*epsilon)
            if(alpha1.ge.-fprimei) then
              alpha1 = 1.d0
            else
              alpha1 = alpha1/(-fprimei)
            endif
            return
          endif
          aip = alphai
          if(dalpha*fprimei.ge.0.d0) then
            bip = ai
            fbi = fai
          else
            bip = bi
C fbi unchanged
          endif
          fai = fi
          fprimeai = fprimei
        endif
        ai = aip
        bi = bip
      enddo
      end
      subroutine bfgs_iterate(ffunction, dfsubroutine,
     &  fbar, epsilon, n, p, alpha1, x, f, gradient, dx)
C do one iteration of the BFGS minimization algorithm, consisting
C of a line search plus a BFGS update of the search direction.
C passed routine names:
C   ffunction is the name of the real*8 function ffunction(n, x) that
C     provides the function value.
C   dfsubroutine is the name of the subroutine dfsubroutine(n, x, gradient)
C     that provides the gradient.
C input quantities:
C   fbar is a user-specified minimum possible f used as per
C     PMOO, 2.6.1 to control the bracketing step size.  If actual f
C     values are <= fbar, then this routine stops with an error, so
C     be realistic in how you specify fbar, making it small enough
C     to avoid the error, but large enough to provide some control
C     over the maximum size of the bracketing step.
C   epsilon is the user-specified roundoff error tolerance on f
C     discussed on p. 38 of PMOO.  This quantity is not used to test
C     for convergence, but if the user uses a convergence test on f, it
C     should be larger than epsilon to avoid "lack of
C     convergence" error messages.
C   p(n) is the unscaled line search direction.  The PMOO scaled s vector
C     is defined by
C       s = -(dir/pnorm)*p
C     where dir = 1 if p has an acute angle with the input gradient and
C     dir = -1 otherwise.  pnorm is the norm of p.  dir and pnorm are
C     calculated internally.
C input and output quantities:
C   alpha1 is the initial step size used for the line search on input
C     and on output is the estimate of the initial step size for the
C     next line search.
C   x(n) is the starting point of the line search on input and on output
C     is the ending point of the line search.
C   N.B. both f and gradient must be precalculated on input and calculated
C   on output.  That is:
C   f is f(x(n)) on both input and output.
C   gradient(n) is the gradient(x(n)) on both input and output.
C output quantity:
C   dx(n) is the vector of differences between the initial x and final x.
      implicit none
      external ffunction, dfsubroutine
      include 'bfgs.h'
      integer*4 n
      real*8 ffunction, fbar, epsilon,
     &  p(n), alpha1, x(n), f, gradient(n), dx(n)
      real*8
     &  x0(nmax_bfgs), dx0(nmax_bfgs),
     &  g0(nmax_bfgs), dg0(nmax_bfgs),
     &  ddot, dnrm2, dxg, dgg, dxdg, dgnorm, b, a
C sanity check
      if(n.gt.nmax_bfgs) stop 'bfgs_iterate: n too large'
C save old values.
      call dcopy(n, x, 1, x0, 1)
      call dcopy(n, gradient, 1, g0, 1)
      call bfgs_linesearch(ffunction, dfsubroutine,
     &  fbar, epsilon, n, p, alpha1, x, f, gradient, dx)
C This is the BFGS update:
C   p' = g1 - A dx - B dg
C   A = -(1 + dg.dg/dx.dg) B + dg.g1/dx.dg
C   B = dx.g1/dx.dg
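C N.B. (an added sketch of where this formula comes from, on the
C assumption that it is the standard BFGS inverse-Hessian update
C applied to H = I, which matches the coefficients used below):
C with dx = x1 - x0, dg = g1 - g0, and rho = 1/(dx.dg), the BFGS
C update of the inverse Hessian is
C   H1 = (I - rho dx dg^T) H (I - rho dg dx^T) + rho dx dx^T
C Setting H = I (no stored matrix) and applying H1 to the new
C gradient g1 gives
C   H1 g1 = g1 - A dx - B dg
C with B = dx.g1/dx.dg and A = -(1 + dg.dg/dx.dg) B + dg.g1/dx.dg,
C which is the new (unscaled, uphill) search direction p'.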
C dx0 = x - x0
      call dcopy(n, x, 1, dx0, 1)
      call daxpy(n, -1.d0, x0, 1, dx0, 1)
C dg0 = gradient - g0
      call dcopy(n, gradient, 1, dg0, 1)
      call daxpy(n, -1.d0, g0, 1, dg0, 1)
      dxg = ddot(n, dx0, 1, gradient, 1)
      dgg = ddot(n, dg0, 1, gradient, 1)
      dxdg = ddot(n, dx0, 1, dg0, 1)
      dgnorm = dnrm2(n, dg0, 1)
C B = dx.g/dx.dg
      b = dxg/dxdg
C A = -(1 + dg.dg/dx.dg) B + dg.g/dx.dg
      a = -(1.d0 + dgnorm*dgnorm/dxdg)*b + dgg/dxdg
C p' = g1 - A dx - B dg
      call dcopy(n, gradient, 1, p, 1)
      call daxpy(n, -a, dx0, 1, p, 1)
      call daxpy(n, -b, dg0, 1, p, 1)
      end
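C A minimal, hypothetical usage sketch (not part of the original
C code): minimize f(x) = (x1-1)**2 + 10*(x2-2)**2 with the routines
C above.  The names quad_f, quad_df, and bfgs_example are invented
C for this illustration; the gradient-norm convergence test and the
C parameter values (fbar = -1, strictly below the true minimum of 0;
C epsilon = 1.d-15; initial alpha1 = 1) are reasonable choices for
C this problem, not prescriptions.  Link against BLAS (dcopy, daxpy,
C ddot, dnrm2) as for the rest of this file.
      real*8 function quad_f(n, x)
      implicit none
      integer*4 n
      real*8 x(n)
      quad_f = (x(1) - 1.d0)**2 + 10.d0*(x(2) - 2.d0)**2
      end
      subroutine quad_df(n, x, gradient)
      implicit none
      integer*4 n
      real*8 x(n), gradient(n)
      gradient(1) = 2.d0*(x(1) - 1.d0)
      gradient(2) = 20.d0*(x(2) - 2.d0)
      end
      subroutine bfgs_example
      implicit none
      external quad_f, quad_df
      real*8 quad_f, dnrm2
      integer*4 n, i
      parameter(n = 2)
      real*8 p(n), x(n), f, gradient(n), dx(n), alpha1
C starting point.
      x(1) = 0.d0
      x(2) = 0.d0
C evaluate f and the gradient at the starting point and set the
C initial (steepest-ascent) direction p.
      call bfgs_set(quad_f, quad_df, n, x, f, gradient, p)
      alpha1 = 1.d0
      do i = 1, 100
C each iteration does a line search from x along the descent
C direction derived from p, then applies the BFGS update to p.
        call bfgs_iterate(quad_f, quad_df,
     &    -1.d0, 1.d-15, n, p, alpha1, x, f, gradient, dx)
        if(dnrm2(n, gradient, 1).le.1.d-8) goto 10
      enddo
   10 write(0,*) 'bfgs_example: x =', x(1), x(2), ' f =', f
      end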