optimize_lib.f90.erb

<%
   # Real kinds (in bits) for which every type and procedure below is generated.
   RKINDS = [32, 64].freeze
   # Line-search orders: 0 = derivative free, 1 = uses gradient information.
   ORDERS = [0, 1].freeze

   # Map a real kind in bits to the LAPACK routine prefix used below:
   # 64 -> 'd', 32 -> 's'. Any other kind raises a RuntimeError.
   def s_or_d(k)
      case k
      when 64 then 'd'
      when 32 then 's'
      else raise "Unsupported kind: #{k}"
      end
   end
%>
#include "fortran_lib.h"
module optimize_lib
   USE_FORTRAN_LIB_H
   use, intrinsic:: iso_fortran_env, only: INPUT_UNIT, OUTPUT_UNIT, ERROR_UNIT
   use, intrinsic:: iso_fortran_env, only: int32, int64, real32, real64
   use, non_intrinsic:: constant_lib, only: get_infinity
   use, non_intrinsic:: array_lib, only: l2_norm, iota
   use, non_intrinsic:: comparable_lib, only: almost_equal

   implicit none

   private
   public:: nnls
   public:: update, init
   ! for test
   public:: combination
   public:: combinations


   <% RKINDS.each{|k| %>
      ! State of the box-constrained Newton-type minimizer for real<%= k %>.
      ! Filled by `init` and advanced by `update`.
      type, public:: BoundNewtonState<%= k %>
         ! Flags describing the most recent `update`.
         Logical:: is_convex = .false.
         Logical:: is_within = .false.
         Logical:: is_saddle_or_peak = .false.
         ! Set when the number of active bounds reaches `dim`.
         Logical:: is_at_corner = .false.
         ! True when the last `update` saw f >= f_prev and interpolated
         ! back along the previous step instead of taking a new one.
         Logical:: is_line_search = .false.
         ! Number of variables (size of the initial solution).
         Integer(kind=int64):: dim
         ! 0 until `init` sets it to 1; incremented by every `update`.
         Integer(kind=int64):: iter = 0
         ! Negative until `init` stores the initial step length.
         Real(kind=real<%= k %>):: base_step_size = -1
         ! Point at which the caller should evaluate f, g and H next.
         Real(kind=real<%= k %>), allocatable:: x(:)
         ! Previously accepted point with its objective value and gradient.
         Real(kind=real<%= k %>), allocatable:: x_prev(:)
         Real(kind=real<%= k %>):: f_prev
         Real(kind=real<%= k %>), allocatable:: g_prev(:)
         ! Box bounds, lower <= x <= upper.
         Real(kind=real<%= k %>), allocatable:: lower(:)
         Real(kind=real<%= k %>), allocatable:: upper(:)
         ! Per-component flags: the component currently sits on that bound.
         Logical, allocatable:: on_lower(:)
         Logical, allocatable:: on_upper(:)
         ! Flags saved before the last step, used when a step is retracted.
         Logical, allocatable:: on_lower_prev(:)
         Logical, allocatable:: on_upper_prev(:)
      end type BoundNewtonState<%= k %>

      ! State of the unconstrained Newton-type minimizer for real<%= k %>.
      ! Filled by `init` and advanced by `update`.
      type, public:: NewtonState<%= k %>
         ! Flags describing the most recent `update`.
         Logical:: is_convex = .false.
         Logical:: is_within = .false.
         Logical:: is_saddle_or_peak = .false.
         ! True when the last `update` saw f >= f_prev and interpolated
         ! back along the previous step instead of taking a new one.
         Logical:: is_line_search = .false.
         ! Number of variables (size of the initial solution).
         Integer(kind=int64):: dim
         ! 0 until `init` sets it to 1; incremented by every `update`.
         Integer(kind=int64):: iter = 0
         ! Negative until `init` stores the initial step length.
         Real(kind=real<%= k %>):: base_step_size = -1
         ! Point at which the caller should evaluate f, g and H next.
         Real(kind=real<%= k %>), allocatable:: x(:)
         ! Previously accepted point with its objective value and gradient.
         Real(kind=real<%= k %>), allocatable:: x_prev(:)
         Real(kind=real<%= k %>):: f_prev
         Real(kind=real<%= k %>), allocatable:: g_prev(:)
      end type NewtonState<%= k %>

      ! State of the derivative-free 1-D line search for real<%= k %>.
      type, public:: LineSearchState<%= k %>_0
         ! Whether the last quadratic fit was convex, and whether its
         ! minimizer was accepted without being clamped.
         Logical:: is_convex = .false.
         Logical:: is_within = .false.
         ! 0 until `init` sets it to 1; incremented by every `update`.
         Integer(kind=int64):: iter = 0
         ! Initial step supplied to `init` (nonzero, either sign).
         Real(kind=real<%= k %>):: dx
         ! Offset at which the caller should evaluate f next.
         Real(kind=real<%= k %>):: x
         ! Two retained sample offsets (xl < xr) and their values.
         Real(kind=real<%= k %>):: xl
         Real(kind=real<%= k %>):: xr
         Real(kind=real<%= k %>):: fl
         Real(kind=real<%= k %>):: fr
      end type LineSearchState<%= k %>_0

      ! State of the gradient-based 1-D line search for real<%= k %>.
      type, public:: LineSearchState<%= k %>_1
         ! Whether the last interpolation was convex, and whether its
         ! minimizer was accepted without being clamped.
         Logical:: is_convex = .false.
         Logical:: is_within = .false.
         ! 0 until `init` sets it to 1; incremented by every `update`.
         Integer(kind=int64):: iter = 0
         ! Initial step supplied to `init`.
         Real(kind=real<%= k %>):: dx
         ! Offset at which the caller should evaluate f and g next.
         Real(kind=real<%= k %>):: x
         ! Lowest-valued sample kept so far: offset, value and derivative.
         Real(kind=real<%= k %>):: x_best
         Real(kind=real<%= k %>):: f_best
         Real(kind=real<%= k %>):: g_best
      end type LineSearchState<%= k %>_1
   <% } %>


   <% RKINDS.each{|k| %>
      ! Generic names for kind real<%= k %>. Only `update`, `init` and `nnls`
      ! are exported by this module; the other generics are module-private.

      ! Advance a solver state by one iteration. The variants with a trailing
      ! underscore take an explicit `max_scale` argument.
      interface update
         module procedure updateNewtonState<%= k %>
         module procedure updateNewtonState_<%= k %>
         module procedure updateBoundNewtonState<%= k %>
         module procedure updateBoundNewtonState_<%= k %>
      end interface update

      ! Low-level bookkeeping that stores a new step into a state.
      interface update_
         module procedure update_NewtonState<%= k %>
         module procedure update_BoundNewtonState<%= k %>
      end interface update_

      interface restrict_step_size
         module procedure restrict_step_size<%= k %>
      end interface restrict_step_size

      ! Initialize a solver state before the first `update`.
      interface init
         module procedure initNewtonState<%= k %>
         module procedure initBoundNewtonState<%= k %>
      end interface init

      interface adaptive_steepest_descent
         module procedure adaptive_steepest_descent<%= k %>
      end interface adaptive_steepest_descent

      interface newton
         module procedure newton<%= k %>
      end interface newton

      ! Wrapper around the LAPACK symmetric eigen-solver of matching precision.
      interface syevd
         module procedure syevd<%= k %>
      end interface syevd

      ! Line-search variants: order 0 is derivative free, order 1 uses gradients.
      <% ORDERS.each{|order| %>
         interface update
            module procedure updateLineSearchState<%= k %>_<%= order %>
         end interface update

         interface update_
            module procedure update_LineSearchState<%= k %>_<%= order %>
         end interface update_

         interface init
            module procedure initLineSearchState<%= k %>_<%= order %>
         end interface init

         interface line_search_interpolate
            module procedure line_search_interpolate<%= k %>_<%= order %>
         end interface line_search_interpolate
      <% } %>

      interface nnls
         module procedure nnls<%= k %>
      end interface nnls
   <% } %>

contains

   <% RKINDS.each{|k| %>
      <% sd = s_or_d(k) %>

      ! Newton

      subroutine updateNewtonState<%= k %>(s, f, g, H, uplo)
         ! One iteration of the unconstrained solver using the default
         ! step-growth limit. Typical driver:
         !    call init(s, x0, r)
         !    do
         !       call update(s, f(s%x), g(s%x), H(s%x), 'u')
         !    end do
         ! s: solver state; f, g, H: objective, gradient and Hessian at s%x;
         ! uplo: which triangle of H is referenced.
         type(NewtonState<%= k %>), intent(inout):: s
         Real(kind=real<%= k %>), intent(in):: f
         Real(kind=real<%= k %>), intent(in):: g(s%dim)
         Real(kind=real<%= k %>), intent(in):: H(s%dim, s%dim)
         Character(len=1), intent(in):: uplo
         ! A new step may be at most this many times longer than the base step.
         Real(kind=real<%= k %>), parameter:: default_max_scale = 2

         DEBUG_ASSERT(s%base_step_size >= 0)
         call updateNewtonState_<%= k %>(s, f, g, H, uplo, default_max_scale)
      end subroutine updateNewtonState<%= k %>

      subroutine updateNewtonState_<%= k %>(s, f, g, H, uplo, max_scale)
         ! One iteration of the unconstrained solver with an explicit
         ! step-growth limit `max_scale`.
         ! If f did not decrease, the previous step is shortened by
         ! interpolation along it; otherwise a fresh step is computed from
         ! g and H, restricted in length, and stored in the state.
         type(NewtonState<%= k %>), intent(inout):: s
         Real(kind=real<%= k %>), intent(in):: f
         Real(kind=real<%= k %>), intent(in):: g(s%dim)
         Real(kind=real<%= k %>), intent(in):: H(s%dim, s%dim)
         Character(len=1), intent(in):: uplo
         Real(kind=real<%= k %>), intent(in):: max_scale
         Real(kind=real<%= k %>), parameter:: zero = 0
         Real(kind=real<%= k %>):: r, r_new, slope
         Real(kind=real<%= k %>):: dx(s%dim), dx_new(s%dim)

         if(s%iter < 1)then
            ERROR('`NewtonState<%= k %>` should be `init`ialized before `update`d')
         end if
         s%iter = s%iter + 1

         ! Step that led from the previous point to the current one.
         dx(:) = s%x - s%x_prev
         ! The step is rejected unless the objective strictly decreased.
         s%is_line_search = f >= s%f_prev
         if(s%is_line_search)then
            DEBUG_PRINT('Line Search')
            r = norm2(dx)
            ! Directional derivative at x_prev along the rejected step.
            slope = dot_product(dx, s%g_prev)/r
            call line_search_interpolate(zero, s%f_prev, slope, r, f, r_new, s%is_convex)

            ASSERT(s%is_convex)
            DEBUG_PRINT_VARIABLE(r)
            DEBUG_PRINT_VARIABLE(r_new)
            s%is_within = .true.
            call update_base_step_size<%= k %>(s%base_step_size, r_new, max_scale)
            ! Retry from x_prev with the interpolated length; x_prev, f_prev
            ! and g_prev stay as they are.
            s%x(:) = s%x_prev + r_new/r*dx
         else
            ! update_newton<%= k %> is a drop-in alternative for the next call.
            call update_adaptive_steepest_descent<%= k %>(g, H, uplo, s%is_convex, s%is_saddle_or_peak, s%is_within, dx_new)
            ! Limit the new step relative to a step of length base_step_size
            ! pointing along the previous step.
            call restrict_step_size(dx_new, s%base_step_size/norm2(dx)*dx, max_scale, s%is_within)
            call update_base_step_size<%= k %>(s%base_step_size, norm2(dx_new), max_scale)
            call update_(s, dx_new, f, g)
         end if
      end subroutine updateNewtonState_<%= k %>


      subroutine updateBoundNewtonState<%= k %>(s, f, g, H, uplo)
         ! One iteration of the box-constrained solver using the default
         ! step-growth limit. Typical driver:
         !    call init(s, x0, r, lower, upper)
         !    do
         !       call update(s, f(s%x), g(s%x), H(s%x), 'u')
         !    end do
         ! s: solver state; f, g, H: objective, gradient and Hessian at s%x;
         ! uplo: which triangle of H is referenced.
         type(BoundNewtonState<%= k %>), intent(inout):: s
         Real(kind=real<%= k %>), intent(in):: f
         Real(kind=real<%= k %>), intent(in):: g(s%dim)
         Real(kind=real<%= k %>), intent(in):: H(s%dim, s%dim)
         Character(len=1), intent(in):: uplo
         ! A new step may be at most this many times longer than the base step.
         Real(kind=real<%= k %>), parameter:: default_max_scale = 2

         DEBUG_ASSERT(s%base_step_size >= 0)
         call updateBoundNewtonState_<%= k %>(s, f, g, H, uplo, default_max_scale)
      end subroutine updateBoundNewtonState<%= k %>


      subroutine updateBoundNewtonState_<%= k %>(s, f, g, H, uplo, max_scale)
         ! One iteration of the box-constrained solver with an explicit
         ! step-growth limit `max_scale`.
         ! s: solver state; f, g, H: objective, gradient and Hessian at s%x;
         ! uplo: which triangle of H is referenced.
         ! If f did not decrease, the previous step is shortened by
         ! interpolation. Otherwise a step is computed; when it would leave a
         ! bound the point already sits on, subsets of those bounds are held
         ! fixed (smallest subsets first) until a step is found that does not.
         ! todo: refactoring
         Real(kind=real<%= k %>), parameter:: zero = 0
         type(BoundNewtonState<%= k %>), intent(inout):: s
         Real(kind=real<%= k %>), intent(in):: f, g(s%dim), H(s%dim, s%dim)
         Character(len=1), intent(in):: uplo
         Real(kind=real<%= k %>), intent(in):: max_scale
         Real(kind=real<%= k %>):: r, r_new, dx(s%dim)
         ! Allocatable because its size changes while reduced problems are tried.
         Real(kind=real<%= k %>), allocatable:: dx_new(:)

         Integer(kind=int64):: one_to_dim(s%dim)
         Integer(kind=int64), allocatable:: bound_indices(:), bound_indices_list(:, :)
         Integer(kind=int64):: i_bound_indices_list, n_active_bounds
         ! Gradient and Hessian restricted to the variables that are not held fixed.
         Real(kind=real<%= k %>), allocatable:: g_(:), H_(:, :)
         Real(kind=real<%= k %>):: c
         Logical:: active_bounds(s%dim)
         Logical:: on_bound(s%dim)


         ! Invariants: x is feasible, no component is on both bounds, and every
         ! flagged component (almost) equals its bound.
         ASSERT(all(s%lower <= s%x) .and. all(s%x <= s%upper))
         ASSERT(.not.any(s%on_lower .and. s%on_upper))
         ASSERT(all(pack(almost_equal(s%x, s%lower), s%on_lower)))
         ASSERT(all(pack(almost_equal(s%x, s%upper), s%on_upper)))
         if(s%iter < 1)then
            ERROR('`BoundNewtonState<%= k %>` should be `init`ialized before `update`d')
         end if
         s%iter = s%iter + 1

         ! Step that led from the previous point to the current one.
         dx = s%x - s%x_prev
         s%is_line_search = f >= s%f_prev ! decreasing condition
         if(s%is_line_search)then
            r = norm2(dx)
            ! Interpolate along the rejected step using the value and the
            ! directional derivative at x_prev and the value at x.
            call line_search_interpolate(zero, s%f_prev, dot_product(dx, s%g_prev)/r, r, f, r_new, s%is_convex)

            ASSERT(s%is_convex)
            DEBUG_PRINT_VARIABLE(r)
            DEBUG_PRINT_VARIABLE(r_new)
            s%is_within = .true.
            c = r_new/r
            ASSERT(c < 1)
            ! The step is shortened, so only bounds that were already
            ! flagged before the step remain flagged.
            s%on_lower = s%on_lower .and. s%on_lower_prev
            s%on_upper = s%on_upper .and. s%on_upper_prev
            s%x(:) = s%x_prev + c*dx
            call update_base_step_size<%= k %>(s%base_step_size, r_new, max_scale)
            call adjust_active_bounds<%= k %>(s)
            return
         end if

         one_to_dim = iota(1, s%dim)
         active_bounds = .false.
         allocate(dx_new(s%dim))
         ! First try the full-dimensional step with no bound held fixed.
         call update_adaptive_steepest_descent<%= k %>(g, H, uplo, s%is_convex, s%is_saddle_or_peak, s%is_within, dx_new)
         ! call update_newton<%= k %>(g, H, uplo, s%is_convex, s%is_saddle_or_peak, s%is_within, dx_new)
         if(any(still_on_bound<%= k %>(s%lower, s%upper, s%on_lower, s%on_upper, .false., s%x + dx_new)))then
            on_bound(:) = s%on_lower .or. s%on_upper
            bound_indices = pack(one_to_dim, on_bound)
            ! increase number of active bounds
            n_active_bounds_loop: do n_active_bounds = 1, count(on_bound, kind=kind(n_active_bounds))
               if(n_active_bounds == s%dim)then
                  ! Every variable would be held fixed: take a zero step.
                  ENSURE_DEALLOCATED(dx_new)
                  allocate(dx_new(s%dim))
                  dx_new = 0
                  s%is_at_corner = .true.
                  call update_(s, dx_new, f, g)
                  return
               end if
               bound_indices_list = combinations(bound_indices, n_active_bounds)
               ! Try all combinations where `n_active_bounds` bounds are active
               do i_bound_indices_list = 1, size(bound_indices_list, 2, kind=kind(i_bound_indices_list))
                  active_bounds = mask_indices(bound_indices_list(:, i_bound_indices_list), s%dim)
                  ! Solve the reduced problem over the free variables only.
                  call pack_active_bounds<%= k %>(g, H, active_bounds, g_, H_)
                  deallocate(dx_new)
                  allocate(dx_new(s%dim - count(active_bounds)))
                  call update_adaptive_steepest_descent<%= k %>(g_, H_, uplo, s%is_convex, s%is_saddle_or_peak, s%is_within, dx_new)
                  ! call update_newton<%= k %>(g_, H_, uplo, s%is_convex, s%is_saddle_or_peak, s%is_within, dx_new)
                  ! Expand back to full dimension with zeros for fixed variables.
                  call unpack_active_bounds<%= k %>(dx_new, active_bounds)
                  ! todo: choose active bounds based on expected reduction
                  if(.not.any(still_on_bound<%= k %>(s%lower, s%upper, s%on_lower, s%on_upper, active_bounds, s%x + dx_new))) exit n_active_bounds_loop
               end do
            end do n_active_bounds_loop
         end if
         ! restrict step size based on s%base_step_size
         call restrict_step_size(dx_new, s%base_step_size/norm2(dx)*dx, max_scale, s%is_within)
         ! Remember the flags so that a later line search can restore them.
         s%on_lower_prev = s%on_lower
         s%on_upper_prev = s%on_upper
         ! Shorten the step so that it stops at the first bound it reaches.
         call bound_constraint<%= k %>(dx_new, s%x, s%lower, s%upper, s%on_lower, s%on_upper, active_bounds)
         call update_base_step_size<%= k %>(s%base_step_size, norm2(dx_new), max_scale)
         call update_(s, dx_new, f, g)
      end subroutine updateBoundNewtonState_<%= k %>


      subroutine bound_constraint<%= k %>(dx, x, lower, upper, on_lower, on_upper, active_bounds)
         ! Scale the step dx by a common factor in (0, 1] so that x + dx does
         ! not cross any bound, and update the on-bound flags accordingly.
         ! dx: step, scaled in place.
         ! x, lower, upper: current point and box bounds.
         ! on_lower, on_upper: on entry the current flags; on exit the flags
         !    that hold at x + dx.
         ! active_bounds: components held fixed for this step. Each of them
         !    must currently be flagged on a bound.
         Real(kind=real<%= k %>), intent(inout):: dx(:)
         Real(kind=kind(dx)), intent(in):: x(size64(dx)), lower(size64(dx)), upper(size64(dx))
         Logical, intent(inout):: on_lower(size64(lower)), on_upper(size64(upper))
         Logical, intent(in):: active_bounds(size64(dx))
         Logical:: on_lower_new(size64(on_lower)), on_upper_new(size64(on_upper))
         ! Multiple variables can reach bounds in the same iteration.
         ! However, `c == c_min` may not hold due to rounding error.
         ! In such cases, `c` (hopefully) becomes 0 at the next iteration.
         ! We should treat those "missed" bounds separately.
         Logical:: on_lower_missed(size64(on_lower)), on_upper_missed(size64(on_upper))
         Real(kind=kind(dx)):: c, c_min, xi, xi_new, dxi, x_new(size64(x))
         Integer(kind=int64):: i, n

         ASSERT(.not.any((.not.(on_lower .or. on_upper)) .and. active_bounds))
         n = size(dx, kind=kind(n))
         ! Components that are on a bound but not held fixed are released.
         on_lower = on_lower .and. active_bounds
         on_upper = on_upper .and. active_bounds
         c = 1
         c_min = c
         on_lower_new = .false.
         on_upper_new = .false.
         on_lower_missed = .false.
         on_upper_missed = .false.
         do i = 1, n
            ! Fixed components do not limit the step.
            if(on_lower(i) .or. on_upper(i)) cycle
            xi = x(i)
            dxi = dx(i)
            xi_new = xi + dxi
            if(xi_new <= lower(i))then
               ! Fraction of the step at which component i reaches its lower bound.
               c = (lower(i) - xi)/dxi
               if(c <= 0)then
                  on_lower_missed(i) = .true.
               else if(c < c_min)then
                  ! A strictly earlier hit replaces all hits recorded so far.
                  on_lower_new = .false.
                  on_upper_new = .false.
                  on_lower_new(i) = .true.
                  c_min = c
               else if(c == c_min)then
                  on_lower_new(i) = .true.
               end if
            else if(xi_new >= upper(i))then
               ! Fraction of the step at which component i reaches its upper bound.
               c = (upper(i) - xi)/dxi
               if(c <= 0)then
                  on_upper_missed(i) = .true.
               else if(c < c_min)then
                  ! A strictly earlier hit replaces all hits recorded so far.
                  on_lower_new = .false.
                  on_upper_new = .false.
                  on_upper_new(i) = .true.
                  c_min = c
               else if(c == c_min)then
                  on_upper_new(i) = .true.
               end if
            end if
         end do
         ASSERT(c_min > 0)
         dx = c_min*dx
         x_new = x + dx
         ! The final comparison also flags components that land on or beyond
         ! a bound after scaling.
         on_lower = on_lower .or. on_lower_new .or. on_lower_missed .or. x_new <= lower
         on_upper = on_upper .or. on_upper_new .or. on_upper_missed .or. x_new >= upper
      end subroutine bound_constraint<%= k %>


      subroutine unpack_active_bounds<%= k %>(dx_new, active_bounds)
         ! Expand a reduced step back to full dimension.
         ! dx_new: on entry the step over the free variables, in order;
         !    on exit reallocated to size(active_bounds), holding those
         !    values at the free positions and 0 at the fixed positions.
         ! active_bounds: .true. for every variable that is held fixed.
         Real(kind=real<%= k %>), allocatable, intent(inout):: dx_new(:)
         Logical, intent(in):: active_bounds(:)
         Real(kind=real<%= k %>), parameter:: zero = 0
         Real(kind=kind(dx_new)):: reduced(size64(dx_new))

         reduced = dx_new
         deallocate(dx_new)
         allocate(dx_new(size(active_bounds, kind=int64)))
         ! Free positions take successive entries of `reduced`.
         dx_new(:) = unpack(reduced, .not.active_bounds, zero)
      end subroutine unpack_active_bounds<%= k %>


      subroutine pack_active_bounds<%= k %>(g, H, active_bounds, g_, H_)
         ! Restrict the gradient and the Hessian to the free variables.
         ! g, H: full gradient and Hessian.
         ! active_bounds: .true. for every variable that is held fixed.
         ! g_, H_: (re)allocated to hold the entries of g and the rows and
         !    columns of H that belong to free variables.
         Real(kind=real<%= k %>), intent(in):: g(:), H(size64(g), size64(g))
         Logical, intent(in):: active_bounds(size64(g))
         Real(kind=real<%= k %>), allocatable, intent(inout):: g_(:), H_(:, :)
         Logical:: is_free(size64(g))
         Integer(kind=int64):: j, jj, n, m

         is_free = .not.active_bounds
         n = size(g, kind=kind(n))
         m = n - count(active_bounds, kind=kind(n))
         g_ = pack(g, is_free)
         ENSURE_DEALLOCATED(H_)
         allocate(H_(m, m))
         ! jj is the next column of H_ to fill.
         jj = 1
         do j = 1, n
            if(is_free(j))then
               H_(:, jj) = pack(H(:, j), is_free)
               jj = jj + 1
            end if
         end do
         ASSERT(jj == m + 1)
      end subroutine pack_active_bounds<%= k %>


      elemental function still_on_bound<%= k %>(lower, upper, on_lower, on_upper, ignore, x) result(ret)
         ! True when a component that is flagged on a bound would stay on or
         ! beyond that bound at the trial value x. Components with `ignore`
         ! set are never reported.
         Real(kind=real<%= k %>), intent(in):: lower, upper, x
         Logical, intent(in):: on_lower, on_upper, ignore
         Logical:: ret
         Logical:: stuck_low, stuck_high

         stuck_low = on_lower .and. x <= lower
         stuck_high = on_upper .and. x >= upper
         ret = (stuck_low .or. stuck_high) .and. .not. ignore
      end function still_on_bound<%= k %>


      subroutine update_adaptive_steepest_descent<%= k %>(g, H, uplo, is_convex, is_saddle_or_peak, is_within, dx)
         ! Compute a step with adaptive_steepest_descent and report the step
         ! as within range exactly when the Hessian was found convex.
         ! g, H: gradient and Hessian; uplo: triangle of H that is referenced.
         ! dx: resulting step.
         Real(kind=real<%= k %>), intent(in):: g(:)
         Real(kind=real<%= k %>), intent(in):: H(size64(g), size64(g))
         Character(len=1), intent(in):: uplo
         Logical, intent(out):: is_convex
         Logical, intent(out):: is_saddle_or_peak
         Logical, intent(out):: is_within
         Real(kind=real<%= k %>), intent(out):: dx(size64(g))

         call adaptive_steepest_descent(uplo, g, H, dx, is_convex, is_saddle_or_peak)
         is_within = is_convex
      end subroutine update_adaptive_steepest_descent<%= k %>


      subroutine update_newton<%= k %>(g, H, uplo, is_convex, is_saddle_or_peak, is_within, dx)
         ! Compute a pure Newton step, falling back to half a gradient step
         ! when the Hessian is not positive definite.
         ! g, H: gradient and Hessian; uplo: triangle of H that is referenced.
         ! is_convex: the Newton solve succeeded.
         ! is_saddle_or_peak: H is not convex and the gradient vanishes.
         ! is_within: the returned step is the Newton step.
         ! dx: resulting step (zero at a saddle or peak).
         Real(kind=real<%= k %>), intent(in):: g(:), H(size64(g), size64(g))
         Character(len=1), intent(in):: uplo
         Logical, intent(out):: is_convex, is_saddle_or_peak, is_within
         Real(kind=real<%= k %>), intent(out):: dx(size64(g))

         ! Every intent(out) flag must be defined on every path, including
         ! the early return below.
         is_saddle_or_peak = .false.
         call newton(uplo, g, H, dx, is_convex)
         is_within = is_convex
         if(is_convex) return

         is_saddle_or_peak = norm2(g) <= 0
         if(is_saddle_or_peak)then
            DEBUG_WARN('Saddle or peak')
            dx = 0
         else
            dx = -g/2
         end if
      end subroutine update_newton<%= k %>


      subroutine update_NewtonState<%= k %>(s, dx_new, f, g)
         ! Accept the step dx_new: the current point becomes the previous
         ! point (with its value f and gradient g) and x moves by dx_new.
         type(NewtonState<%= k %>), intent(inout):: s
         Real(kind=real<%= k %>), intent(in):: dx_new(s%dim)
         Real(kind=real<%= k %>), intent(in):: f
         Real(kind=real<%= k %>), intent(in):: g(s%dim)

         s%f_prev = f
         s%g_prev(:) = g
         s%x_prev(:) = s%x
         s%x(:) = s%x + dx_new
      end subroutine update_NewtonState<%= k %>

      subroutine update_BoundNewtonState<%= k %>(s, dx_new, f, g)
         ! Accept the step dx_new: the current point becomes the previous
         ! point (with its value f and gradient g), x moves by dx_new and
         ! flagged components are then set exactly onto their bounds.
         type(BoundNewtonState<%= k %>), intent(inout):: s
         Real(kind=real<%= k %>), intent(in):: dx_new(s%dim)
         Real(kind=real<%= k %>), intent(in):: f
         Real(kind=real<%= k %>), intent(in):: g(s%dim)

         s%f_prev = f
         s%g_prev(:) = g
         s%x_prev(:) = s%x
         s%x(:) = s%x + dx_new
         call adjust_active_bounds<%= k %>(s)
      end subroutine update_BoundNewtonState<%= k %>

      subroutine update_base_step_size<%= k %>(base_step_size, r_new, max_scale)
         ! Set the base step size to the length of the latest step, but never
         ! let it shrink below 1/(2*max_scale) of its previous value.
         Real(kind=real<%= k %>), intent(inout):: base_step_size
         Real(kind=real<%= k %>), intent(in):: r_new
         Real(kind=real<%= k %>), intent(in):: max_scale
         Real(kind=real<%= k %>):: lower_limit

         lower_limit = base_step_size/(2*max_scale)
         base_step_size = max(lower_limit, r_new)
      end subroutine update_base_step_size<%= k %>

      subroutine adjust_active_bounds<%= k %>(s)
         ! Set every component flagged as on a bound exactly onto that bound.
         ! The lower flag takes precedence when both flags are set.
         type(BoundNewtonState<%= k %>), intent(inout):: s

         s%x(:) = merge(s%lower, merge(s%upper, s%x, s%on_upper), s%on_lower)
      end subroutine adjust_active_bounds<%= k %>

      subroutine restrict_step_size<%= k %>(dx, dx_prev, max_scale, is_within)
         ! Shrink the step dx relative to the reference step dx_prev:
         !  1. its length becomes at most max_scale times the length of dx_prev;
         !  2. its component against the direction of dx_prev becomes at most
         !     half the length of dx_prev.
         ! dx: step, scaled in place when a restriction applies.
         ! is_within: set to .false. when dx was shortened; otherwise the
         !    value passed in by the caller is kept. It is intent(inout)
         !    because with intent(out) the flag of the caller would become
         !    undefined whenever no restriction applies.
         Real(kind=real<%= k %>), intent(inout):: dx(:)
         Real(kind=real<%= k %>), intent(in):: dx_prev(size64(dx))
         Real(kind=real<%= k %>), intent(in):: max_scale
         Logical, intent(inout):: is_within
         Real(kind=real<%= k %>):: r, r_prev, c

         r_prev = norm2(dx_prev)
         r = norm2(dx)
         if(r > max_scale*r_prev)then
            dx(:) = max_scale*r_prev/r*dx
            is_within = .false.
         end if
         ! Signed length of the projection of dx onto the direction of dx_prev.
         c = dot_product(dx_prev, dx)/r_prev
         if(c < -r_prev/2)then
            ! Scale so that the projection becomes exactly -r_prev/2.
            dx(:) = -r_prev/(2*c)*dx
            is_within = .false.
         end if
      end subroutine restrict_step_size<%= k %>


      subroutine initNewtonState<%= k %>(s, x0, r)
         ! Prepare an unconstrained solver state.
         ! s: state to initialize (previous contents are discarded).
         ! x0: initial solution; its size defines the problem dimension.
         ! r: initial step length, must be positive.
         type(NewtonState<%= k %>), intent(out):: s
         Real(kind=real<%= k %>), intent(in):: x0(:) ! initial solution
         Real(kind=real<%= k %>), intent(in):: r ! initial step length

         ASSERT(r > 0)
         s%iter = 1
         s%dim = size(x0, kind=kind(s%dim))
         s%base_step_size = r
         s%x = x0
         ! Fictitious previous point at distance r from x0: every component
         ! is offset by r/sqrt(dim), as in initBoundNewtonState<%= k %>.
         s%x_prev = s%x - r/sqrt(real(s%dim, kind=kind(r)))
         ! An infinite previous value prevents a finite first f from
         ! triggering the line search.
         s%f_prev = get_infinity()
         allocate(s%g_prev(s%dim))
         s%g_prev(:) = 0
      end subroutine initNewtonState<%= k %>


      subroutine initBoundNewtonState<%= k %>(s, x0, r, lower, upper)
         ! Prepare a box-constrained solver state.
         ! s: state to initialize (previous contents are discarded).
         ! x0: initial solution; must satisfy lower <= x0 <= upper.
         ! r: initial step length, must be positive.
         ! lower, upper: box bounds with lower < upper in every component.
         type(BoundNewtonState<%= k %>), intent(out):: s
         Real(kind=real<%= k %>), intent(in):: x0(:) ! initial solution
         Real(kind=real<%= k %>), intent(in):: r ! initial step length
         Real(kind=real<%= k %>), intent(in):: lower(size64(x0))
         Real(kind=real<%= k %>), intent(in):: upper(size64(x0))
         Real(kind=real<%= k %>):: offset

         ASSERT(r > 0)
         ASSERT(all(lower < upper))
         ASSERT(all(lower <= x0) .and. all(x0 <= upper))

         s%dim = size(x0, kind=kind(s%dim))
         s%iter = 1
         s%base_step_size = r
         ! An infinite previous value prevents a finite first f from
         ! triggering the line search.
         s%f_prev = get_infinity()

         ! Fictitious previous point at distance r from x0.
         offset = r/sqrt(real(s%dim, kind=kind(r)))
         s%x = x0
         s%x_prev = x0 - offset

         s%lower = lower
         s%upper = upper
         s%on_lower = x0 <= lower
         s%on_upper = upper <= x0

         allocate(s%on_lower_prev(s%dim), s%on_upper_prev(s%dim), s%g_prev(s%dim))
         s%on_lower_prev(:) = .true.
         s%on_upper_prev(:) = .true.
         s%g_prev(:) = 0
      end subroutine initBoundNewtonState<%= k %>


      subroutine adaptive_steepest_descent<%= k %>(uplo, g, H, dx, is_convex, is_saddle_or_peak)
         ! Compute a descent step from the eigen-decomposition of H: the
         ! gradient component along each eigenvector is divided by the
         ! absolute value of the corresponding eigenvalue.
         ! uplo: triangle of H that is referenced.
         ! g, H: gradient and Hessian.
         ! dx: resulting step.
         ! is_convex: the smallest eigenvalue of H is positive.
         ! is_saddle_or_peak: H is not convex and the gradient vanishes.
         Character(len=1), intent(in):: uplo
         Real(kind=real<%= k %>), intent(in):: g(:), H(size64(g), size64(g))
         Real(kind=kind(g)), intent(out):: dx(size64(g))
         Logical, intent(out):: is_convex, is_saddle_or_peak
         Real(kind=kind(g)):: eigvec(size64(g), size64(g)), eigval(size64(g))
         ! Step parts from zero and from nonzero eigenvalues respectively.
         Real(kind=kind(g)):: null_part(size64(g)), scaled_part(size64(g))
         Real(kind=kind(g)):: v(size64(g)), proj
         Integer(kind=int32):: n, i, info

         n = size(dx, kind=kind(n))
         ASSERT(n > 0)

         ! syevd overwrites its matrix argument with the eigenvectors.
         eigvec = H
         call syevd(uplo, eigvec, eigval, info)
         if(info /= 0)then
            write(output_unit, *) 'H: ', H
            write(error_unit, *) 'info: ', info
            ASSERT(info == 0)
         end if
         ASSERT(info == 0)
         is_convex = eigval(1) > 0 ! eigenvalues are sorted
         is_saddle_or_peak = (.not. is_convex) .and. norm2(g) <= 0
         if(is_saddle_or_peak)then
            DEBUG_WARN('Saddle or peak')
            if(eigval(1) < 0)then
               DEBUG_WARN('Set dx = V(:, 1) (optimum direction to escape)')
               dx = eigvec(:, 1)
            else
               dx = 0
            end if
            return
         end if

         null_part = 0
         scaled_part = 0
         do i = 1, n
            v = eigvec(:, i)
            proj = dot_product(v, g)
            if(eigval(i) == 0)then
               null_part = null_part - proj*v
            else
               scaled_part = scaled_part - proj/abs(eigval(i))*v
            end if
         end do

         if(norm2(null_part) > 0)then ! this branch will not happen in practice
            DEBUG_WARN("some eigen values of H are 0")
            if(norm2(scaled_part) > 0)then
               DEBUG_WARN("heuristically add gradient")
               dx = scaled_part + norm2(scaled_part)/(2*norm2(null_part))*null_part ! 2 is a heuristics
            else
               DEBUG_WARN("dx is gradient descent, since H is 0")
               dx = scaled_part + null_part
            end if
         else
            dx = scaled_part
         end if
      end subroutine adaptive_steepest_descent<%= k %>


      subroutine newton<%= k %>(uplo, g, H, dx, is_convex)
         ! Solve H*dx = -g with the LAPACK positive-definite solver.
         ! uplo: triangle of H that is referenced.
         ! g, H: gradient and Hessian (H is left untouched).
         ! dx: right-hand side -g, overwritten by the solver.
         ! is_convex: the solver reported success (info == 0).
         Character(len=1), intent(in):: uplo
         Real(kind=real<%= k %>), intent(in):: g(:), H(size64(g), size64(g))
         Real(kind=real<%= k %>), intent(out):: dx(size64(g))
         Logical, intent(out):: is_convex
         Integer(kind=int32), parameter:: n_rhs = 1
         ! Copy of H handed to the solver, which overwrites its matrix argument.
         Real(kind=real<%= k %>):: H_work(size64(g), size64(g))
         Integer(kind=int32):: n, info

         n = size(dx, kind=kind(n))
         ASSERT(n > 0)

         dx = -g
         H_work = H
         call <%= sd %>posv(uplo, n, n_rhs, H_work, n, dx, n, info)
         ASSERT(info >= 0)
         is_convex = info == 0
      end subroutine newton<%= k %>


      subroutine syevd<%= k %>(uplo, H, e, info)
         ! Eigen-decomposition of a symmetric matrix via LAPACK <%= sd %>syevd.
         ! uplo: triangle of H that is referenced.
         ! H: square matrix on entry, overwritten by the solver (job 'v').
         ! e: receives the eigenvalues.
         ! info: status code returned by the solver.
         Character(len=1), intent(in):: uplo
         Real(kind=real<%= k %>), intent(inout):: H(:, :)
         Real(kind=real<%= k %>), intent(out):: e(:)
         Integer(kind=kind(0)), intent(out):: info
         Real(kind=real<%= k %>):: work_query(1)
         Integer(kind=kind(info)):: iwork_query(1)
         Real(kind=real<%= k %>), allocatable:: work(:)
         Integer(kind=kind(info)), allocatable:: iwork(:)
         Integer(kind=kind(info)):: n, lwork, liwork

         n = size(H, 1, kind=kind(n))
         ASSERT(size(H, 2, kind=kind(n)) == n)
         ASSERT(size(e, kind=kind(n)) == n)

         ! Workspace query: sizes of -1 request the required sizes in the
         ! first element of each work array.
         lwork = -1
         liwork = -1
         call <%= sd %>syevd('v', uplo, n, H, n, e, work_query, lwork, iwork_query, liwork, info)
         lwork = nint(work_query(1), kind=kind(lwork))
         liwork = iwork_query(1)

         allocate(work(lwork), iwork(liwork))
         call <%= sd %>syevd('v', uplo, n, H, n, e, work, lwork, iwork, liwork, info)
      end subroutine syevd<%= k %>


      ! derivative free

      subroutine updateLineSearchState<%= k %>_0(s, f)
         ! One iteration of the derivative-free line search.
         ! s: line-search state; f: objective value at the offset s%x that the
         ! previous call (or `init`) requested. On return s%x holds the next
         ! offset to evaluate.
         ! The first two calls only collect samples; later calls fit a
         ! parabola through three samples and move to its minimizer, limited
         ! to `enlarge` times the bracket width beyond the bracket.
         ! call init(s, dx)
         ! do
         !    call update(s, f(x0 + s%x))
         ! end do
         type(LineSearchState<%= k %>_0), intent(inout):: s
         Real(kind=real<%= k %>), intent(in):: f
         Real(kind=real<%= k %>):: x_new, x1, x2, x3, f1, f2, f3, step, xlim
         ! Maximum extrapolation beyond the bracket, in bracket widths.
         Real(kind=real<%= k %>), parameter:: enlarge = real(5, kind=kind(enlarge))/4

         if(s%iter < 1)then
            ERROR('`LineSearchState<%= k %>_0` should be `init`ialized before `update`d')
         end if
         s%iter = s%iter + 1
         if(s%iter == 2)then
            ! First sample: f belongs to offset 0, which `init` stored as xl
            ! (dx > 0) or xr (dx < 0). Next, evaluate at offset dx.
            s%x = s%dx
            if(s%dx > 0)then
               s%fl = f
            else
               s%fr = f
            end if
            return
         else if(s%iter == 3)then
            ! Second sample: f belongs to offset dx, the other bracket end.
            if(s%dx > 0)then
               ASSERT(s%x > s%xl)
               s%xr = s%x
               s%fr = f
            else
               ASSERT(s%x < s%xr)
               s%xl = s%x
               s%fl = f
            end if
            ! Step beyond the end with the lower value.
            step = enlarge*(s%xr - s%xl)
            if(s%fr < s%fl)then
               s%x = s%xr + step
            else
               s%x = s%xl - step
            end if
            return
         else
            ! converged case should be treated by caller
            ASSERT(s%x /= s%xl .and. s%x /= s%xr)
            ! Sort the new sample and the two retained ones so that x1 < x2 < x3.
            if(s%x < s%xl)then
               x1 = s%x
               x2 = s%xl
               x3 = s%xr
               f1 = f
               f2 = s%fl
               f3 = s%fr
            else if(s%xr < s%x)then
               x1 = s%xl
               x2 = s%xr
               x3 = s%x
               f1 = s%fl
               f2 = s%fr
               f3 = f
            else ! xl < x < xr
               x1 = s%xl
               x2 = s%x
               x3 = s%xr
               f1 = s%fl
               f2 = f
               f3 = s%fr
            end if
         end if

         call line_search_interpolate(x1, x2, x3, f1, f2, f3, x_new, s%is_convex)
         step = enlarge*(x3 - x1)
         if(s%is_convex)then
            if(x_new < x1)then
               ! Minimizer lies left of all samples: accept it only within
               ! `step` of x1, and keep the two leftmost samples.
               xlim = x1 - step
               s%is_within = xlim <= x_new
               if(s%is_within)then
                  s%x = x_new
               else
                  s%x = xlim
               end if
               call update_(s, x1, x2, f1, f2)
            else if(x3 < x_new)then
               ! Minimizer lies right of all samples: accept it only within
               ! `step` of x3, and keep the two rightmost samples.
               xlim = x3 + step
               s%is_within = x_new <= xlim
               if(s%is_within)then
                  s%x = x_new
               else
                  s%x = xlim
               end if
               call update_(s, x2, x3, f2, f3)
            else
               ! Minimizer lies between the outer samples: keep the pair on
               ! the side of the lower outer value.
               s%is_within = .true.
               s%x = x_new
               if(f1 < f3)then
                  call update_(s, x1, x2, f1, f2)
               else
                  call update_(s, x2, x3, f2, f3)
               end if
            end if
         else
            ! Not convex: x_new is the sample with the lowest value. Step
            ! past the samples on that side.
            s%is_within = .false.
            if(x_new < x2)then
               s%x = x1 - step
               call update_(s, x1, x2 ,f1, f2)
            else
               s%x = x3 + step
               call update_(s, x2, x3 ,f2, f3)
            end if
         end if
      end subroutine updateLineSearchState<%= k %>_0


      subroutine update_LineSearchState<%= k %>_0(s, xl, xr, fl, fr)
         ! Store the two retained samples (offsets and values) in the state.
         type(LineSearchState<%= k %>_0), intent(inout):: s
         Real(kind=real<%= k %>), intent(in):: xl, xr
         Real(kind=real<%= k %>), intent(in):: fl, fr

         ! left sample
         s%xl = xl
         s%fl = fl
         ! right sample
         s%xr = xr
         s%fr = fr
      end subroutine update_LineSearchState<%= k %>_0


      subroutine initLineSearchState<%= k %>_0(s, dx)
         ! Prepare a derivative-free line search.
         ! s: state to initialize.
         ! dx: nonzero initial step; its sign selects the search direction.
         ! The bracket spans offset 0 and offset dx, and the first
         ! evaluation is requested at offset 0.
         type(LineSearchState<%= k %>_0), intent(out):: s
         Real(kind=real<%= k %>), intent(in):: dx
         Real(kind=real<%= k %>), parameter:: zero = 0

         ASSERT(abs(dx) > 0)
         s%iter = 1
         s%dx = dx
         s%xl = min(zero, dx)
         s%xr = max(zero, dx)
         s%x = zero
      end subroutine initLineSearchState<%= k %>_0


      subroutine line_search_interpolate<%= k %>_0(x1, x2, x3, f1, f2, f3, x_opt, is_convex)
         ! Fit a parabola through (x1, f1), (x2, f2), (x3, f3).
         ! is_convex: the leading coefficient of the parabola is positive.
         ! x_opt: the minimizer of the parabola when it is convex; otherwise
         !    the sample offset with the smallest value (x3 on ties).
         Real(kind=real<%= k %>), intent(in):: x1, x2, x3, f1, f2, f3
         Real(kind=real<%= k %>), intent(out):: x_opt
         Logical, intent(out):: is_convex
         ! Pairwise differences of the sample offsets.
         Real(kind=real<%= k %>):: d12, d13, d23
         ! Sample values divided by the matching products of differences.
         Real(kind=real<%= k %>):: w1, w2, w3
         ! Leading coefficient and negated linear coefficient of the parabola.
         Real(kind=real<%= k %>):: curvature, neg_b

         d12 = x1 - x2
         d13 = x1 - x3
         d23 = x2 - x3
         w1 = f1/(d12*d13)
         w2 = f2/(d12*d23)
         w3 = f3/(d13*d23)
         curvature = w1 - w2 + w3
         is_convex = curvature > 0
         if(is_convex)then
            neg_b = w1*(x2 + x3) - w2*(x1 + x3) + w3*(x1 + x2)
            x_opt = neg_b/(2*curvature)
            return
         end if

         ! Not convex: fall back to the best sample, defaulting to x3.
         x_opt = x3
         if(f1 < f2)then
            if(f1 < f3) x_opt = x1
         else if(f2 < f3)then
            x_opt = x2
         end if
      end subroutine line_search_interpolate<%= k %>_0


      ! use gradient information

      subroutine updateLineSearchState<%= k %>_1(s, f, g)
         ! One iteration of the gradient-based line search.
         ! s: line-search state; f, g: objective value and derivative at the
         ! offset s%x requested by the previous call (or by `init`). On return
         ! s%x holds the next offset to evaluate.
         ! call init(s, dx)
         ! do
         !    call update(s, f(x0 + s%x), grad(x0 + s%x))
         ! end do
         type(LineSearchState<%= k %>_1), intent(inout):: s
         Real(kind=real<%= k %>), intent(in):: f, g
         Real(kind=real<%= k %>):: x1, f1, g1, x2, f2, g2, step, xl, xr, x_new, xlim
         ! Maximum extrapolation beyond the two samples, in units of their distance.
         Real(kind=real<%= k %>), parameter:: enlarge = 2

         if(s%iter < 1)then
            ERROR('`LineSearchState<%= k %>_1` should be `init`ialized before `update`d')
         end if
         s%iter = s%iter + 1
         if(s%iter == 2)then
            ! First sample: remember it and step by dx in the downhill direction.
            s%x_best = s%x
            s%f_best = f
            s%g_best = g
            if(g*s%dx > 0)then
               s%x = s%x_best - s%dx
            else
               s%x = s%x_best + s%dx
            end if
            return
         end if

         ! Order the new sample and the stored one so that index 1 has the
         ! lower value.
         if(f < s%f_best)then
            x1 = s%x
            f1 = f
            g1 = g
            x2 = s%x_best
            f2 = s%f_best
            g2 = s%g_best
         else
            x1 = s%x_best
            f1 = s%f_best
            g1 = s%g_best
            x2 = s%x
            f2 = f
            g2 = g
         end if

         ! A vanishing derivative at the better sample: stay there.
         if(g1 == 0)then
            call update_(s, x1, f1, g1)
            s%is_convex = .true.
            s%is_within = .true.
            s%x = x1
            return
         end if

         xl = min(x1, x2)
         xr = max(x1, x2)
         step = enlarge*(xr - xl)

         call line_search_interpolate(x1, f1, g1, x2, f2, x_new, s%is_convex)
         call update_(s, x1, f1, g1)
         if(s%is_convex)then
            ! Accept the interpolated minimizer unless it lies more than
            ! `step` outside the interval spanned by the two samples.
            if(x_new < xl)then
               xlim = xl - step
               s%is_within = xlim <= x_new
            else if(xr < x_new)then
               xlim = xr + step
               s%is_within = x_new <= xlim
            else
               s%is_within = .true.
            end if
            if(s%is_within)then
               s%x = x_new
            else
               s%x = xlim
            end if
         else
            ! Not convex: move downhill from the better sample by `step`.
            s%is_within = .false.
            if(g1 < 0)then
               s%x = x1 + step
            else if(g1 > 0)then
               s%x = x1 - step
            end if
         end if
      end subroutine updateLineSearchState<%= k %>_1


      subroutine update_LineSearchState<%= k %>_1(s, x_best, f_best, g_best)
         ! Record the best sample found so far in the line-search state.
         !
         ! s                     : state whose `*_best` components are overwritten.
         ! x_best, f_best, g_best: position, objective value and derivative to store.
         type(LineSearchState<%= k %>_1), intent(inout):: s
         Real(kind=real<%= k %>), intent(in):: x_best, f_best, g_best

         ! The three stores are independent of each other.
         s%g_best = g_best
         s%f_best = f_best
         s%x_best = x_best
      end subroutine update_LineSearchState<%= k %>_1


      subroutine initLineSearchState<%= k %>_1(s, dx)
         ! Prepare a gradient-based line-search state for its first `update`.
         !
         ! s : state to initialise (`intent(out)`).
         ! dx: initial trial step; must be non-zero.
         type(LineSearchState<%= k %>_1), intent(out):: s
         Real(kind=real<%= k %>), intent(in):: dx

         ASSERT(abs(dx) > 0)

         ! No sample has been taken yet, so the "best so far" slots start at infinity.
         s%g_best = get_infinity()
         s%f_best = get_infinity()
         s%x_best = get_infinity()

         ! The search starts at offset 0; `iter = 1` marks the state as initialised.
         s%x = 0
         s%dx = dx
         s%iter = 1
      end subroutine initLineSearchState<%= k %>_1


      subroutine line_search_interpolate<%= k %>_1(x1, f1, g1, x2, f2, x_new, is_convex)
         ! Fit the parabola that matches value `f1` and slope `g1` at `x1` and value `f2` at `x2`,
         ! then propose the abscissa to try next.
         !
         ! x_new    : vertex of the parabola when it opens upward;
         !            otherwise whichever of `x1`, `x2` carries the smaller value.
         ! is_convex: `.true.` iff the quadratic coefficient is positive.
         Real(kind=real<%= k %>), intent(in):: x1, f1, g1, x2, f2
         Real(kind=real<%= k %>), intent(out):: x_new
         Logical, intent(out):: is_convex
         Real(kind=real<%= k %>):: dx, curvature

         dx = x2 - x1
         ! From f2 = f1 + g1*dx + curvature*dx**2.
         curvature = ((f2 - f1) - g1*dx)/dx**2
         is_convex = curvature > 0

         if(.not.is_convex)then
            x_new = merge(x1, x2, f1 < f2)
            return
         end if

         x_new = x1 - g1/(2*curvature)
      end subroutine line_search_interpolate<%= k %>_1


      pure function nnls<%= k %>(A, b) result(ret)
         ! Iteratively look for a vector `x` with non-negative components that makes
         ! `l2_norm(b - matmul(A, x))` small, starting from `x = 0`.
         !
         ! A  : coefficient matrix; the result has `size(A, 2)` elements.
         ! b  : right-hand side.
         ! ret: the last accepted iterate with every component clipped to be `>= 0`.
         !
         ! The loop ends when the residual measure stops being `<=` the previous one
         ! or when `l2_norm(matmul(A, g))` is not positive.
         Real(kind=real<%= k %>), intent(in):: A(:, :), b(:)
         Real(kind=kind(A)), allocatable:: tAA(:, :), tAb(:), x(:), x_pre(:), g(:), ret(:)
         Real(kind=kind(A)):: d, d_pre, alpha, alpha_, l2_Ag
         Integer(kind=INT64):: i, n

         n = size(A, 2, kind=kind(n))
         ! Precompute transpose(A)*A and transpose(A)*b; they are reused in every iteration.
         tAA = matmul(transpose(A), A)
         tAb = matmul(transpose(A), b)
         allocate(x_pre(n))
         allocate(g(n))
         x_pre(:) = 0
         x = x_pre
         d_pre = get_infinity()
         do
            d = l2_norm(b - matmul(A, x))
            ! use `.not.` not to leak `NaN`.
            ! (a `NaN` in `d` makes `d <= d_pre` false, so the previous iterate is returned)
            ! `=` here is essential in some cases.
            if(.not.(d <= d_pre))then
               ! numerical error causes `x(i) < 0` for some `i`
               ret = max(x_pre, real(0, kind=kind(x_pre)))
               return
            end if
            d_pre = d
            ! Search direction: transpose(A)*(b - A*x), written with the precomputed products.
            g(:) = tAb - matmul(tAA, x)
            ! `alpha` becomes the largest step along `g` that keeps every component of `x` non-negative.
            alpha = get_infinity()
            ! NOTE(review): `ALL_OF` is a macro from `fortran_lib.h`; presumably it loops `i` over dimension 1 of `g` -- confirm.
            do ALL_OF(i, g, 1)
               if(g(i) < 0)then
                  if(x(i) <= 0)then
                     ! Already at (or below) the bound: do not move this component further down.
                     g(i) = 0
                  else
                     ! Step length at which `x(i)` would reach zero.
                     alpha_ = x(i)/(-g(i))
                     if(alpha_ < alpha)then
                        if(alpha_ <= 0)then
                           g(i) = 0
                        else
                           alpha = alpha_
                        end if
                     end if
                  end if
               end if
            end do

            x_pre(:) = x
            l2_Ag = l2_norm(matmul(A, g))
            if(l2_Ag <= 0)then
               ! Moving along `g` does not change `matmul(A, x)` (or `g` is zero): stop here.
               ret = max(x_pre, real(0, kind=kind(x_pre)))
               return
            end if
            ! NOTE(review): the exact minimiser along `g` is sum(g**2)/sum(matmul(A, g)**2);
            ! whether `l2_norm` returns the norm or its square is not visible here -- confirm the step is as intended.
            x(:) = x + min(l2_norm(g)/l2_Ag, alpha)*g
         end do
      end function nnls<%= k %>
   <% } %>


   function combinations(is, n) result(ret)
      ! Enumerate the `n`-element combinations of `is`, one combination per column of `ret`.
      !
      ! Assumptions:
      ! - `unique(is) == is`
      ! - `0 <= n <= size(is)` (checked by `ASSERT`)
      Integer(kind=int64), intent(in):: is(:), n
      Integer(kind=kind(is)):: ret(n, combination(size(is, kind=kind(n)), n))
      Integer(kind=kind(is)):: n_is

      ASSERT(0 <= n .and. n <= size(is, kind=kind(n)))
      n_is = size(is, kind=kind(n_is))
      call combinations_(n_is, is, n, ret)
   end function combinations


   pure recursive subroutine combinations_(n_is, is, n, ret)
      ! Recursive worker for `combinations`: fill the columns of `ret` with the
      ! `n`-element combinations of `is(1:n_is)`.
      !
      ! The columns that contain `is(1)` come first, followed by the columns that do not.
      ! todo: optimize
      Integer(kind=int64), intent(in):: n_is, is(n_is), n
      Integer(kind=kind(is)), intent(out):: ret(:, :)
      Integer(kind=kind(is)):: n_with_head

      ! Nothing to choose.
      if(n <= 0) return

      ! Single-element combinations: each element is its own column.
      if(n == 1)then
         ret(1, :) = is
         return
      end if

      ! Choosing everything: exactly one column.
      if(n == n_is)then
         ret(:, 1) = is
         return
      end if

      ! Columns holding `is(1)`: `is(1)` on top of every (n - 1)-combination of the rest.
      n_with_head = combination(n_is - 1, n - 1)
      ret(1, :n_with_head) = is(1)
      call combinations_(n_is - 1, is(2:), n - 1, ret(2:, :n_with_head))
      ! Remaining columns: the n-combinations that skip `is(1)`.
      call combinations_(n_is - 1, is(2:), n, ret(:, (n_with_head + 1):))
   end subroutine combinations_


   elemental function combination(n, k) result(ret)
      ! Binomial coefficient "n choose k".
      !
      ! Multiplication and division are interleaved (`ret <- ret*(n - m + i)/i`) so that every
      ! intermediate value is itself a binomial coefficient times a small factor. Multiplying out
      ! the whole falling factorial first would overflow int64 long before the result does
      ! (e.g. for n = 30, k = 15).
      ! Each division is exact: before dividing, `ret` equals `i` times the next coefficient.
      !
      ! Edge cases:
      ! - `k < 0`      -> 1 (empty product)
      ! - `k > n >= 0` -> 0 (a zero factor enters the product)
      Integer(kind=int64), intent(in):: n, k
      Integer(kind=kind(n)):: ret
      Integer(kind=kind(n)):: i, m

      ! C(n, k) == C(n, n - k): iterate over the smaller of the two when 0 <= k <= n.
      m = k
      if(0 <= n - k .and. n - k < k) m = n - k

      ret = 1
      do i = 1, m
         ret = ret*(n - m + i)/i
      end do
   end function combination


   pure function mask_indices(xs, n) result(ret)
      ! Build a logical mask of length `n` that is `.true.` exactly at the positions listed in `xs`.
      !
      ! xs: 1-based positions to mark (each expected to lie in `1..n`).
      ! n : length of the returned mask.
      Integer(kind=int64), intent(in):: xs(:), n
      Logical:: ret(n)
      Integer(kind=kind(n)):: j

      ret(:) = .false.
      do j = 1, size(xs, kind=kind(j))
         ret(xs(j)) = .true.
      end do
   end function mask_indices
end module optimize_lib