Upgrade ensmallen 2.10.0 (#14)
* Upgrade ensmallen to 2.10.0

* Update version and armadillo dependency

* Update package description

* Add NEWS.md update

* Add changelog update
coatless authored Sep 7, 2019
1 parent 0bafbaa commit e6fd577
Showing 209 changed files with 10,282 additions and 4,673 deletions.
9 changes: 9 additions & 0 deletions ChangeLog
@@ -1,3 +1,12 @@
2019-09-07 James Balamuta <balamut2@illinois.edu>

* DESCRIPTION (Version, Date): Release 2.10.0 and armadillo dependency

* NEWS.md: Update for Ensmallen release 2.10.0

* inst/include/ensmallen_bits: Upgraded to Ensmallen 2.10.0
* inst/include/ensmallen.hpp: ditto

2019-08-09 James Balamuta <balamut2@illinois.edu>

* DESCRIPTION (Version, Date): Release 1.16.0
6 changes: 3 additions & 3 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: RcppEnsmallen
Title: Header-Only C++ Mathematical Optimization Library for 'Armadillo'
Version: 0.1.16.0.1
Version: 0.2.10.0.1
Authors@R: c(
person("James Joseph", "Balamuta", email = "balamut2@illinois.edu",
role = c("aut", "cre", "cph"),
@@ -22,14 +22,14 @@ Description: 'Ensmallen' is a templated C++ mathematical optimization library
'RcppArmadillo' (the 'Rcpp' bindings/bridge to 'Armadillo') is licensed under
the GNU GPL version 2 or later. Thus, 'RcppEnsmallen' is also licensed under
similar terms. Note that 'Ensmallen' requires a compiler that supports
'C++11' and 'Armadillo' 6.500 or later.
'C++11' and 'Armadillo' 8.400 or later.
Depends: R (>= 3.3.0)
License: GPL (>= 2)
URL: https://github.com/coatless/rcppensmallen, https://github.com/mlpack/ensmallen, http://ensmallen.org/
BugReports: https://github.com/coatless/rcppensmallen/issues
Encoding: UTF-8
LazyData: true
LinkingTo: Rcpp, RcppArmadillo
LinkingTo: Rcpp, RcppArmadillo (>= 0.8.400.0.0)
Imports: Rcpp
RoxygenNote: 6.1.1
Roxygen: list(markdown = TRUE)
14 changes: 14 additions & 0 deletions NEWS.md
@@ -1,3 +1,17 @@
# RcppEnsmallen 0.2.10.0.1

- Upgraded to ensmallen 2.10.0: "Fried Chicken" (2019-09-07)
- All `Optimize()` functions now take any matrix type; so, e.g., `arma::fmat`
or `arma::sp_mat` can be used for optimization. See the documentation for
more details ([#113](https://github.com/mlpack/ensmallen/pull/113),
[#119](https://github.com/mlpack/ensmallen/pull/119)).
- Introduce callback support. Callbacks can be appended as the last arguments
of an `Optimize()` call, and can perform custom behavior at different points
during the optimization. See the documentation for more details
([#119](https://github.com/mlpack/ensmallen/pull/119)).
- Slight speedups for `FrankWolfe` optimizer
([#127](https://github.com/mlpack/ensmallen/pull/127)).

# RcppEnsmallen 0.1.16.0.1

- Upgraded to ensmallen release 1.16.0 "Loud Alarm Clock" (2019-08-09)
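To make the two headline changes in the NEWS entry above concrete — arbitrary Armadillo matrix types and trailing callbacks — here is a minimal, hedged sketch against plain ensmallen 2.10.0. `ShiftedQuadratic` is a made-up toy objective, written to the separable (decomposable) function shape that SGD-style optimizers such as `ens::AdaDelta` expect; it is not part of this commit, and the callback classes used are the ones whose headers are added below.

```cpp
// Hedged sketch, not from this commit. "ShiftedQuadratic" is a hypothetical
// toy objective following the separable-function API used by SGD-style
// optimizers: NumFunctions(), Shuffle(), Evaluate(), Gradient().
#include <ensmallen.hpp>

class ShiftedQuadratic
{
 public:
  // Number of separable terms in the objective.
  size_t NumFunctions() const { return 3; }

  // Nothing to shuffle for this toy problem.
  void Shuffle() { }

  // Sum of the terms in [begin, begin + batchSize): f_i(x) = ||x - i||^2.
  template<typename MatType>
  typename MatType::elem_type Evaluate(const MatType& x,
                                       const size_t begin,
                                       const size_t batchSize)
  {
    typename MatType::elem_type objective = 0;
    for (size_t i = begin; i < begin + batchSize; ++i)
      objective += arma::accu(arma::square(x - (typename MatType::elem_type) i));
    return objective;
  }

  // Gradient of the same terms, written into whatever gradient type is used.
  template<typename MatType, typename GradType>
  void Gradient(const MatType& x,
                const size_t begin,
                GradType& gradient,
                const size_t batchSize)
  {
    gradient.zeros(x.n_rows, x.n_cols);
    for (size_t i = begin; i < begin + batchSize; ++i)
      gradient += 2 * (x - (typename MatType::elem_type) i);
  }
};

int main()
{
  ShiftedQuadratic f;
  ens::AdaDelta optimizer;

  // Single-precision coordinates now work directly...
  arma::fmat coordinates(2, 1, arma::fill::randu);

  // ...and callbacks are simply appended as trailing arguments to Optimize().
  optimizer.Optimize(f, coordinates, ens::ProgressBar(), ens::PrintLoss());

  coordinates.print("final coordinates");
}
```

Within this package the same code would normally be reached through Rcpp and the RcppEnsmallen headers rather than a standalone `main()`.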
11 changes: 11 additions & 0 deletions inst/include/ensmallen.hpp
@@ -59,6 +59,17 @@
#include "ensmallen_bits/ens_version.hpp"
#include "ensmallen_bits/log.hpp" // TODO: should move to another place

#include "ensmallen_bits/utility/any.hpp"
#include "ensmallen_bits/utility/arma_traits.hpp"

// Callbacks.
#include "ensmallen_bits/callbacks/callbacks.hpp"
#include "ensmallen_bits/callbacks/early_stop_at_min_loss.hpp"
#include "ensmallen_bits/callbacks/print_loss.hpp"
#include "ensmallen_bits/callbacks/progress_bar.hpp"
#include "ensmallen_bits/callbacks/store_best_coordinates.hpp"
#include "ensmallen_bits/callbacks/timer_stop.hpp"

#include "ensmallen_bits/problems/problems.hpp" // TODO: should move to another place

#include "ensmallen_bits/ada_delta/ada_delta.hpp"
33 changes: 29 additions & 4 deletions inst/include/ensmallen_bits/ada_delta/ada_delta.hpp
@@ -85,15 +85,40 @@ class AdaDelta
* objective value is returned. The DecomposableFunctionType is checked for
* API consistency at compile time.
*
* @tparam DecomposableFunctionType Type of the function to optimize.
* @tparam DecomposableFunctionType Type of the function to be optimized.
* @tparam MatType Type of matrix to optimize with.
* @tparam GradType Type of matrix to use to represent function gradients.
* @tparam CallbackTypes Types of callback functions.
* @param function Function to optimize.
* @param iterate Starting point (will be modified).
* @param callbacks Callback functions.
* @return Objective value of the final point.
*/
template<typename DecomposableFunctionType>
double Optimize(DecomposableFunctionType& function, arma::mat& iterate)
template<typename DecomposableFunctionType,
typename MatType,
typename GradType,
typename... CallbackTypes>
typename std::enable_if<IsArmaType<GradType>::value,
typename MatType::elem_type>::type
Optimize(DecomposableFunctionType& function,
MatType& iterate,
CallbackTypes&&... callbacks)
{
return optimizer.Optimize(function, iterate);
return optimizer.Optimize<DecomposableFunctionType, MatType, GradType,
CallbackTypes...>(function, iterate, callbacks...);
}

//! Forward the MatType as GradType.
template<typename DecomposableFunctionType,
typename MatType,
typename... CallbackTypes>
typename MatType::elem_type Optimize(DecomposableFunctionType& function,
MatType& iterate,
CallbackTypes&&... callbacks)
{
return Optimize<DecomposableFunctionType, MatType, MatType,
CallbackTypes...>(function, iterate,
std::forward<CallbackTypes>(callbacks)...);
}

//! Get the step size.
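The pair of overloads added above means callers rarely have to name `GradType`: the second overload simply forwards `MatType` as the gradient type. A hedged illustration of the two call forms follows; it is a fragment, not standalone code, and `f` refers to the hypothetical `ShiftedQuadratic` objective from the sketch after the NEWS.md diff above.

```cpp
// Hedged fragment; "f" is the hypothetical ShiftedQuadratic from the earlier
// sketch. Explicit template arguments are only needed when the gradient
// representation should differ from the coordinate type.
ens::AdaDelta opt;
arma::fmat coordinates(2, 1, arma::fill::randu);

// Usual form: GradType is forwarded as MatType (arma::fmat here).
opt.Optimize(f, coordinates);

// Explicit form: spell out all three types, with callbacks still trailing.
// Here the gradient type matches the coordinates, but it could differ, e.g.
// dense gradients for sparse coordinates when the objective supports that.
opt.Optimize<ShiftedQuadratic, arma::fmat, arma::fmat>(
    f, coordinates, ens::EarlyStopAtMinLoss());
```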
114 changes: 65 additions & 49 deletions inst/include/ensmallen_bits/ada_delta/ada_delta_update.hpp
@@ -51,49 +51,6 @@ class AdaDeltaUpdate
// Nothing to do.
}

/**
* The Initialize method is called by SGD Optimizer method before the start of
* the iteration update process. In AdaDelta update policy, the mean squared
* and the delta mean squared gradient matrices are initialized to the zeros
* matrix with the same size as gradient matrix (see ens::SGD<>).
*
* @param rows Number of rows in the gradient matrix.
* @param cols Number of columns in the gradient matrix.
*/
void Initialize(const size_t rows, const size_t cols)
{
// Initialize empty matrices for mean sum of squares of parameter gradient.
meanSquaredGradient = arma::zeros<arma::mat>(rows, cols);
meanSquaredGradientDx = arma::zeros<arma::mat>(rows, cols);
}

/**
* Update step for SGD. The AdaDelta update dynamically adapts over time using
* only first order information. Additionally, AdaDelta requires no manual
* tuning of a learning rate.
*
* @param iterate Parameters that minimize the function.
* @param stepSize Step size to be used for the given iteration.
* @param gradient The gradient matrix.
*/
void Update(arma::mat& iterate,
const double stepSize,
const arma::mat& gradient)
{
// Accumulate gradient.
meanSquaredGradient *= rho;
meanSquaredGradient += (1 - rho) * (gradient % gradient);
arma::mat dx = arma::sqrt((meanSquaredGradientDx + epsilon) /
(meanSquaredGradient + epsilon)) % gradient;

// Accumulate updates.
meanSquaredGradientDx *= rho;
meanSquaredGradientDx += (1 - rho) * (dx % dx);

// Apply update.
iterate -= (stepSize * dx);
}

//! Get the smoothing parameter.
double Rho() const { return rho; }
//! Modify the smoothing parameter.
@@ -104,18 +61,77 @@ class AdaDeltaUpdate
//! Modify the value used to initialise the mean squared gradient parameter.
double& Epsilon() { return epsilon; }

/**
* The UpdatePolicyType policy classes must contain an internal 'Policy'
* template class with two template arguments: MatType and GradType. This is
* instantiated at the start of the optimization, and holds parameters
* specific to an individual optimization.
*/
template<typename MatType, typename GradType>
class Policy
{
public:
/**
* This constructor is called by the SGD optimizer method before the start
* of the iteration update process. In AdaDelta update policy, the mean
* squared and the delta mean squared gradient matrices are initialized to
* the zeros matrix with the same size as gradient matrix (see ens::SGD<>).
*
* @param parent AdaDeltaUpdate object.
* @param rows Number of rows in the gradient matrix.
* @param cols Number of columns in the gradient matrix.
*/
Policy(AdaDeltaUpdate& parent, const size_t rows, const size_t cols) :
parent(parent)
{
meanSquaredGradient.zeros(rows, cols);
meanSquaredGradientDx.zeros(rows, cols);
}

/**
* Update step for SGD. The AdaDelta update dynamically adapts over time
* using only first order information. Additionally, AdaDelta requires no
* manual tuning of a learning rate.
*
* @param iterate Parameters that minimize the function.
* @param stepSize Step size to be used for the given iteration.
* @param gradient The gradient matrix.
*/
void Update(MatType& iterate,
const double stepSize,
const GradType& gradient)
{
// Accumulate gradient.
meanSquaredGradient *= parent.rho;
meanSquaredGradient += (1 - parent.rho) * (gradient % gradient);
GradType dx = arma::sqrt((meanSquaredGradientDx + parent.epsilon) /
(meanSquaredGradient + parent.epsilon)) % gradient;

// Accumulate updates.
meanSquaredGradientDx *= parent.rho;
meanSquaredGradientDx += (1 - parent.rho) * (dx % dx);

// Apply update.
iterate -= (stepSize * dx);
}

private:
// The instantiated parent class.
AdaDeltaUpdate& parent;

// The mean squared gradient matrix.
GradType meanSquaredGradient;

// The delta mean squared gradient matrix.
GradType meanSquaredGradientDx;
};

private:
// The smoothing parameter.
double rho;

// The epsilon value used to initialise the mean squared gradient parameter.
double epsilon;

// The mean squared gradient matrix.
arma::mat meanSquaredGradient;

// The delta mean squared gradient matrix.
arma::mat meanSquaredGradientDx;
};

} // namespace ens
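The comment block in the diff above describes the new structure for update policies: hyperparameters stay on the outer class, while an inner `Policy<MatType, GradType>` holds per-run state and performs the actual step. As a hedged sketch of that shape, modeled on `AdaDeltaUpdate` — `VanillaUpdateSketch` is an illustrative name, not code from this commit — a minimal policy with no state beyond the parent reference might look like:

```cpp
// Hedged sketch of the 2.10.0-style update-policy shape. "VanillaUpdateSketch"
// is made up for illustration; only the structure mirrors AdaDeltaUpdate above.
class VanillaUpdateSketch
{
 public:
  template<typename MatType, typename GradType>
  class Policy
  {
   public:
    // Instantiated once by SGD before the iteration loop begins; a real
    // policy would size its state matrices from (rows, cols) here.
    Policy(VanillaUpdateSketch& /* parent */,
           const size_t /* rows */,
           const size_t /* cols */) { }

    // Plain gradient step, applied elementwise in whatever types were chosen.
    void Update(MatType& iterate,
                const double stepSize,
                const GradType& gradient)
    {
      iterate -= stepSize * gradient;
    }
  };
};
```

Such a policy would presumably be plugged in the same way as the built-in ones, e.g. `ens::SGD<VanillaUpdateSketch>`, though this commit does not show that usage.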
33 changes: 29 additions & 4 deletions inst/include/ensmallen_bits/ada_grad/ada_grad.hpp
@@ -81,15 +81,40 @@ class AdaGrad
* be modified to store the finishing point of the algorithm, and the final
* objective value is returned.
*
* @tparam DecomposableFunctionType Type of the function to optimize.
* @tparam DecomposableFunctionType Type of the function to be optimized.
* @tparam MatType Type of matrix to optimize with.
* @tparam GradType Type of matrix to use to represent function gradients.
* @tparam CallbackTypes Types of callback functions.
* @param function Function to optimize.
* @param iterate Starting point (will be modified).
* @param callbacks Callback functions.
* @return Objective value of the final point.
*/
template<typename DecomposableFunctionType>
double Optimize(DecomposableFunctionType& function, arma::mat& iterate)
template<typename DecomposableFunctionType,
typename MatType,
typename GradType,
typename... CallbackTypes>
typename std::enable_if<IsArmaType<GradType>::value,
typename MatType::elem_type>::type
Optimize(DecomposableFunctionType& function,
MatType& iterate,
CallbackTypes&&... callbacks)
{
return optimizer.Optimize(function, iterate);
return optimizer.Optimize<DecomposableFunctionType, MatType, GradType,
CallbackTypes...>(function, iterate, callbacks...);
}

//! Forward the MatType as GradType.
template<typename DecomposableFunctionType,
typename MatType,
typename... CallbackTypes>
typename MatType::elem_type Optimize(DecomposableFunctionType& function,
MatType& iterate,
CallbackTypes&&... callbacks)
{
return Optimize<DecomposableFunctionType, MatType, MatType,
CallbackTypes...>(function, iterate,
std::forward<CallbackTypes>(callbacks)...);
}

//! Get the step size.
86 changes: 52 additions & 34 deletions inst/include/ensmallen_bits/ada_grad/ada_grad_update.hpp
@@ -49,49 +49,67 @@ class AdaGradUpdate
// Nothing to do.
}

/**
* The Initialize method is called by SGD Optimizer method before the start of
* the iteration update process. In AdaGrad update policy, squared
* gradient matrix is initialized to the zeros matrix with the same size as
* gradient matrix (see ens::SGD<>).
*
* @param rows Number of rows in the gradient matrix.
* @param cols Number of columns in the gradient matrix.
*/
void Initialize(const size_t rows, const size_t cols)
{
// Initialize an empty matrix for sum of squares of parameter gradient.
squaredGradient = arma::zeros<arma::mat>(rows, cols);
}
//! Get the value used to initialise the squared gradient parameter.
double Epsilon() const { return epsilon; }
//! Modify the value used to initialise the squared gradient parameter.
double& Epsilon() { return epsilon; }

/**
* Update step for SGD. The AdaGrad update adapts the learning rate by
* performing larger updates for more sparse parameters and smaller updates
* for less sparse parameters .
*
* @param iterate Parameters that minimize the function.
* @param stepSize Step size to be used for the given iteration.
* @param gradient The gradient matrix.
* The UpdatePolicyType policy classes must contain an internal 'Policy'
* template class with two template arguments: MatType and GradType. This is
* instantiated at the start of the optimization, and holds parameters
* specific to an individual optimization.
*/
void Update(arma::mat& iterate,
const double stepSize,
const arma::mat& gradient)
template<typename MatType, typename GradType>
class Policy
{
squaredGradient += (gradient % gradient);
iterate -= (stepSize * gradient) / (arma::sqrt(squaredGradient) + epsilon);
}
public:
/**
* This constructor is called by the SGD optimizer before the start of the
* iteration update process. In AdaGrad update policy, squared gradient
* matrix is initialized to the zeros matrix with the same size as gradient
* matrix (see ens::SGD<>).
*
* @param parent Instantiated parent class.
* @param rows Number of rows in the gradient matrix.
* @param cols Number of columns in the gradient matrix.
*/
Policy(AdaGradUpdate& parent, const size_t rows, const size_t cols) :
parent(parent),
squaredGradient(rows, cols)
{
// Initialize an empty matrix for sum of squares of parameter gradient.
squaredGradient.zeros();
}

//! Get the value used to initialise the squared gradient parameter.
double Epsilon() const { return epsilon; }
//! Modify the value used to initialise the squared gradient parameter.
double& Epsilon() { return epsilon; }
/**
* Update step for SGD. The AdaGrad update adapts the learning rate by
* performing larger updates for more sparse parameters and smaller updates
* for less sparse parameters.
*
* @param iterate Parameters that minimize the function.
* @param stepSize Step size to be used for the given iteration.
* @param gradient The gradient matrix.
*/
void Update(MatType& iterate,
const double stepSize,
const GradType& gradient)
{
squaredGradient += (gradient % gradient);
iterate -= (stepSize * gradient) / (arma::sqrt(squaredGradient) +
parent.epsilon);
}

private:
// Instantiated parent class.
AdaGradUpdate& parent;
// The squared gradient matrix.
GradType squaredGradient;
};

private:
// The epsilon value used to initialise the squared gradient parameter.
double epsilon;

// The squared gradient matrix.
arma::mat squaredGradient;
};

} // namespace ens
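For reference, the `Policy::Update` step in the AdaGrad diff above is the standard AdaGrad recursion; with `%` (element-wise multiplication) written as $\odot$ and the division likewise element-wise, it reads:

$$
G_t = G_{t-1} + g_t \odot g_t,
\qquad
x_{t+1} = x_t - \frac{\eta\, g_t}{\sqrt{G_t} + \epsilon},
$$

where $G_t$ is `squaredGradient`, $g_t$ is `gradient`, $\eta$ is `stepSize`, and $\epsilon$ is `parent.epsilon`.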