// Copyright (C) 2010 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_OPTIMIZATION_OCA_ABsTRACT_Hh_
#ifdef DLIB_OPTIMIZATION_OCA_ABsTRACT_Hh_
#include <chrono>
#include <limits>
// ----------------------------------------------------------------------------------------
namespace dlib
{
template <typename matrix_type>
class oca_problem
{
    /*!
        REQUIREMENTS ON matrix_type
            - matrix_type must be a dlib::matrix type that is able to hold
              column vectors.

        WHAT THIS OBJECT REPRESENTS
            Users implement this interface to describe an optimization problem
            to the oca optimizer that is defined later in this file.

            The optimizer minimizes objectives of the form:
                Minimize: f(w) == 0.5*length_squared(w) + C*R(w)

            Here R(w) is a convex function supplied by the user and C > 0.
            Non-negativity constraints may optionally be placed on some or all
            of the elements of w.

            A prior can be used instead, in which case the objective is:
                Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)

            Here prior is a vector supplied by the user and R(w) means the
            same thing as above.

            Finally, an elastic net regularizer is also supported:
                Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w)

            Here lasso_lambda lies in the range [0, 1) and sets the balance
            between L1 and L2 regularization.  R(w) again means the same
            thing as above.

            The stopping condition is not built in.  The user must supply it
            by implementing optimization_status().
    !*/

public:

    using scalar_type = typename matrix_type::type;

    virtual ~oca_problem() = default;

    virtual bool risk_has_lower_bound (
        scalar_type& lower_bound
    ) const
    {
        // Default: no lower bound is reported and lower_bound is not touched.
        return false;
    }
    /*!
        ensures
            - if (there is a constant such that R(w) >= that constant for
              every w) then
                - returns true
                - #lower_bound == that constant, i.e. a lower bound on R(w)
            - else
                - returns false
    !*/

    virtual bool optimization_status (
        scalar_type current_objective_value,
        scalar_type current_error_gap,
        scalar_type current_risk_value,
        scalar_type current_risk_gap,
        unsigned long num_cutting_planes,
        unsigned long num_iterations
    ) const = 0;
    /*!
        requires
            - The OCA optimizer calls this function once per iteration.
            - current_objective_value == the value of the objective function
              f(w) at the current iterate.
            - current_error_gap == a bound on how much further the objective
              function can decrease before the optimal point is reached.
              This gap equals 0 at the optimal solution.
            - current_risk_value == the value of the R(w) term of the
              objective function at the current iterate.
            - current_risk_gap == a bound on how much further the risk term
              can decrease.  This gap is zero at the optimal solution.
            - num_cutting_planes == how many cutting planes the algorithm is
              using right now.
            - num_iterations == the total number of iterations executed since
              the optimization began.
        ensures
            - returns true if the optimization should terminate now and
              false otherwise.
    !*/

    virtual scalar_type get_c (
    ) const = 0;
    /*!
        ensures
            - returns the C parameter of the objective function
    !*/

    virtual long get_num_dimensions (
    ) const = 0;
    /*!
        ensures
            - returns how many free variables this optimization problem has
    !*/

    virtual void get_risk (
        matrix_type& current_solution,
        scalar_type& risk_value,
        matrix_type& risk_subgradient
    ) const = 0;
    /*!
        requires
            - is_col_vector(current_solution) == true
            - current_solution.size() == get_num_dimensions()
        ensures
            - #current_solution is one of the following:
                - current_solution (i.e. the input is left unmodified)
                - The outcome of a line search that passes through
                  current_solution.
            - #risk_value == R(#current_solution)
            - #risk_subgradient == an element of the subgradient of R()
              evaluated at #current_solution
            - Neither #risk_value nor #risk_subgradient is multiplied by
              get_c().
    !*/

};
// ----------------------------------------------------------------------------------------
class oca
{
    /*!
        INITIAL VALUE
            - get_subproblem_epsilon() == 1e-2
            - get_subproblem_max_iterations() == 50000
            - get_inactive_plane_threshold() == 20
            - get_max_runtime() == std::chrono::hours(24*356*290)
              (i.e. roughly 290 years, so basically forever)
              NOTE(review): 356 looks like a transposition of 365 (days per
              year).  Confirm against the implementation before changing it.

        WHAT THIS OBJECT REPRESENTS
            This object is a tool for solving the optimization problem defined
            by the oca_problem abstract class.

            For reference, OCA solves optimization problems with the following form:
                Minimize: f(w) == 0.5*length_squared(w) + C*R(w)

            Where R(w) is a user-supplied convex function and C > 0.  Optionally,
            this object can also add non-negativity constraints to some or all
            of the elements of w.

            Or it can alternatively solve:
                Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)

            Where prior is a user supplied vector and R(w) has the same
            interpretation as above.

            Or it can use the elastic net regularizer:
                Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w)

            Where lasso_lambda is a number in the range [0, 1) and controls
            the trade-off between doing L1 and L2 regularization.  R(w) has
            the same interpretation as above.

            For a detailed discussion you should consult the following papers
            from the Journal of Machine Learning Research:
                Optimized Cutting Plane Algorithm for Large-Scale Risk Minimization
                    Vojtech Franc, Soren Sonnenburg; 10(Oct):2157--2192, 2009.

                Bundle Methods for Regularized Risk Minimization
                    Choon Hui Teo, S.V.N. Vishwanathan, Alex J. Smola, Quoc V. Le; 11(Jan):311-365, 2010.
    !*/

public:

    oca (
    );
    /*!
        ensures
            - this object is properly initialized
    !*/

    template <
        typename matrix_type
        >
    typename matrix_type::type operator() (
        const oca_problem<matrix_type>& problem,
        matrix_type& w,
        unsigned long num_nonnegative = 0,
        unsigned long force_weight_to_1 = std::numeric_limits<unsigned long>::max()
    ) const;
    /*!
        requires
            - problem.get_c() > 0
            - problem.get_num_dimensions() > 0
        ensures
            - solves the given oca problem and stores the solution in #w.  In particular,
              this function solves:
                Minimize: f(w) == 0.5*length_squared(w) + C*R(w)
            - The optimization algorithm runs until problem.optimization_status()
              indicates it is time to stop.
            - returns the objective value at the solution #w
            - if (num_nonnegative != 0) then
                - Adds the constraint that #w(i) >= 0 for all i < num_nonnegative.
                  That is, the first num_nonnegative elements of #w will always be
                  non-negative.  This includes the copies of w passed to get_risk()
                  in the form of the current_solution vector as well as the final
                  output of this function.
            - if (force_weight_to_1 < problem.get_num_dimensions()) then
                - The optimizer enforces the following constraints:
                    - #w(force_weight_to_1) == 1
                    - for all i > force_weight_to_1:
                        - #w(i) == 0
                    - That is, the element in the weight vector at the index indicated
                      by force_weight_to_1 will have a value of 1 upon completion of
                      this function, while all subsequent elements of w will have
                      values of 0.
    !*/

    template <
        typename matrix_type
        >
    typename matrix_type::type operator() (
        const oca_problem<matrix_type>& problem,
        matrix_type& w,
        const matrix_type& prior
    ) const;
    /*!
        requires
            - problem.get_c() > 0
            - problem.get_num_dimensions() > 0
            - is_col_vector(prior) == true
            - prior.size() == problem.get_num_dimensions()
        ensures
            - solves the given oca problem and stores the solution in #w.
            - In this mode, we solve a version of the problem with a different
              regularizer.  In particular, this function solves:
                Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)
            - The optimization algorithm runs until problem.optimization_status()
              indicates it is time to stop.
            - returns the objective value at the solution #w
    !*/

    template <
        typename matrix_type
        >
    typename matrix_type::type solve_with_elastic_net (
        const oca_problem<matrix_type>& problem,
        matrix_type& w,
        // Spelled out in full: scalar_type is a member of oca_problem, not of
        // oca, so the bare name is not visible in this class.
        typename matrix_type::type lasso_lambda,
        unsigned long force_weight_to_1 = std::numeric_limits<unsigned long>::max()
    ) const;
    /*!
        requires
            - problem.get_c() > 0
            - problem.get_num_dimensions() > 0
            - 0 <= lasso_lambda < 1
        ensures
            - Solves the given oca problem and stores the solution in #w, but uses an
              elastic net regularizer instead of the normal L2 regularizer.  In
              particular, this function solves:
                Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w)
            - The optimization algorithm runs until problem.optimization_status()
              indicates it is time to stop.
            - returns the objective value at the solution #w
            - if (force_weight_to_1 < problem.get_num_dimensions()) then
                - The optimizer enforces the following constraints:
                    - #w(force_weight_to_1) == 1
                    - for all i > force_weight_to_1:
                        - #w(i) == 0
                    - That is, the element in the weight vector at the index indicated
                      by force_weight_to_1 will have a value of 1 upon completion of
                      this function, while all subsequent elements of w will have
                      values of 0.
    !*/

    void set_subproblem_epsilon (
        double eps
    );
    /*!
        requires
            - eps > 0
        ensures
            - #get_subproblem_epsilon() == eps
    !*/

    double get_subproblem_epsilon (
    ) const;
    /*!
        ensures
            - returns the accuracy used in solving the quadratic programming
              subproblem that is part of the overall OCA algorithm.
    !*/

    void set_subproblem_max_iterations (
        unsigned long sub_max_iter
    );
    /*!
        requires
            - sub_max_iter > 0
        ensures
            - #get_subproblem_max_iterations() == sub_max_iter
    !*/

    unsigned long get_subproblem_max_iterations (
    ) const;
    /*!
        ensures
            - returns the maximum number of iterations this object will perform
              while attempting to solve each quadratic programming subproblem.
    !*/

    void set_inactive_plane_threshold (
        unsigned long inactive_thresh
    );
    /*!
        requires
            - inactive_thresh > 0
        ensures
            - #get_inactive_plane_threshold() == inactive_thresh
    !*/

    unsigned long get_inactive_plane_threshold (
    ) const;
    /*!
        ensures
            - As OCA runs it builds up a set of cutting planes.  Typically
              cutting planes become inactive after a certain point and can then
              be removed.  This function returns the number of iterations of
              inactivity required before a cutting plane is removed.
    !*/

    // Non-const like the other setters: the postcondition below changes the
    // value reported by get_max_runtime().
    void set_max_runtime (
        const std::chrono::nanoseconds& max_runtime
    );
    /*!
        ensures
            - #get_max_runtime() == max_runtime
    !*/

    std::chrono::nanoseconds get_max_runtime (
    ) const;
    /*!
        ensures
            - returns the maximum amount of time we will let the solver run before
              making it terminate.
    !*/

};
}
// ----------------------------------------------------------------------------------------
#endif // DLIB_OPTIMIZATION_OCA_ABsTRACT_Hh_