#ifndef STAN_OPTIMIZATION_BFGS_HPP
#define STAN_OPTIMIZATION_BFGS_HPP

#include <stan/math/prim/mat/fun/Eigen.hpp>
#include <stan/math/prim/mat/meta/index_type.hpp>
// ... (further Stan, Boost, and standard-library includes elided) ...
#include <boost/math/special_functions/fpclassify.hpp>
namespace optimization {

  // Convergence tolerances and iteration limits used by the optimizer
  // (fScale, tolAbsF, tolAbsGrad, tolAbsX, maxIts, ...).
  template<typename Scalar = double>
  class ConvergenceOptions { /* ... */ };

  // Line-search options (c1, c2, alpha0, minAlpha).
  template<typename Scalar = double>
  class LSOptions { /* ... */ };
  // Quasi-Newton minimizer: FunctorType supplies the objective and its
  // gradient, QNUpdateType supplies the Hessian-approximation update and
  // the search direction.
  template<typename FunctorType, typename QNUpdateType,
           typename Scalar = double,
           int DimAtCompile = Eigen::Dynamic>
  class BFGSMinimizer {
  public:
    typedef Eigen::Matrix<Scalar, DimAtCompile, 1> VectorT;
    typedef Eigen::Matrix<Scalar, DimAtCompile, DimAtCompile> HessianT;
    // ... (iterate state and the public _ls_opts, _conv_opts, _qn members
    //      elided)

    // Relative gradient magnitude, scaled by the objective value.
    Scalar rel_grad_norm() const {
      return -_pk.dot(_gk) / std::max(std::fabs(_fk), _conv_opts.fScale);
    }

    // Relative decrease in the objective between the last two iterates.
    Scalar rel_obj_decrease() const {
      return std::fabs(_fk_1 - _fk) / std::max(std::fabs(_fk_1),
                                               std::max(std::fabs(_fk),
                                                        _conv_opts.fScale));
    }
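    // In symbols (illustrative restatement, with p_k the search direction,
    // g_k the gradient, and f_k the objective value at iteration k):
    //   rel_grad_norm()    = -p_k . g_k / max(|f_k|, fScale)
    //   rel_obj_decrease() = |f_{k-1} - f_k| / max(|f_{k-1}|, |f_k|, fScale)
    // step() compares both against relative tolerances scaled by machine
    // epsilon (see the convergence tests below).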
    // Map a termination code returned by step() / minimize() to a
    // human-readable message (the switch's case labels are elided here).
    std::string get_code_string(int retCode) {
      switch (retCode) {
        // ...
          return std::string("Successful step completed");
        // ...
          return std::string("Convergence detected: absolute change "
                             "in objective function was below tolerance");
        // ...
          return std::string("Convergence detected: relative change "
                             "in objective function was below tolerance");
        // ...
          return std::string("Convergence detected: "
                             "gradient norm is below tolerance");
        // ...
          return std::string("Convergence detected: relative "
                             "gradient magnitude is below tolerance");
        // ...
          return std::string("Convergence detected: "
                             "absolute parameter change was below tolerance");
        // ...
          return std::string("Maximum number of iterations hit, "
                             "may not be at an optima");
        // ...
          return std::string("Line search failed to achieve a sufficient "
                             "decrease, no more progress can be made");
        default:
          return std::string("Unknown termination code");
      }
    }
    // Evaluate the objective and gradient at the starting point;
    // a failed evaluation here is fatal.
    void initialize(const VectorT &x0) {
      int ret;
      _xk = x0;
      ret = _func(_xk, _fk, _gk);
      if (ret) {
        throw std::runtime_error("Error evaluating initial BFGS point.");
      }
      // ...
    }
    // One quasi-Newton iteration: pick a trial step size, run a line search
    // along _pk satisfying the strong Wolfe conditions, update the
    // approximation, then run the convergence tests.  Returns 0 on a
    // successful step and a nonzero termination code otherwise.
    int step() {
      Scalar gradNorm, stepNorm;
      // ... (other locals and the Hessian-reset bookkeeping elided)

      // When the approximation is reset, fall back to steepest descent.
      _pk.noalias() = -_gk;
      // ...

      // Initial guess for the step size.
      if (_itNum > 1 && resetB != 2) {
        _alpha0 = _alpha = std::min(1.0, /* cubic-interpolation guess elided */);
      } else {
        _alpha0 = _alpha = _ls_opts.alpha0;
      }

      // Wolfe line search along _pk (full call elided); c1 and c2 are the
      // sufficient-decrease and curvature constants:
      //   ... _ls_opts.c1, _ls_opts.c2, ...
      // If the search fails, the approximation is reset and the search
      // retried before giving up:
      _note += "LS failed, Hessian reset";
      // ...

      // Accept the trial point as the new iterate.
      std::swap(_fk, _fk_1);
      // ...
      sk.noalias() = _xk - _xk_1;
      yk.noalias() = _gk - _gk_1;

      gradNorm = _gk.norm();
      stepNorm = sk.norm();

      // Update the quasi-Newton approximation; after a reset, rescale the
      // remembered step size by the scaling factor the update returns.
      Scalar B0fact = _qn.update(yk, sk, true);
      // ...
      _alphak_1 = _alpha * B0fact;
      // ...

      // Search direction for the next iteration.
      _qn.search_direction(_pk, _gk);

      // Convergence tests (the retCode assignments are elided).
      if (std::fabs(_fk_1 - _fk) < _conv_opts.tolAbsF) {
        // absolute change in the objective below tolerance
      } else if (gradNorm < _conv_opts.tolAbsGrad) {
        // gradient norm below tolerance
      } else if (stepNorm < _conv_opts.tolAbsX) {
        // absolute parameter change below tolerance
      } else if (_itNum >= _conv_opts.maxIts) {
        // maximum number of iterations reached
      } else if (rel_obj_decrease() < _conv_opts.tolRelF
                 * std::numeric_limits<Scalar>::epsilon()) {
        // relative change in the objective below tolerance
      } else if (rel_grad_norm() < _conv_opts.tolRelGrad
                 * std::numeric_limits<Scalar>::epsilon()) {
        // relative gradient magnitude below tolerance
      }

      return retCode;
    }

    // Drive step() until it reports a termination code.
    int minimize(VectorT &x0) {
      int retcode;
      initialize(x0);
      while (!(retcode = step()))
        continue;
      x0 = _xk;
      return retcode;
    }

    // ... (accessors such as curr_x(), curr_g(), prev_f(), note() elided)
  };
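  // --- Illustrative usage sketch (comments only; not part of this header) ---
  // A minimal functor driven by BFGSMinimizer.  BFGSUpdate_HInv<> is assumed
  // to come from <stan/optimization/bfgs_update.hpp>; any type providing
  // update() and search_direction() would work.  "Quadratic" is a made-up
  // example functor, not part of Stan.
  //
  //   struct Quadratic {
  //     typedef Eigen::VectorXd VectorT;
  //     // f(x) = 0.5 * ||x - 1||^2 and its gradient; return 0 on success.
  //     int operator()(const VectorT& x, double& f, VectorT& g) {
  //       VectorT d = x - VectorT::Ones(x.size());
  //       f = 0.5 * d.squaredNorm();
  //       g = d;
  //       return 0;
  //     }
  //     int operator()(const VectorT& x, double& f) {
  //       VectorT g;
  //       return (*this)(x, f, g);
  //     }
  //   };
  //
  //   Quadratic q;
  //   stan::optimization::BFGSMinimizer<
  //       Quadratic, stan::optimization::BFGSUpdate_HInv<> > minimizer(q);
  //   Eigen::VectorXd x = Eigen::VectorXd::Zero(3);
  //   int ret = minimizer.minimize(x);   // x converges to the vector of ones
  //   std::cout << minimizer.get_code_string(ret) << std::endl;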
  // Adapts a Stan model to the functor interface expected by BFGSMinimizer:
  // evaluations return the negative log density (and its gradient), so
  // minimizing maximizes the model's log probability.
  template <typename M>
  class ModelAdaptor {
  private:
    M& _model;
    std::vector<int> _params_i;
    std::ostream* _msgs;
    std::vector<double> _x, _g;
    size_t _fevals;

  public:
    ModelAdaptor(M& model,
                 const std::vector<int>& params_i,
                 std::ostream* msgs)
      : _model(model), _params_i(params_i), _msgs(msgs), _fevals(0) {}

    size_t fevals() const { return _fevals; }

    // Negative log density (up to a constant) at x; a nonzero return
    // signals a failed evaluation to the optimizer.
    int operator()(const Eigen::Matrix<double, Eigen::Dynamic, 1> &x,
                   double &f) {
      using Eigen::Matrix;
      using Eigen::Dynamic;
      using stan::math::index_type;
      typedef typename index_type<Matrix<double, Dynamic, 1> >::type idx_t;

      _x.resize(x.size());
      for (idx_t i = 0; i < x.size(); i++)
        _x[i] = x[i];

      try {
        f = - log_prob_propto<false>(_model, _x, _params_i, _msgs);
      } catch (const std::exception& e) {
        if (_msgs)
          (*_msgs) << e.what() << std::endl;
        return 1;  // exception during evaluation
      }

      if (boost::math::isfinite(f)) {
        return 0;
      } else {
        if (_msgs)
          *_msgs << "Error evaluating model log probability: "
                    "Non-finite function evaluation." << std::endl;
        return 2;  // non-finite function value
      }
    }

    // Negative log density and gradient at x; increments the gradient
    // evaluation counter reported by fevals().
    int operator()(const Eigen::Matrix<double, Eigen::Dynamic, 1> &x,
                   double &f,
                   Eigen::Matrix<double, Eigen::Dynamic, 1> &g) {
      using Eigen::Matrix;
      using Eigen::Dynamic;
      using stan::math::index_type;
      typedef typename index_type<Matrix<double, Dynamic, 1> >::type idx_t;

      _x.resize(x.size());
      for (idx_t i = 0; i < x.size(); i++)
        _x[i] = x[i];
      _fevals++;

      try {
        f = - log_prob_grad<true, false>(_model, _x, _params_i, _g, _msgs);
      } catch (const std::exception& e) {
        if (_msgs)
          (*_msgs) << e.what() << std::endl;
        return 1;  // exception during evaluation
      }

      // Copy out the negated gradient, rejecting non-finite components.
      g.resize(_g.size());
      for (size_t i = 0; i < _g.size(); i++) {
        if (!boost::math::isfinite(_g[i])) {
          if (_msgs)
            *_msgs << "Error evaluating model log probability: "
                      "Non-finite gradient." << std::endl;
          return 3;  // non-finite gradient component
        }
        g[i] = -_g[i];
      }

      if (boost::math::isfinite(f)) {
        return 0;
      } else {
        if (_msgs)
          *_msgs << "Error evaluating model log probability: "
                 << "Non-finite function evaluation."
                 << std::endl;
        return 2;  // non-finite function value
      }
    }

    // Gradient-only interface: evaluates both but discards the value.
    int df(const Eigen::Matrix<double, Eigen::Dynamic, 1> &x,
           Eigen::Matrix<double, Eigen::Dynamic, 1> &g) {
      double f;
      return (*this)(x, f, g);
    }
  };
  // Convenience front end: wraps a Stan model in a ModelAdaptor and runs
  // BFGSMinimizer on it, converting between std::vector and Eigen types.
  template<typename M, typename QNUpdateType, typename Scalar = double,
           int DimAtCompile = Eigen::Dynamic>
  class BFGSLineSearch
    : public BFGSMinimizer<ModelAdaptor<M>, QNUpdateType,
                           Scalar, DimAtCompile> {
  private:
    ModelAdaptor<M> _adaptor;

  public:
    typedef BFGSMinimizer<ModelAdaptor<M>, QNUpdateType,
                          Scalar, DimAtCompile> BFGSBase;
    typedef typename BFGSBase::VectorT vector_t;
    typedef typename stan::math::index_type<vector_t>::type idx_t;

    BFGSLineSearch(M& model,
                   const std::vector<double>& params_r,
                   const std::vector<int>& params_i,
                   std::ostream* msgs = 0)
      : BFGSBase(_adaptor),
        _adaptor(model, params_i, msgs) {
      initialize(params_r);
    }

    // Copy the initial parameter values into an Eigen vector and hand
    // them to the base-class initialize().
    void initialize(const std::vector<double>& params_r) {
      Eigen::Matrix<double, Eigen::Dynamic, 1> x;
      x.resize(params_r.size());
      for (size_t i = 0; i < params_r.size(); i++)
        x[i] = params_r[i];
      BFGSBase::initialize(x);
    }

    // Gradient of the log density at the current iterate (negated back
    // from the minimized objective).
    void grad(std::vector<double>& g) {
      const vector_t &cg(this->curr_g());
      g.resize(cg.size());
      for (idx_t i = 0; i < cg.size(); i++)
        g[i] = -cg[i];
    }

    // Current parameter values as a std::vector.
    void params_r(std::vector<double>& x) {
      const vector_t &cx(this->curr_x());
      x.resize(cx.size());
      for (idx_t i = 0; i < cx.size(); i++)
        x[i] = cx[i];
    }

    // ... (remaining members elided)
  };

}  // namespace optimization

#endif
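// --- Illustrative driver (not part of bfgs.hpp) ---
// A sketch of how a caller might run BFGSLineSearch over a compiled Stan
// model.  The quasi-Newton update BFGSUpdate_HInv<> is assumed to come from
// <stan/optimization/bfgs_update.hpp>; Model stands for any compiled Stan
// model type.  The loop below mirrors what minimize() does internally.

#include <stan/optimization/bfgs.hpp>
#include <iostream>
#include <vector>

template <typename Model>
int optimize_model(Model& model, std::vector<double>& cont_params) {
  typedef stan::optimization::BFGSLineSearch<
      Model, stan::optimization::BFGSUpdate_HInv<> > Optimizer;

  std::vector<int> disc_params;  // empty for continuous-only models
  Optimizer bfgs(model, cont_params, disc_params, &std::cout);

  int ret = 0;
  while (ret == 0)               // 0 means "successful step, keep going"
    ret = bfgs.step();

  std::cout << bfgs.get_code_string(ret) << std::endl;
  bfgs.params_r(cont_params);    // write the optimizing values back out
  return ret;
}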