#ifndef STAN_MODEL_UTIL_HPP
#define STAN_MODEL_UTIL_HPP

#include <stan/interface_callbacks/writer/base_writer.hpp>
#include <stan/math/fwd/scal/fun/square.hpp>
#include <stan/math/fwd/core.hpp>
#include <stan/math/prim/mat/fun/Eigen.hpp>
#include <stan/math/rev/mat/fun/grad.hpp>
#include <stan/math/rev/core.hpp>
#include <stan/math/mix/mat/functor/derivative.hpp>
#include <stan/math/mix/mat/functor/grad_hessian.hpp>
#include <stan/math/mix/mat/functor/grad_tr_mat_times_hessian.hpp>
#include <stan/math/rev/mat/functor/gradient.hpp>
#include <stan/math/fwd/mat/functor/gradient.hpp>
#include <stan/math/mix/mat/functor/gradient_dot_vector.hpp>
#include <stan/math/mix/mat/functor/hessian.hpp>
#include <stan/math/mix/mat/functor/hessian_times_vector.hpp>
#include <stan/math/fwd/mat/functor/jacobian.hpp>
#include <stan/math/rev/mat/functor/jacobian.hpp>
#include <stan/math/mix/mat/functor/partial_derivative.hpp>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

namespace stan {
  namespace model {
    /**
     * Helper function to calculate log probability for
     * <code>double</code> scalars up to a proportion.
     */
    template <bool jacobian_adjust_transform, class M>
    double log_prob_propto(const M& model,
                           std::vector<double>& params_r,
                           std::vector<int>& params_i,
                           std::ostream* msgs = 0) {
      using stan::math::var;
      using std::vector;
      double lp;
      try {
        vector<var> ad_params_r;
        for (size_t i = 0; i < model.num_params_r(); ++i)
          ad_params_r.push_back(params_r[i]);
        lp = model.template log_prob<true, jacobian_adjust_transform>
          (ad_params_r, params_i, msgs).val();
        stan::math::recover_memory();
      } catch (std::exception &ex) {
        // Free the autodiff stack before rethrowing.
        stan::math::recover_memory();
        throw;
      }
      return lp;
    }
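
    // Illustrative usage sketch; `my_model` is a hypothetical stand-in for an
    // instantiated generated model.  With the Jacobian adjustment enabled:
    //
    //   std::vector<double> params_r(my_model.num_params_r(), 0.0);
    //   std::vector<int> params_i;  // no integer parameters
    //   double lp = stan::model::log_prob_propto<true>(my_model, params_r,
    //                                                  params_i, &std::cout);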

    /**
     * Compute the gradient using reverse-mode automatic
     * differentiation, writing the result into the specified
     * gradient vector.  Returns the log probability.
     */
    template <bool propto, bool jacobian_adjust_transform, class M>
    double log_prob_grad(const M& model,
                         std::vector<double>& params_r,
                         std::vector<int>& params_i,
                         std::vector<double>& gradient,
                         std::ostream* msgs = 0) {
      using std::vector;
      using stan::math::var;
      double lp;
      try {
        vector<var> ad_params_r(params_r.size());
        for (size_t i = 0; i < model.num_params_r(); ++i) {
          stan::math::var var_i(params_r[i]);
          ad_params_r[i] = var_i;
        }
        var adLogProb
          = model.template log_prob<propto, jacobian_adjust_transform>
          (ad_params_r, params_i, msgs);
        lp = adLogProb.val();
        adLogProb.grad(ad_params_r, gradient);
      } catch (const std::exception &ex) {
        stan::math::recover_memory();
        throw;
      }
      stan::math::recover_memory();
      return lp;
    }
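
    // Illustrative usage sketch; `my_model` is a hypothetical generated model
    // instance.  The call fills `grad` with the reverse-mode gradient and
    // returns the log density (up to a proportion, with the Jacobian
    // adjustment applied):
    //
    //   std::vector<double> params_r(my_model.num_params_r(), 0.0);
    //   std::vector<int> params_i;
    //   std::vector<double> grad;
    //   double lp = stan::model::log_prob_grad<true, true>(my_model, params_r,
    //                                                      params_i, grad,
    //                                                      &std::cout);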

    /**
     * Helper function to calculate log probability for
     * <code>double</code> scalars up to a proportion, with the
     * parameters passed as an Eigen vector.
     */
    template <bool jacobian_adjust_transform, class M>
    double log_prob_propto(const M& model,
                           Eigen::VectorXd& params_r,
                           std::ostream* msgs = 0) {
      using stan::math::var;
      using std::vector;
      vector<int> params_i(0);
      double lp;
      try {
        vector<var> ad_params_r;
        for (size_t i = 0; i < model.num_params_r(); ++i)
          ad_params_r.push_back(params_r(i));
        lp = model
          .template log_prob<true,
                             jacobian_adjust_transform>(ad_params_r,
                                                        params_i, msgs)
          .val();
      } catch (std::exception &ex) {
        stan::math::recover_memory();
        throw;
      }
      stan::math::recover_memory();
      return lp;
    }

    /**
     * Compute the gradient using reverse-mode automatic
     * differentiation, writing the result into the specified
     * Eigen gradient vector.  Returns the log probability.
     */
    template <bool propto, bool jacobian_adjust_transform, class M>
    double log_prob_grad(const M& model,
                         Eigen::VectorXd& params_r,
                         Eigen::VectorXd& gradient,
                         std::ostream* msgs = 0) {
      using stan::math::var;
      Eigen::Matrix<var, Eigen::Dynamic, 1> ad_params_r(params_r.size());
      for (size_t i = 0; i < model.num_params_r(); ++i) {
        stan::math::var var_i(params_r[i]);
        ad_params_r[i] = var_i;
      }
      try {
        var adLogProb
          = model
          .template log_prob<propto,
                             jacobian_adjust_transform>(ad_params_r, msgs);
        double val = adLogProb.val();
        stan::math::grad(adLogProb, ad_params_r, gradient);
        return val;
      } catch (std::exception &ex) {
        stan::math::recover_memory();
        throw;
      }
    }

    /**
     * Compute the gradient using finite differences for the
     * specified parameters, writing the result into the specified
     * gradient vector.
     */
    template <bool propto, bool jacobian_adjust_transform, class M>
    void finite_diff_grad(const M& model,
                          std::vector<double>& params_r,
                          std::vector<int>& params_i,
                          std::vector<double>& grad,
                          double epsilon = 1e-6,
                          std::ostream* msgs = 0) {
      std::vector<double> perturbed(params_r);
      grad.resize(params_r.size());
      for (size_t k = 0; k < params_r.size(); k++) {
        perturbed[k] += epsilon;
        double logp_plus
          = model
          .template log_prob<propto,
                             jacobian_adjust_transform>(perturbed, params_i,
                                                        msgs);
        perturbed[k] = params_r[k] - epsilon;
        double logp_minus
          = model
          .template log_prob<propto,
                             jacobian_adjust_transform>(perturbed, params_i,
                                                        msgs);
        double gradest = (logp_plus - logp_minus) / (2*epsilon);
        grad[k] = gradest;
        perturbed[k] = params_r[k];
      }
    }
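
    // The estimate above is the standard central difference
    //
    //   d/dx[k] log p(x)
    //     ~ (log p(x + epsilon * e[k]) - log p(x - epsilon * e[k]))
    //       / (2 * epsilon),
    //
    // with O(epsilon^2) truncation error and two full log_prob evaluations
    // per parameter.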

    /**
     * Test the log_prob_grad() function's ability to produce
     * accurate gradients using finite differences.  Returns the
     * number of parameters whose gradients differ by more than
     * the specified error tolerance.
     */
    template <bool propto, bool jacobian_adjust_transform, class M>
    int test_gradients(const M& model,
                       std::vector<double>& params_r,
                       std::vector<int>& params_i,
                       double epsilon,
                       double error,
                       stan::interface_callbacks::writer::base_writer& writer) {
      std::stringstream msg;
      std::vector<double> grad;
      double lp
        = log_prob_grad<propto, jacobian_adjust_transform>(model,
                                                           params_r,
                                                           params_i,
                                                           grad,
                                                           &msg);
      if (msg.str().length() > 0)
        writer(msg.str());

      std::vector<double> grad_fd;
      finite_diff_grad<false, true, M>(model,
                                       params_r, params_i,
                                       grad_fd, epsilon,
                                       &msg);
      if (msg.str().length() > 0)
        writer(msg.str());

      int num_failed = 0;

      msg.str("");
      msg << " Log probability=" << lp;
      writer(msg.str());

      msg.str("");
      msg << std::setw(10) << "param idx"
          << std::setw(16) << "value"
          << std::setw(16) << "model"
          << std::setw(16) << "finite diff"
          << std::setw(16) << "error";
      writer(msg.str());

      for (size_t k = 0; k < params_r.size(); k++) {
        msg.str("");
        msg << std::setw(10) << k
            << std::setw(16) << params_r[k]
            << std::setw(16) << grad[k]
            << std::setw(16) << grad_fd[k]
            << std::setw(16) << (grad[k] - grad_fd[k]);
        writer(msg.str());
        if (std::fabs(grad[k] - grad_fd[k]) > error)
          num_failed++;
      }
      return num_failed;
    }
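
    // Illustrative usage sketch; `my_model` and `my_writer` are hypothetical
    // stand-ins for a generated model instance and a base_writer callback:
    //
    //   int num_failed
    //     = stan::model::test_gradients<true, true>(my_model,
    //                                               params_r, params_i,
    //                                               1e-6,  // finite-diff epsilon
    //                                               1e-6,  // error tolerance
    //                                               my_writer);
    //   // num_failed == 0 means every component agreed within the tolerance.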

    /**
     * Evaluate the log-probability, its gradient, and its Hessian
     * at params_r.  The gradient comes from reverse-mode autodiff;
     * the Hessian is built by central finite differences of that
     * gradient.
     */
    template <bool propto, bool jacobian_adjust_transform, class M>
    double grad_hess_log_prob(const M& model,
                              std::vector<double>& params_r,
                              std::vector<int>& params_i,
                              std::vector<double>& gradient,
                              std::vector<double>& hessian,
                              std::ostream* msgs = 0) {
      const double epsilon = 1e-3;
      const int order = 4;
      const double perturbations[order]
        = {-2*epsilon, -1*epsilon, epsilon, 2*epsilon};
      const double coefficients[order]
        = { 1.0 / 12.0,
            -2.0 / 3.0,
            2.0 / 3.0,
            -1.0 / 12.0 };

      double result
        = log_prob_grad<propto, jacobian_adjust_transform>(model,
                                                           params_r,
                                                           params_i,
                                                           gradient,
                                                           msgs);

      hessian.assign(params_r.size() * params_r.size(), 0);
      std::vector<double> temp_grad(params_r.size());
      std::vector<double> perturbed_params(params_r.begin(), params_r.end());
      for (size_t d = 0; d < params_r.size(); d++) {
        double* row = &hessian[d*params_r.size()];
        for (int i = 0; i < order; i++) {
          perturbed_params[d] = params_r[d] + perturbations[i];
          log_prob_grad<propto, jacobian_adjust_transform>(model,
                                                           perturbed_params,
                                                           params_i,
                                                           temp_grad);
          // Accumulate half the finite-difference estimate into row d and
          // half into column d so the result is symmetric.
          for (size_t dd = 0; dd < params_r.size(); dd++) {
            row[dd] += 0.5 * coefficients[i] * temp_grad[dd] / epsilon;
            hessian[d + dd*params_r.size()]
              += 0.5 * coefficients[i] * temp_grad[dd] / epsilon;
          }
        }
        perturbed_params[d] = params_r[d];
      }
      return result;
    }
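
    // The coefficients above implement the fourth-order central difference
    //
    //   g'(x) ~ (g(x - 2*eps) - 8*g(x - eps) + 8*g(x + eps) - g(x + 2*eps))
    //           / (12 * eps)
    //
    // applied componentwise to the autodiff gradient g; each contribution is
    // split in half between row d and column d, which is what makes the
    // returned Hessian exactly symmetric.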

    template <class M>
    struct model_functional {
      const M& model;
      std::ostream* o;

      model_functional(const M& m, std::ostream* out)
        : model(m), o(out) {}

      template <typename T>
      T operator()(Eigen::Matrix<T, Eigen::Dynamic, 1>& x) const {
        return model.template log_prob<true, true, T>(x, o);
      }
    };
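
    // model_functional exposes the model's log density (dropping constant
    // terms and applying the Jacobian adjustment) as a plain templated
    // functor, which is the interface the stan::math functionals below
    // expect.  Illustrative sketch with a hypothetical model type MyModel
    // and instance `my_model`:
    //
    //   double f;
    //   Eigen::VectorXd x = Eigen::VectorXd::Zero(my_model.num_params_r());
    //   Eigen::VectorXd grad_f;
    //   stan::math::gradient(model_functional<MyModel>(my_model, &std::cout),
    //                        x, f, grad_f);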

    template <class M>
    void gradient(const M& model,
                  const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
                  double& f,
                  Eigen::Matrix<double, Eigen::Dynamic, 1>& grad_f,
                  std::ostream* msgs = 0) {
      stan::math::gradient(model_functional<M>(model, msgs), x, f, grad_f);
    }

    template <class M>
    void gradient(const M& model,
                  const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
                  double& f,
                  Eigen::Matrix<double, Eigen::Dynamic, 1>& grad_f,
                  stan::interface_callbacks::writer::base_writer& writer) {
      std::stringstream ss;
      stan::math::gradient(model_functional<M>(model, &ss), x, f, grad_f);
      // Forward accumulated model messages, dropping the trailing newline.
      std::string msg = ss.str();
      if (msg.length() > 1)
        writer(msg.substr(0, msg.length() - 1));
    }

    template <class M>
    void hessian(const M& model,
                 const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
                 double& f,
                 Eigen::Matrix<double, Eigen::Dynamic, 1>& grad_f,
                 Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>& hess_f,
                 std::ostream* msgs = 0) {
      stan::math::hessian(model_functional<M>(model, msgs),
                          x, f, grad_f, hess_f);
    }
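
    // stan::math::hessian (from the mix/ functors included above) nests
    // forward-mode over reverse-mode autodiff, so hess_f is exact up to
    // floating point rather than a finite-difference approximation.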

    template <class M>
    void gradient_dot_vector(const M& model,
                             const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
                             const Eigen::Matrix<double, Eigen::Dynamic, 1>& v,
                             double& f,
                             double& grad_f_dot_v,
                             std::ostream* msgs = 0) {
      stan::math::gradient_dot_vector(model_functional<M>(model, msgs),
                                      x, v, f, grad_f_dot_v);
    }

    template <class M>
    void hessian_times_vector(const M& model,
                              const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
                              const Eigen::Matrix<double, Eigen::Dynamic, 1>& v,
                              double& f,
                              Eigen::Matrix<double, Eigen::Dynamic, 1>&
                              hess_f_dot_v,
                              std::ostream* msgs = 0) {
      stan::math::hessian_times_vector(model_functional<M>(model, msgs),
                                       x, v, f, hess_f_dot_v);
    }

    template <class M>
    void grad_tr_mat_times_hessian(const M& model,
        const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
        const Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>& X,
        Eigen::Matrix<double, Eigen::Dynamic, 1>& grad_tr_X_hess_f,
        std::ostream* msgs = 0) {
      stan::math::grad_tr_mat_times_hessian(model_functional<M>(model, msgs),
                                            x, X, grad_tr_X_hess_f);
    }

  }
}
#endif