util.hpp (Stan 2.10.0)
#ifndef STAN_MODEL_UTIL_HPP
#define STAN_MODEL_UTIL_HPP

#include <stan/interface_callbacks/writer/base_writer.hpp>
#include <stan/math/fwd/scal/fun/square.hpp>
#include <stan/math/fwd/core.hpp>
#include <stan/math/prim/mat/fun/Eigen.hpp>
#include <stan/math/rev/mat/fun/grad.hpp>
#include <stan/math/rev/core.hpp>
#include <stan/math/mix/mat/functor/derivative.hpp>
#include <stan/math/mix/mat/functor/grad_hessian.hpp>
#include <stan/math/mix/mat/functor/grad_tr_mat_times_hessian.hpp>
#include <stan/math/rev/mat/functor/gradient.hpp>
#include <stan/math/fwd/mat/functor/gradient.hpp>
#include <stan/math/mix/mat/functor/gradient_dot_vector.hpp>
#include <stan/math/mix/mat/functor/hessian.hpp>
#include <stan/math/mix/mat/functor/hessian_times_vector.hpp>
#include <stan/math/fwd/mat/functor/jacobian.hpp>
#include <stan/math/rev/mat/functor/jacobian.hpp>
#include <stan/math/mix/mat/functor/partial_derivative.hpp>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

namespace stan {

  namespace model {

    /**
     * Helper function to calculate log probability for double scalars
     * up to a proportion.
     */
    template <bool jacobian_adjust_transform, class M>
    double log_prob_propto(const M& model,
                           std::vector<double>& params_r,
                           std::vector<int>& params_i,
                           std::ostream* msgs = 0) {
      using stan::math::var;
      using std::vector;
      vector<var> ad_params_r;
      for (size_t i = 0; i < model.num_params_r(); ++i)
        ad_params_r.push_back(params_r[i]);
      try {
        double lp
          = model.template log_prob<true, jacobian_adjust_transform>
          (ad_params_r, params_i, msgs).val();
        stan::math::recover_memory();
        return lp;
      } catch (std::exception &ex) {
        stan::math::recover_memory();
        throw;
      }
    }
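
    // Illustrative sketch (not part of the original source): given a model
    // object `m` exposing the usual num_params_r()/log_prob() interface
    // (the names below are assumed for illustration only), the log density
    // up to a proportion, with the Jacobian adjustment, could be evaluated
    // on the unconstrained scale as:
    //
    //   std::vector<double> params_r(m.num_params_r(), 0.0);
    //   std::vector<int> params_i;
    //   double lp = stan::model::log_prob_propto<true>(m, params_r,
    //                                                  params_i,
    //                                                  &std::cout);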

    /**
     * Compute the gradient using reverse-mode automatic differentiation,
     * writing the result into the specified gradient vector.
     */
    template <bool propto, bool jacobian_adjust_transform, class M>
    double log_prob_grad(const M& model,
                         std::vector<double>& params_r,
                         std::vector<int>& params_i,
                         std::vector<double>& gradient,
                         std::ostream* msgs = 0) {
      using std::vector;
      using stan::math::var;
      double lp;
      try {
        vector<var> ad_params_r(params_r.size());
        for (size_t i = 0; i < model.num_params_r(); ++i) {
          stan::math::var var_i(params_r[i]);
          ad_params_r[i] = var_i;
        }
        var adLogProb
          = model.template log_prob<propto, jacobian_adjust_transform>
          (ad_params_r, params_i, msgs);
        lp = adLogProb.val();
        adLogProb.grad(ad_params_r, gradient);
      } catch (const std::exception &ex) {
        stan::math::recover_memory();
        throw;
      }
      stan::math::recover_memory();
      return lp;
    }
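
    // Illustrative sketch (not part of the original source): the same
    // hypothetical model `m` can be differentiated in reverse mode, with
    // the gradient written into `grad`:
    //
    //   std::vector<double> params_r(m.num_params_r(), 0.0);
    //   std::vector<int> params_i;
    //   std::vector<double> grad;
    //   double lp = stan::model::log_prob_grad<true, true>(m, params_r,
    //                                                      params_i, grad,
    //                                                      &std::cout);
    //   // grad holds one partial derivative per unconstrained parameter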

    /**
     * Helper function to calculate log probability for double scalars
     * up to a proportion (Eigen vector overload).
     */
    template <bool jacobian_adjust_transform, class M>
    double log_prob_propto(const M& model,
                           Eigen::VectorXd& params_r,
                           std::ostream* msgs = 0) {
      using stan::math::var;
      using std::vector;
      vector<int> params_i(0);

      double lp;
      try {
        vector<var> ad_params_r;
        for (size_t i = 0; i < model.num_params_r(); ++i)
          ad_params_r.push_back(params_r(i));
        lp
          = model
          .template log_prob<true,
                             jacobian_adjust_transform>(ad_params_r,
                                                        params_i, msgs)
          .val();
      } catch (std::exception &ex) {
        stan::math::recover_memory();
        throw;
      }
      stan::math::recover_memory();
      return lp;
    }

    /**
     * Compute the gradient using reverse-mode automatic differentiation
     * (Eigen vector overload), writing the result into the specified
     * gradient vector.
     */
    template <bool propto, bool jacobian_adjust_transform, class M>
    double log_prob_grad(const M& model,
                         Eigen::VectorXd& params_r,
                         Eigen::VectorXd& gradient,
                         std::ostream* msgs = 0) {
      using std::vector;
      using stan::math::var;

      Eigen::Matrix<var, Eigen::Dynamic, 1> ad_params_r(params_r.size());
      for (size_t i = 0; i < model.num_params_r(); ++i) {
        stan::math::var var_i(params_r[i]);
        ad_params_r[i] = var_i;
      }
      try {
        var adLogProb
          = model
          .template log_prob<propto,
                             jacobian_adjust_transform>(ad_params_r, msgs);
        double val = adLogProb.val();
        stan::math::grad(adLogProb, ad_params_r, gradient);
        return val;
      } catch (std::exception &ex) {
        stan::math::recover_memory();
        throw;
      }
    }
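
    // Illustrative sketch (not part of the original source): the Eigen
    // overloads behave the same way but take and return Eigen vectors,
    // which is convenient for callers that already work with Eigen types:
    //
    //   Eigen::VectorXd params_r = Eigen::VectorXd::Zero(m.num_params_r());
    //   Eigen::VectorXd grad;
    //   double lp = stan::model::log_prob_grad<true, true>(m, params_r,
    //                                                      grad, &std::cout);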

    /**
     * Compute the gradient using finite differences for the specified
     * parameters, writing the result into the specified gradient vector.
     */
    template <bool propto, bool jacobian_adjust_transform, class M>
    void finite_diff_grad(const M& model,
                          std::vector<double>& params_r,
                          std::vector<int>& params_i,
                          std::vector<double>& grad,
                          double epsilon = 1e-6,
                          std::ostream* msgs = 0) {
      std::vector<double> perturbed(params_r);
      grad.resize(params_r.size());
      for (size_t k = 0; k < params_r.size(); k++) {
        perturbed[k] += epsilon;
        double logp_plus
          = model
          .template log_prob<propto,
                             jacobian_adjust_transform>(perturbed, params_i,
                                                        msgs);
        perturbed[k] = params_r[k] - epsilon;
        double logp_minus
          = model
          .template log_prob<propto,
                             jacobian_adjust_transform>(perturbed, params_i,
                                                        msgs);
        double gradest = (logp_plus - logp_minus) / (2 * epsilon);
        grad[k] = gradest;
        perturbed[k] = params_r[k];
      }
    }
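
    // Each component above is a second-order central difference,
    //   grad[k] is approximately
    //     (log_prob(theta + eps * e_k) - log_prob(theta - eps * e_k)) / (2 * eps).
    // Illustrative sketch (not part of the original source):
    //
    //   std::vector<double> grad_fd;
    //   stan::model::finite_diff_grad<false, true>(m, params_r, params_i,
    //                                              grad_fd, 1e-6, &std::cout);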


    /**
     * Test the log_prob_grad() function's ability to produce accurate
     * gradients using finite differences.  Returns the number of
     * parameters whose autodiff and finite-difference gradients differ
     * by more than the specified error.
     */
    template <bool propto, bool jacobian_adjust_transform, class M>
    int test_gradients(const M& model,
                       std::vector<double>& params_r,
                       std::vector<int>& params_i,
                       double epsilon,
                       double error,
                       stan::interface_callbacks::writer::base_writer& writer) {
      std::stringstream msg;
      std::vector<double> grad;
      double lp
        = log_prob_grad<propto, jacobian_adjust_transform>(model,
                                                           params_r,
                                                           params_i,
                                                           grad,
                                                           &msg);
      if (msg.str().length() > 0)
        writer(msg.str());

      std::vector<double> grad_fd;
      finite_diff_grad<false,
                       true,
                       M>(model,
                          params_r, params_i,
                          grad_fd, epsilon,
                          &msg);
      if (msg.str().length() > 0)
        writer(msg.str());

      int num_failed = 0;

      msg.str("");
      msg << " Log probability=" << lp;

      writer();
      writer(msg.str());
      writer();

      msg.str("");
      msg << std::setw(10) << "param idx"
          << std::setw(16) << "value"
          << std::setw(16) << "model"
          << std::setw(16) << "finite diff"
          << std::setw(16) << "error";

      writer(msg.str());

      for (size_t k = 0; k < params_r.size(); k++) {
        msg.str("");
        msg << std::setw(10) << k
            << std::setw(16) << params_r[k]
            << std::setw(16) << grad[k]
            << std::setw(16) << grad_fd[k]
            << std::setw(16) << (grad[k] - grad_fd[k]);
        writer(msg.str());
        if (std::fabs(grad[k] - grad_fd[k]) > error)
          num_failed++;
      }
      return num_failed;
    }
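
    // Illustrative sketch (not part of the original source): `my_writer`
    // below is an assumed name for any concrete implementation of
    // stan::interface_callbacks::writer::base_writer, e.g. one that prints
    // each line to std::cout.  The return value counts the parameters whose
    // autodiff and finite-difference gradients disagree by more than `error`:
    //
    //   int n_bad = stan::model::test_gradients<true, true>(m, params_r,
    //                                                       params_i,
    //                                                       1e-6, 1e-6,
    //                                                       my_writer);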


    /**
     * Evaluate the log-probability, its gradient, and its Hessian at
     * params_r.  The Hessian is estimated by fourth-order central finite
     * differences of the reverse-mode gradient and stored row-major in
     * the hessian vector.
     */
    template <bool propto, bool jacobian_adjust_transform, class M>
    double grad_hess_log_prob(const M& model,
                              std::vector<double>& params_r,
                              std::vector<int>& params_i,
                              std::vector<double>& gradient,
                              std::vector<double>& hessian,
                              std::ostream* msgs = 0) {
      const double epsilon = 1e-3;
      const int order = 4;
      const double perturbations[order]
        = {-2 * epsilon, -1 * epsilon, epsilon, 2 * epsilon};
      const double coefficients[order]
        = { 1.0 / 12.0,
            -2.0 / 3.0,
            2.0 / 3.0,
            -1.0 / 12.0 };

      double result
        = log_prob_grad<propto, jacobian_adjust_transform>(model,
                                                           params_r,
                                                           params_i,
                                                           gradient,
                                                           msgs);
      hessian.assign(params_r.size() * params_r.size(), 0);
      std::vector<double> temp_grad(params_r.size());
      std::vector<double> perturbed_params(params_r.begin(), params_r.end());
      for (size_t d = 0; d < params_r.size(); d++) {
        double* row = &hessian[d * params_r.size()];
        for (int i = 0; i < order; i++) {
          perturbed_params[d] = params_r[d] + perturbations[i];
          log_prob_grad<propto, jacobian_adjust_transform>(model,
                                                           perturbed_params,
                                                           params_i,
                                                           temp_grad);
          for (size_t dd = 0; dd < params_r.size(); dd++) {
            row[dd] += 0.5 * coefficients[i] * temp_grad[dd] / epsilon;
            hessian[d + dd * params_r.size()]
              += 0.5 * coefficients[i] * temp_grad[dd] / epsilon;
          }
        }
        perturbed_params[d] = params_r[d];
      }
      return result;
    }
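
    // The inner loop above applies the fourth-order central difference
    //   d g_j / d theta_d is approximately
    //     (g_j(theta - 2e) - 8 g_j(theta - e)
    //      + 8 g_j(theta + e) - g_j(theta + 2e)) / (12 e),
    // with each contribution split evenly between entries (d, dd) and
    // (dd, d) so the stored Hessian is symmetric.  Illustrative read-out
    // sketch (not part of the original source), with the Hessian stored as
    // a flat row-major vector:
    //
    //   std::vector<double> grad, hess;
    //   double lp = stan::model::grad_hess_log_prob<true, true>(
    //       m, params_r, params_i, grad, hess);
    //   size_t N = params_r.size();
    //   double h_01 = hess[0 * N + 1];   // d^2 log p / d theta_0 d theta_1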

    // Interface for automatic differentiation of models

    template <class M>
    struct model_functional {
      const M& model;
      std::ostream* o;

      model_functional(const M& m, std::ostream* out)
        : model(m), o(out) {}

      template <typename T>
      T operator()(Eigen::Matrix<T, Eigen::Dynamic, 1>& x) const {
        return model.template log_prob<true, true, T>(x, o);
      }
    };
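
    // model_functional adapts a model into the functor form expected by the
    // stan::math functionals used below; its operator() evaluates
    // log_prob<true, true, T>, i.e. dropping terms constant in the
    // parameters and including the Jacobian adjustment, for an autodiff
    // scalar type T.  Illustrative sketch (not part of the original source;
    // MyModel and m are assumed names for a model class and instance):
    //
    //   Eigen::VectorXd x = Eigen::VectorXd::Zero(m.num_params_r());
    //   double fx;
    //   Eigen::VectorXd grad_fx;
    //   stan::math::gradient(
    //       stan::model::model_functional<MyModel>(m, &std::cout),
    //       x, fx, grad_fx);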

    template <class M>
    void gradient(const M& model,
                  const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
                  double& f,
                  Eigen::Matrix<double, Eigen::Dynamic, 1>& grad_f,
                  std::ostream* msgs = 0) {
      stan::math::gradient(model_functional<M>(model, msgs), x, f, grad_f);
    }

    template <class M>
    void gradient(const M& model,
                  const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
                  double& f,
                  Eigen::Matrix<double, Eigen::Dynamic, 1>& grad_f,
                  stan::interface_callbacks::writer::base_writer& writer) {
      std::stringstream ss;
      stan::math::gradient(model_functional<M>(model, &ss), x, f, grad_f);
      // FIXME(DL): remove blank line at the end of the gradient call;
      //   this assumes the last character is a newline;
      //   this matches the v2.8.0 behavior
      std::string msg = ss.str();
      if (msg.length() > 1)
        writer(msg.substr(0, msg.length() - 1));
    }

    template <class M>
    void hessian(const M& model,
                 const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
                 double& f,
                 Eigen::Matrix<double, Eigen::Dynamic, 1>& grad_f,
                 Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>& hess_f,
                 std::ostream* msgs = 0) {
      stan::math::hessian(model_functional<M>(model, msgs),
                          x, f, grad_f, hess_f);
    }

    template <class M>
    void gradient_dot_vector(const M& model,
                             const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
                             const Eigen::Matrix<double, Eigen::Dynamic, 1>& v,
                             double& f,
                             double& grad_f_dot_v,
                             std::ostream* msgs = 0) {
      stan::math::gradient_dot_vector(model_functional<M>(model, msgs),
                                      x, v, f, grad_f_dot_v);
    }

    template <class M>
    void hessian_times_vector(const M& model,
                              const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
                              const Eigen::Matrix<double, Eigen::Dynamic, 1>& v,
                              double& f,
                              Eigen::Matrix<double, Eigen::Dynamic, 1>&
                              hess_f_dot_v,
                              std::ostream* msgs = 0) {
      stan::math::hessian_times_vector(model_functional<M>(model, msgs),
                                       x, v, f, hess_f_dot_v);
    }

    template <class M>
    void grad_tr_mat_times_hessian(const M& model,
                                   const Eigen::Matrix<double, Eigen::Dynamic, 1>& x,
                                   const Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>& X,
                                   Eigen::Matrix<double, Eigen::Dynamic, 1>&
                                   grad_tr_X_hess_f,
                                   std::ostream* msgs = 0) {
      stan::math::grad_tr_mat_times_hessian(model_functional<M>(model, msgs),
                                            x, X, grad_tr_X_hess_f);
    }
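
    // Illustrative sketch (not part of the original source): the wrappers
    // above expose the higher-order stan::math functionals directly on a
    // model, e.g. a dense Hessian and a Hessian-vector product at a point x:
    //
    //   Eigen::VectorXd x = Eigen::VectorXd::Zero(m.num_params_r());
    //   Eigen::VectorXd v = Eigen::VectorXd::Ones(m.num_params_r());
    //   double fx;
    //   Eigen::VectorXd grad_fx, Hv;
    //   Eigen::MatrixXd H;
    //   stan::model::hessian(m, x, fx, grad_fx, H);
    //   stan::model::hessian_times_vector(m, x, v, fx, Hv);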

  }
}
#endif