Stan  2.10.0
probability, sampling & optimization
bfgs_linesearch.hpp
Go to the documentation of this file.
1 #ifndef STAN_OPTIMIZATION_BFGS_LINESEARCH_HPP
2 #define STAN_OPTIMIZATION_BFGS_LINESEARCH_HPP
3 
4 #include <boost/math/special_functions/fpclassify.hpp>
5 #include <algorithm>
6 #include <cmath>
7 #include <cstdlib>
8 #include <string>
9 #include <limits>
10 
11 namespace stan {
12  namespace optimization {
34  template<typename Scalar>
35  Scalar CubicInterp(const Scalar &df0,
36  const Scalar &x1, const Scalar &f1, const Scalar &df1,
37  const Scalar &loX, const Scalar &hiX) {
38  const Scalar c3((-12*f1 + 6*x1*(df0 + df1))/(x1*x1*x1));
39  const Scalar c2(-(4*df0 + 2*df1)/x1 + 6*f1/(x1*x1));
40  const Scalar &c1(df0);
41 
42  const Scalar t_s = std::sqrt(c2*c2 - 2.0*c1*c3);
43  const Scalar s1 = - (c2 + t_s)/c3;
44  const Scalar s2 = - (c2 - t_s)/c3;
45 
46  Scalar tmpF;
47  Scalar minF, minX;
48 
49  // Check value at lower bound
50  minF = loX*(loX*(loX*c3/3.0 + c2)/2.0 + c1);
51  minX = loX;
52 
53  // Check value at upper bound
54  tmpF = hiX*(hiX*(hiX*c3/3.0 + c2)/2.0 + c1);
55  if (tmpF < minF) {
56  minF = tmpF;
57  minX = hiX;
58  }
59 
60  // Check value of first root
61  if (loX < s1 && s1 < hiX) {
62  tmpF = s1*(s1*(s1*c3/3.0 + c2)/2.0 + c1);
63  if (tmpF < minF) {
64  minF = tmpF;
65  minX = s1;
66  }
67  }
68 
69  // Check value of second root
70  if (loX < s2 && s2 < hiX) {
71  tmpF = s2*(s2*(s2*c3/3.0 + c2)/2.0 + c1);
72  if (tmpF < minF) {
73  minF = tmpF;
74  minX = s2;
75  }
76  }
77 
78  return minX;
79  }
80 
102  template<typename Scalar>
103  Scalar CubicInterp(const Scalar &x0, const Scalar &f0, const Scalar &df0,
104  const Scalar &x1, const Scalar &f1, const Scalar &df1,
105  const Scalar &loX, const Scalar &hiX) {
106  return x0 + CubicInterp(df0, x1-x0, f1-f0, df1, loX-x0, hiX-x0);
107  }
108 
109  namespace {
113  template<typename FunctorType, typename Scalar, typename XType>
114  int WolfLSZoom(Scalar &alpha, XType &newX, Scalar &newF, XType &newDF,
115  FunctorType &func,
116  const XType &x, const Scalar &f, const Scalar &dfp,
117  const Scalar &c1dfp, const Scalar &c2dfp, const XType &p,
118  Scalar alo, Scalar aloF, Scalar aloDFp,
119  Scalar ahi, Scalar ahiF, Scalar ahiDFp,
120  const Scalar &min_range) {
121  Scalar d1, d2, newDFp;
122  int itNum(0);
123 
124  while (1) {
125  itNum++;
126 
127  if (std::fabs(alo-ahi) < min_range)
128  return 1;
129 
130  if (itNum%5 == 0) {
131  alpha = 0.5*(alo+ahi);
132  } else {
133  // Perform cubic interpolation to determine next point to try
134  d1 = aloDFp + ahiDFp - 3*(aloF-ahiF)/(alo-ahi);
135  d2 = std::sqrt(d1*d1 - aloDFp*ahiDFp);
136  if (ahi < alo)
137  d2 = -d2;
138  alpha = ahi
139  - (ahi - alo) * (ahiDFp + d2 - d1) / (ahiDFp - aloDFp + 2*d2);
140  if (!boost::math::isfinite(alpha) ||
141  alpha < std::min(alo, ahi) + 0.01 * std::fabs(alo - ahi) ||
142  alpha > std::max(alo, ahi) - 0.01 * std::fabs(alo - ahi))
143  alpha = 0.5 * (alo + ahi);
144  }
145 
146  newX = x + alpha * p;
147  while (func(newX, newF, newDF)) {
148  alpha = 0.5 * (alpha + std::min(alo, ahi));
149  if (std::fabs(std::min(alo, ahi) - alpha) < min_range)
150  return 1;
151  newX = x + alpha * p;
152  }
153  newDFp = newDF.dot(p);
154  if (newF > (f + alpha * c1dfp) || newF >= aloF) {
155  ahi = alpha;
156  ahiF = newF;
157  ahiDFp = newDFp;
158  } else {
159  if (std::fabs(newDFp) <= -c2dfp)
160  break;
161  if (newDFp*(ahi-alo) >= 0) {
162  ahi = alo;
163  ahiF = aloF;
164  ahiDFp = aloDFp;
165  }
166  alo = alpha;
167  aloF = newF;
168  aloDFp = newDFp;
169  }
170  }
171  return 0;
172  }
173  }
174 
221  template<typename FunctorType, typename Scalar, typename XType>
222  int WolfeLineSearch(FunctorType &func,
223  Scalar &alpha,
224  XType &x1, Scalar &f1, XType &gradx1,
225  const XType &p,
226  const XType &x0, const Scalar &f0, const XType &gradx0,
227  const Scalar &c1, const Scalar &c2,
228  const Scalar &minAlpha) {
229  const Scalar dfp(gradx0.dot(p));
230  const Scalar c1dfp(c1*dfp);
231  const Scalar c2dfp(c2*dfp);
232 
233  Scalar alpha0(minAlpha);
234  Scalar alpha1(alpha);
235 
236  Scalar prevF(f0);
237  XType prevDF(gradx0);
238  Scalar prevDFp(dfp);
239  Scalar newDFp;
240 
241  int retCode = 0, nits = 0, ret;
242 
243  while (1) {
244  x1.noalias() = x0 + alpha1 * p;
245  ret = func(x1, f1, gradx1);
246  if (ret != 0) {
247  alpha1 = 0.5 * (alpha0 + alpha1);
248  continue;
249  }
250  newDFp = gradx1.dot(p);
251  if ((f1 > f0 + alpha * c1dfp) || (f1 >= prevF && nits > 0)) {
252  retCode = WolfLSZoom(alpha, x1, f1, gradx1,
253  func,
254  x0, f0, dfp,
255  c1dfp, c2dfp, p,
256  alpha0, prevF, prevDFp,
257  alpha1, f1, newDFp,
258  1e-16);
259  break;
260  }
261  if (std::fabs(newDFp) <= -c2dfp) {
262  alpha = alpha1;
263  break;
264  }
265  if (newDFp >= 0) {
266  retCode = WolfLSZoom(alpha, x1, f1, gradx1,
267  func,
268  x0, f0, dfp,
269  c1dfp, c2dfp, p,
270  alpha1, f1, newDFp,
271  alpha0, prevF, prevDFp,
272  1e-16);
273  break;
274  }
275 
276  alpha0 = alpha1;
277  prevF = f1;
278  std::swap(prevDF, gradx1);
279  prevDFp = newDFp;
280 
281  alpha1 *= 10.0;
282 
283  nits++;
284  }
285  return retCode;
286  }
287  }
288 }
289 
290 #endif
Probability, optimization and sampling library.
int WolfeLineSearch(FunctorType &func, Scalar &alpha, XType &x1, Scalar &f1, XType &gradx1, const XType &p, const XType &x0, const Scalar &f0, const XType &gradx0, const Scalar &c1, const Scalar &c2, const Scalar &minAlpha)
Perform a line search which finds an approximate solution to: satisfying the strong Wolfe conditions...
Scalar CubicInterp(const Scalar &df0, const Scalar &x1, const Scalar &f1, const Scalar &df1, const Scalar &loX, const Scalar &hiX)
Find the minima in an interval [loX, hiX] of a cubic function which interpolates the points...

     [ Stan Home Page ] © 2011–2016, Stan Development Team.