LineSearch.cpp 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. /*
  2. * Copyright (c) Contributors to the Open 3D Engine Project.
  3. * For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. *
  5. * SPDX-License-Identifier: Apache-2.0 OR MIT
  6. *
  7. */
  8. #include <Optimization/LineSearch.h>
  9. #include <Optimization/Utilities.h>
  10. #include <Optimization/Constants.h>
  11. #include <float.h>
  12. #include <math.h>
  13. namespace NumericalMethods::Optimization
  14. {
  15. bool IsFailure(const LineSearchResult& result)
  16. {
  17. return result.m_outcome >= LineSearchOutcome::FailureExceededIterations;
  18. }
  19. ScalarVariable CubicMinimum(const double a, const double f_a, const double df_a, const double b, const double f_b,
  20. const double c, const double f_c)
  21. {
  22. double coefficients[4] = {};
  23. coefficients[1] = df_a;
  24. const double db = b - a;
  25. const double dc = c - a;
  26. const double denominator = (db * db * dc * dc) * (db - dc);
  27. double e[2];
  28. e[0] = f_b - f_a - coefficients[1] * db;
  29. e[1] = f_c - f_a - coefficients[1] * dc;
  30. coefficients[3] = (dc * dc * e[0] - db * db * e[1]) / denominator;
  31. coefficients[2] = (-dc * dc * dc * e[0] + db * db * db * e[1]) / denominator;
  32. const double radical = coefficients[2] * coefficients[2] - 3.0 * coefficients[3] * coefficients[1];
  33. return a + (-coefficients[2] + sqrt(radical)) / (3.0 * coefficients[3]);
  34. }
  35. ScalarVariable QuadraticMinimum(const double a, const double f_a, const double df_a, const double b, const double f_b)
  36. {
  37. double coefficients[3] = {};
  38. const double db = b - a;
  39. coefficients[1] = df_a;
  40. coefficients[2] = (f_b - f_a - coefficients[1] * db) / (db * db);
  41. return a - coefficients[1] / (2.0 * coefficients[2]);
  42. }
  43. bool ValidateStepSize(const ScalarVariable alphaNew, const double alpha0, const double alpha1, const double edgeThreshold)
  44. {
  45. const double alphaMin = AZStd::GetMin(alpha0, alpha1);
  46. const double alphaMax = AZStd::GetMax(alpha0, alpha1);
  47. const double range = alphaMax - alphaMin;
  48. return (azisfinite(alphaNew) && (alphaNew > alphaMin + edgeThreshold * range) &&
  49. (alphaNew < alphaMax - edgeThreshold * range));
  50. }
  51. LineSearchResult SelectStepSizeFromInterval(double alpha0, double alpha1, double f_alpha0, double f_alpha1,
  52. double df_alpha0, const Function& f, const VectorVariable& x0, const VectorVariable& searchDirection,
  53. const double f_x0, const double df_x0, const double c1, const double c2)
  54. {
  55. const double cubicEdgeThreshold = 0.2;
  56. const double quadraticEdgeThreshold = 0.1;
  57. double alphaLast = 0.0;
  58. double f_alphaLast = f_x0;
  59. LineSearchResult result;
  60. for (AZ::u32 iteration = 0; iteration < lineSearchIterations; iteration++)
  61. {
  62. ScalarVariable alphaNew = 0.0;
  63. if (iteration > 0)
  64. {
  65. // first try selecting a new alpha value based on cubic interpolation through the most recent points
  66. alphaNew = CubicMinimum(alpha0, f_alpha0, df_alpha0, alpha1, f_alpha1, alphaLast, f_alphaLast);
  67. }
  68. // if this is the first iteration, or the cubic method failed or is invalid, try a quadratic
  69. if (iteration == 0 || !ValidateStepSize(alphaNew, alpha0, alpha1, cubicEdgeThreshold))
  70. {
  71. alphaNew = QuadraticMinimum(alpha0, f_alpha0, df_alpha0, alpha1, f_alpha1);
  72. // if the quadratic is invalid, use bisection
  73. if (!ValidateStepSize(alphaNew, alpha0, alpha1, quadraticEdgeThreshold))
  74. {
  75. alphaNew = ScalarVariable(0.5 * (alpha0 + alpha1));
  76. }
  77. }
  78. // Check if alphaNew satisfies the Wolfe conditions
  79. // First the sufficient decrease condition
  80. const double f_alphaNew = FunctionValue(f, x0 + alphaNew * searchDirection);
  81. if ((f_alphaNew > f_x0 + c1 * alphaNew * df_x0) || (f_alphaNew >= f_alpha0))
  82. {
  83. // The decrease is not sufficient, so set up the parameters for the next iteration
  84. f_alphaLast = f_alpha1;
  85. alphaLast = alpha1;
  86. alpha1 = alphaNew;
  87. f_alpha1 = f_alphaNew;
  88. }
  89. else
  90. {
  91. // There is sufficient decrease, so test the second Wolfe condition i.e. whether the derivative
  92. // corresponding to alphaNew is shallower than the derivative at x0
  93. double df_alphaNew = DirectionalDerivative(f, x0 + alphaNew * searchDirection, searchDirection);
  94. if (fabs(df_alphaNew) <= -c2 * df_x0)
  95. {
  96. // alphaNew satisfies the Wolfe conditions, so return it
  97. result.m_outcome = LineSearchOutcome::Success;
  98. result.m_stepSize = alphaNew;
  99. result.m_functionValue = f_alphaNew;
  100. result.m_derivativeValue = df_alphaNew;
  101. return result;
  102. }
  103. if (df_alphaNew * (alpha1 - alpha0) >= 0.0)
  104. {
  105. f_alphaLast = f_alpha1;
  106. alphaLast = alpha1;
  107. alpha1 = alpha0;
  108. f_alpha1 = f_alpha0;
  109. }
  110. else
  111. {
  112. f_alphaLast = f_alpha0;
  113. alphaLast = alpha0;
  114. }
  115. alpha0 = alphaNew;
  116. f_alpha0 = f_alphaNew;
  117. df_alpha0 = df_alphaNew;
  118. }
  119. }
  120. // Failed to find a conforming step size
  121. result.m_stepSize = 0.0;
  122. result.m_functionValue = 0.0;
  123. result.m_derivativeValue = 0.0;
  124. result.m_outcome = LineSearchOutcome::FailureExceededIterations;
  125. return result;
  126. }
  127. LineSearchResult LineSearchWolfe(const Function& f, const VectorVariable& x0, double f_x0,
  128. const VectorVariable& searchDirection)
  129. {
  130. // uses the notation from Nocedal and Wright, where alpha represents the step size
  131. // alpha0 and alpha1 are the lower and upper bounds of an interval which brackets the final value of alpha
  132. // initial step size of 1 is recommended for quasi-Newton methods (Nocedal and Wright)
  133. double alpha0 = 0.0;
  134. double alpha1 = 1.0;
  135. double f_alpha1 = FunctionValue(f, x0 + alpha1 * searchDirection);
  136. double f_alpha0 = f_x0;
  137. const double df_x0 = DirectionalDerivative(f, x0, searchDirection);
  138. double df_alpha0 = df_x0;
  139. for (AZ::u32 iteration = 0; iteration < lineSearchIterations; iteration++)
  140. {
  141. // if the value of f corresponding to alpha1 isn't sufficiently small compared to f at x0,
  142. // then the interval [alpha0 ... alpha1] must bracket a suitable point.
  143. if ((f_alpha1 > f_x0 + WolfeConditionsC1 * alpha1 * df_x0) || (iteration > 0 && f_alpha1 > f_alpha0))
  144. {
  145. return SelectStepSizeFromInterval(alpha0, alpha1, f_alpha0, f_alpha1, df_alpha0,
  146. f, x0, searchDirection, f_x0, df_x0, WolfeConditionsC1, WolfeConditionsC2);
  147. }
  148. // otherwise, if the derivative corresponding to alpha1 is large enough, alpha1 already
  149. // satisfies the Wolfe conditions and so return alpha1.
  150. double df_alpha1 = DirectionalDerivative(f, x0 + alpha1 * searchDirection, searchDirection);
  151. if (fabs(df_alpha1) <= -WolfeConditionsC2 * df_x0)
  152. {
  153. LineSearchResult result;
  154. result.m_outcome = LineSearchOutcome::Success;
  155. result.m_stepSize = alpha1;
  156. result.m_functionValue = f_alpha1;
  157. result.m_derivativeValue = df_alpha1;
  158. return result;
  159. }
  160. if (df_alpha1 >= 0.0)
  161. {
  162. return SelectStepSizeFromInterval(alpha1, alpha0, f_alpha1, f_alpha0, df_alpha1,
  163. f, x0, searchDirection, f_x0, df_x0, WolfeConditionsC1, WolfeConditionsC2);
  164. }
  165. // haven't found an interval which is guaranteed to bracket a suitable point,
  166. // so expand the search region for the next iteration
  167. alpha0 = alpha1;
  168. f_alpha0 = f_alpha1;
  169. alpha1 = 2.0 * alpha1;
  170. f_alpha1 = FunctionValue(f, x0 + alpha1 * searchDirection);
  171. df_alpha0 = df_alpha1;
  172. }
  173. LineSearchResult result;
  174. result.m_outcome = LineSearchOutcome::BestEffort;
  175. result.m_stepSize = alpha1;
  176. result.m_functionValue = f_alpha1;
  177. result.m_derivativeValue = DirectionalDerivative(f, x0 + alpha1 * searchDirection, searchDirection);
  178. return result;
  179. }
  180. } // namespace NumericalMethods::Optimization