#pragma once

#include <iostream>
#include <iomanip> // std::setw, used by gradientCheck

#include <gsCore/gsLinearAlgebra.h>
#include <gsOptimizer/gsOptProblem.h>
#include <gsOptimizer/gsOptimizer.h>

#include "HLBFGS/HLBFGS.h"

namespace gismo
{
/// Functor adapting a gsOptProblem to the objective/gradient callback
/// signature expected by the HLBFGS library.
struct gsHLBFGSObjective
{
    typedef real_t T;
    typedef gsEigen::Matrix<T, gsEigen::Dynamic, 1> Vector;

    gsHLBFGSObjective(gsOptProblem<T>* objective)
    : obj(objective)
    { }

    /// Writes the objective value at \a x into \a f and its gradient into \a g.
    void operator() (int N, real_t* x, real_t* prev_x, real_t* f, real_t* g) const
    {
        gsAsConstVector<real_t> u(x,N);
        *f = obj->evalObj(u);

        gsAsVector<real_t> Gvec(g,N);
        obj->gradObj_into(u,Gvec);
    }

    gsOptProblem<T> * obj;
};
/// Wrapper exposing the HLBFGS limited-memory BFGS solver through the
/// gsOptimizer interface.
template<typename T>
class gsHLBFGS : public gsOptimizer<T>
{
public:
    using Base = gsOptimizer<T>;

    gsHLBFGS(gsOptProblem<T> * problem)
    : Base(problem)
    {
        this->defaultOptions();
    }

    /// Registers the HLBFGS-specific options on top of the base options.
    void defaultOptions()
    {
        Base::defaultOptions();
        m_options.addReal("FuncTol",    "function tolerance used in the line-search", 1e-4);
        m_options.addReal("VarTol",     "variable tolerance used in the line-search", 1e-16);
        m_options.addReal("GradTol",    "gradient tolerance used in the line-search", 0.9);
        m_options.addReal("StpMin",     "minimal step length allowed in the line-search", 1e-20);
        m_options.addReal("StpMax",     "maximal step length allowed in the line-search", 1e+20);
        m_options.addReal("MinGradLen", "minimal gradient length (stopping criterion)", 1e-9);
        m_options.addReal("MinStepLen", "minimal step length (stopping criterion)", 1e-10);

        m_options.addInt("MaxEval",      "maximum number of function evaluations in the line-search", 20);
        m_options.addInt("TotEval",      "total number of evalfunc calls", 0);
        m_options.addInt("CurrInt",      "current number of iterations", 0);
        m_options.addInt("UpdateHess",   "update interval of the Hessian (typical choices: 0-200)", 10);
        m_options.addInt("SwitchHess",   "0: without Hessian; 1: with accurate Hessian", 0);
        m_options.addInt("Icfs",         "icfs parameter", 15);
        m_options.addInt("LineSearch",   "0: classical line-search; 1: modified line-search (not useful in practice)", 0);
        m_options.addInt("DissPre",      "0: disable preconditioned CG; 1: enable preconditioned CG", 0);
        m_options.addInt("BetaCG",       "0 or 1: different methods for choosing beta in CG", 1);
        m_options.addInt("Diag",         "internal usage; 0: only update the diagonal in USER_DEFINED_HLBFGS_UPDATE_H; 1: default", 1);

        m_options.addInt("LBFGSUpdates", "number of L-BFGS updates (typically 3-20; set to 0 for gradient descent)", 20);
    }
    /// Copies the option values into the members mirrored to HLBFGS.
    void getOptions()
    {
        m_funcTol    = m_options.getReal("FuncTol");
        m_varTol     = m_options.getReal("VarTol");
        m_gradTol    = m_options.getReal("GradTol");
        m_stpMin     = m_options.getReal("StpMin");
        m_stpMax     = m_options.getReal("StpMax");
        m_minGradLen = m_options.getReal("MinGradLen");
        m_minStepLen = m_options.getReal("MinStepLen");

        m_maxEval    = m_options.getInt("MaxEval");
        m_totEval    = m_options.getInt("TotEval");
        m_currInt    = m_options.getInt("CurrInt");
        m_updateHess = m_options.getInt("UpdateHess");
        m_switchHess = m_options.getInt("SwitchHess");
        m_icfs       = m_options.getInt("Icfs");
        m_lineSearch = m_options.getInt("LineSearch");
        m_dissPre    = m_options.getInt("DissPre");
        m_betaCG     = m_options.getInt("BetaCG");
        m_diag       = m_options.getInt("Diag");

        m_M          = m_options.getInt("LBFGSUpdates");
        // Integer parameters handed to HLBFGS (see the HLBFGS documentation
        // for the meaning of each INFO slot).
        m_hlbfgs_info[0]  = static_cast<index_t>(m_maxEval);
        m_hlbfgs_info[1]  = static_cast<index_t>(m_totEval);
        m_hlbfgs_info[2]  = static_cast<index_t>(m_currInt);
        m_hlbfgs_info[3]  = 1; // L-BFGS updating strategy (1: M1QN3-style, per the HLBFGS docs)
        m_hlbfgs_info[4]  = static_cast<index_t>(m_maxIterations);
        m_hlbfgs_info[5]  = static_cast<index_t>(m_verbose);
        m_hlbfgs_info[6]  = static_cast<index_t>(m_updateHess);
        m_hlbfgs_info[7]  = static_cast<index_t>(m_switchHess);
        m_hlbfgs_info[8]  = static_cast<index_t>(m_icfs);
        m_hlbfgs_info[9]  = static_cast<index_t>(m_lineSearch);
        m_hlbfgs_info[10] = static_cast<index_t>(m_dissPre);
        m_hlbfgs_info[11] = static_cast<index_t>(m_betaCG);
        m_hlbfgs_info[12] = static_cast<index_t>(m_diag);

        // Real-valued parameters handed to HLBFGS.
        m_hlbfgs_pars[0] = m_funcTol;
        m_hlbfgs_pars[1] = m_varTol;
        m_hlbfgs_pars[2] = m_gradTol;
        m_hlbfgs_pars[3] = m_stpMin;
        m_hlbfgs_pars[4] = m_stpMax;
        m_hlbfgs_pars[5] = m_minGradLen;
        m_hlbfgs_pars[6] = m_minStepLen;
        // Expert users may override the raw HLBFGS arrays directly via options
        // named PARAMETER0..PARAMETER6 and INFO0..INFO12, if present.
        for (int i = 0; i != 7; ++i)
            m_hlbfgs_pars[i] = m_options.askReal("PARAMETER" + util::to_string(i), m_hlbfgs_pars[i]);

        for (int i = 0; i != 13; ++i)
            m_hlbfgs_info[i] = m_options.askInt("INFO" + util::to_string(i), m_hlbfgs_info[i]);
    }
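    // Illustrative sketch (not in the original header): a raw slot without a
    // named option can be overridden by adding the corresponding entry, e.g.
    //
    //     opt.options().addInt("INFO3", "L-BFGS updating strategy", 0);
    //
    // askInt/askReal above pick such entries up only if they exist.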
protected:
    // HLBFGS expects plain C function pointers for its callbacks. These static
    // trampolines forward to std::function objects stored behind static
    // pointers, which run() sets up before invoking the solver.
    static void static_func_grad(int N, T* x, T* prev_x, T* f, T* g)
    {
        (*local_func_grad)(N, x, prev_x, f, g);
    }

    static void static_newiter_callback(int iter, int call_iter, T* x, T* f, T* g, T* gnorm)
    {
        (*local_newiter_callback)(iter, call_iter, x, f, g, gnorm);
    }
public:
    /// Runs the HLBFGS solver, starting from and overwriting \a sol.
    void run(std::vector<T> & sol)
    {
        INIT_HLBFGS(m_hlbfgs_pars, m_hlbfgs_info); // load the HLBFGS defaults
        this->getOptions();                        // then apply the user options

        // Objective/gradient callback, forwarded to the wrapped gsOptProblem.
        const std::function<void(int N, T* x, T* prev_x, T* f, T* g)> wrapfunc =
            [&](int N, T* x, T*, T* f, T* g)
        {
            gsAsConstVector<T> u(x,N);
            *f = m_op->evalObj(u);

            gsAsVector<T> Gvec(g,N);
            m_op->gradObj_into(u,Gvec);
        };
        local_func_grad = &wrapfunc;

        // Progress report, invoked by HLBFGS once per iteration.
        const std::function<void(int iter, int call_iter, T* x, T* f, T* g, T* gnorm)> wrapcallback =
            [this](int iter, int call_iter, T* x, T* f, T* g, T* gnorm)
        {
            if (m_verbose)
                gsInfo << "# iter " << iter << ": #func eval. " << call_iter
                       << ", f = " << *f << ", ||g|| = " << *gnorm << std::endl;
        };
        local_newiter_callback = &wrapcallback;

        // Note: HLBFGS is an unconstrained solver; constraints of the problem
        // are not passed on.
        HLBFGS(sol.size(), m_M, sol.data(),
               static_func_grad, nullptr,
               HLBFGS_UPDATE_Hessian, static_newiter_callback,
               m_hlbfgs_pars, m_hlbfgs_info);
    }
    void solve(const gsMatrix<T> & initialGuess)
    {
        GISMO_ASSERT(initialGuess.cols()==1, "The initial guess should have vector format");
        std::vector<T> sol(initialGuess.rows());
        gsMatrix<T>::Map(sol.data(), initialGuess.rows(), 1) = initialGuess;

        this->run(sol);

        m_curDesign = gsMatrix<T>::Map(sol.data(), sol.size(), 1);
        m_numIterations = m_hlbfgs_info[2];
        m_finalObjective = m_op->evalObj(gsAsConstVector<T>(m_curDesign.data(), m_curDesign.rows()));

        if (m_verbose)
            gsInfo << "HLBFGS finished in " << m_numIterations
                   << " iterations, with final objective " << m_finalObjective << "\n";
    }
    /// Compares the analytic gradient of the problem against a central
    /// finite-difference approximation at the point \a u.
    void gradientCheck(const gsVector<T> & u)
    {
        // Analytic gradient, evaluated through the optimization problem.
        std::vector<T> sol(u.size());
        gsAsVector<T> analyticGrad(sol);
        std::copy(u.begin(), u.end(), sol.begin());
        m_op->gradObj_into(sol, analyticGrad);

        // Central finite-difference gradient with a scale-aware step size.
        gsVector<T> numericalGrad(u.size());
        T h = sqrt(std::numeric_limits<T>::epsilon()) * u.cwiseAbs().maxCoeff();
        T forwardValue, backwardValue;

        std::vector<T> solForNumericalGrad(u.size());
        std::copy(u.begin(), u.end(), solForNumericalGrad.begin());

        for (index_t k = 0; k < u.size(); ++k)
        {
            solForNumericalGrad[k] += h;
            forwardValue = m_op->evalObj(solForNumericalGrad);

            solForNumericalGrad[k] -= 2.0 * h;
            backwardValue = m_op->evalObj(solForNumericalGrad);

            numericalGrad(k) = (forwardValue - backwardValue) / (2.0 * h);

            // Restore the k-th coordinate before perturbing the next one.
            solForNumericalGrad[k] += h;
        }

        gsInfo << "Analytical gradient:  Finite difference gradient:\n";

        index_t numElementsToPrint = std::min<index_t>(analyticGrad.size(), 30);
        for (index_t i = 0; i != numElementsToPrint; ++i)
            gsInfo << std::setw(5) << i << std::setw(20) << analyticGrad(i)
                   << std::setw(20) << numericalGrad(i) << "\n";

        if (analyticGrad.size() > 30)
            gsInfo << "(Displaying the first 30 components only)\n";

        T relativeError = (analyticGrad - numericalGrad).norm() / analyticGrad.norm();
        gsInfo << "The relative error between the analytic gradient and the "
                  "numerical gradient is: " << relativeError << "\n\n";
    }
protected:
    using Base::m_numIterations;
    using Base::m_finalObjective;
    using Base::m_curDesign;
    using Base::m_op;
    using Base::m_options;
    using Base::m_verbose;
    using Base::m_maxIterations;

    using Base::defaultOptions;
    using Base::getOptions;
    // Mirrors of the option values (filled in getOptions()).
    T m_funcTol, m_varTol, m_gradTol, m_stpMin, m_stpMax, m_minGradLen, m_minStepLen;
    index_t m_maxEval, m_totEval, m_currInt, m_updateHess, m_switchHess,
            m_icfs, m_lineSearch, m_dissPre, m_betaCG, m_diag, m_M;

    // Raw parameter/info arrays handed to HLBFGS.
    index_t m_hlbfgs_info[20] = {0};
    T       m_hlbfgs_pars[20] = {0};
    // Static hooks used by the C-style trampoline callbacks above.
    static const std::function<void(int N, T* x, T* prev_x, T* f, T* g)> * local_func_grad;
    static const std::function<void(int iter, int call_iter, T* x, T* f, T* g, T* gnorm)> * local_newiter_callback;
};
template<typename T>
const std::function<void(int N, T* x, T* prev_x, T* f, T* g)> *
gsHLBFGS<T>::local_func_grad = nullptr;

template<typename T>
const std::function<void(int iter, int call_iter, T* x, T* f, T* g, T* gnorm)> *
gsHLBFGS<T>::local_newiter_callback = nullptr;

} // namespace gismo
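/* Illustrative usage sketch (not part of the original header). It assumes a
   user-defined gsOptProblem<real_t> subclass `MyProblem` implementing
   evalObj() and gradObj_into(); all names below are hypothetical, and the
   accessors objective()/iterations() are the usual gsOptimizer ones.

       MyProblem problem;
       gismo::gsHLBFGS<real_t> optimizer(&problem);
       optimizer.options().setInt("MaxIterations", 200);
       optimizer.options().setInt("Verbose", 1);

       gsMatrix<real_t> x0(problem.numDesignVars(), 1);
       x0.setZero();
       optimizer.solve(x0);

       gsInfo << "f* = " << optimizer.objective()
              << " after " << optimizer.iterations() << " iterations\n";
*/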