G+Smo  25.01.0
Geometry + Simulation Modules
 
Loading...
Searching...
No Matches
gsHLBFGS.h
1#include <functional>
2#include <vector>
3#include <cassert>
4
5#pragma once
6
9
11
12#ifdef _OPENMP
13#define USE_OPENMP
14#endif
15#undef USE_OPENMP
16#include "HLBFGS/HLBFGS.h"
17#undef USE_OPENMP
18
19/*
20To do:
21- Use Eigen
22- change T, int
23*/
24
25// https://xueyuhanlang.github.io/software/HLBFGS/
26
27#include<iostream>
28#include <iomanip>// Header file needed to use setw
29
30namespace gismo
31{
32
33struct gsHLBFGSObjective
34{
35 typedef real_t T;
36 typedef gsEigen::Matrix<T, gsEigen::Dynamic, 1> Vector;
37 // typedef Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> Matrix;
38
39 gsHLBFGSObjective(gsOptProblem<T>* objective)
40 :
41 obj(objective)
42 { }
43
44 gsHLBFGSObjective()
45 :
46 obj(nullptr)
47 { }
48
49 void operator() (int N, real_t* x, real_t* prev_x, real_t* f, real_t* g) const
50 {
51 gsAsConstVector<real_t> u(x,N);
52 *f = obj->evalObj(u);
53
54 gsAsVector<real_t> Gvec(g,N);
55 obj->gradObj_into(u,Gvec);
56 }
57
58 gsOptProblem<T> * obj;
59};
60
61template<typename T>
62class gsHLBFGS : public gsOptimizer<T>
63{
64public:
65 using Base = gsOptimizer<T>;
66
67public:
68 // gsHLBFGS(gsOptProblem<T> * problem)
69 // :
70 // Base(problem)
71 // {
72 // this->defaultOptions();
73 // }
74
75 gsHLBFGS(gsOptProblem<T> * problem)
76 :
77 Base(problem)
78 {
79 this->defaultOptions();
80 }
81
82
83protected:
84 void defaultOptions()
85 {
86 Base::defaultOptions();
87
88 // see documentation in https://xueyuhanlang.github.io/software/HLBFGS/
89 // parameters 0 ... 6:
90 m_options.addReal("FuncTol", "function tolerance used in line-search", 1e-4);
91 m_options.addReal("VarTol", "variable tolerance used in line-search", 1e-16);
92 m_options.addReal("GradTol", "gradient tolerance used in line-search", 0.9);
93 m_options.addReal("StpMin", "stpmin used in line-search", 1e-20);
94 m_options.addReal("StpMax", "stpmax used in line-search", 1e+20);
95 m_options.addReal("MinGradLen", "Minimal gradient length", 1e-9);
96 m_options.addReal("MinStepLen", "Minimal step length", 1e-10);
97
98 // infos 0 ... 2, 6 ... 12:
99 m_options.addInt("MaxEval", "the max number of evaluation in line-search", 20);
100 m_options.addInt("TotEval", "the total number of evalfunc calls", 0);
101 m_options.addInt("CurrInt", "the current number of iterations", 0);
102 //m_options.addInt("Strategy", "The lbfgs strategy. 0: standard, 1: M1QN3 strategy[8](recommended).", 0);
103 // maxIterations: set by the parent class
104 // verbose: set by the parent class
105 m_options.addInt("UpdateHess", "T: the update interval of Hessian. (typical choices: 0-200)", 10);
106 m_options.addInt("SwitchHess", "0: without hessian, 1: with accurate hessian", 0);
107 m_options.addInt("Icfs", "icfs parameter", 15);
108 m_options.addInt("LineSearch", "0: classical line-search; 1: modified line-search (it is not useful in practice)", 0);
109 m_options.addInt("DissPre", "0: Disable preconditioned CG; 1: Enable preconditioned CG", 0);
110 m_options.addInt("BetaCG", "0 or 1 defines different methods for choosing beta in CG.", 1);
111 m_options.addInt("Diag", "internal usage. 0: only update the diag in USER_DEFINED_HLBFGS_UPDATE_H; 1: default.", 1);
112
113 m_options.addInt("LBFGSUpdates","Number of LBFGS updates (typically 3-20, put to 0 for gradient descent)",20);
114 }
115
116 void getOptions()
117 {
118 Base::getOptions();
119
120 // parameters
121 m_funcTol = m_options.getReal("FuncTol");
122 m_varTol = m_options.getReal("VarTol");
123 m_gradTol = m_options.getReal("GradTol");
124 m_stpMin = m_options.getReal("StpMin");
125 m_stpMax = m_options.getReal("StpMax");
126 m_minGradLen = m_options.getReal("MinGradLen");
127 m_minStepLen = m_options.getReal("MinStepLen");
128
129 // infos
130 m_maxEval = m_options.getInt("MaxEval");
131 m_totEval = m_options.getInt("TotEval");
132 m_currInt = m_options.getInt("CurrInt");
133 m_updateHess = m_options.getInt("UpdateHess");
134 m_switchHess = m_options.getInt("SwitchHess");
135 m_icfs = m_options.getInt("Icfs");
136 m_lineSearch = m_options.getInt("LineSearch");
137 m_dissPre = m_options.getInt("DissPre");
138 m_betaCG = m_options.getInt("BetaCG");
139 m_diag = m_options.getInt("Diag");
140
141 m_M = m_options.getInt("LBFGSUpdates");
142
143 m_hlbfgs_info[0] = static_cast<index_t>(m_maxEval);
144 m_hlbfgs_info[1] = static_cast<index_t>(m_totEval);
145 m_hlbfgs_info[2] = static_cast<index_t>(m_currInt);
146 // m_hlbfgs_info[3]:The lbfgs strategy. 0: standard, 1: M1QN3 strategy (recommended)
147 // Gilbert, J. C., & Lemaréchal, C. (1989). Some numerical experiments with variable-storage
148 // quasi-Newton algorithms. Mathematical programming, 45(1), 407-435.
149 m_hlbfgs_info[3] = 1;
150 m_hlbfgs_info[4] = static_cast<index_t>(m_maxIterations);
151 m_hlbfgs_info[5] = static_cast<index_t>(m_verbose);
152 m_hlbfgs_info[6] = static_cast<index_t>(m_updateHess);
153 m_hlbfgs_info[7] = static_cast<index_t>(m_switchHess);
154 m_hlbfgs_info[8] = static_cast<index_t>(m_icfs);
155 m_hlbfgs_info[9] = static_cast<index_t>(m_lineSearch);
156 m_hlbfgs_info[10] = static_cast<index_t>(m_dissPre);
157 m_hlbfgs_info[11] = static_cast<index_t>(m_betaCG);
158 m_hlbfgs_info[12] = static_cast<index_t>(m_diag);
159
160 m_hlbfgs_pars[0] = m_funcTol;
161 m_hlbfgs_pars[1] = m_varTol;
162 m_hlbfgs_pars[2] = m_gradTol;
163 m_hlbfgs_pars[3] = m_stpMin;
164 m_hlbfgs_pars[4] = m_stpMax;
165 m_hlbfgs_pars[5] = m_minGradLen;
166 m_hlbfgs_pars[6] = m_minStepLen;
167
168 for (int i = 0; i!=7; ++i)
169 {
170 m_hlbfgs_pars[i] = m_options.askReal("PARAMETER"+util::to_string(i) , m_hlbfgs_pars[i]);
171 //gsInfo << "m_hlbfgs_pars[" << i << "]: " << m_hlbfgs_pars[i] << std::endl;
172 }
173
174 for (int i = 0; i!=13; ++i)
175 m_hlbfgs_info[i] = m_options.askInt("INFO"+util::to_string(i), m_hlbfgs_info[i]);
176 }
177
178protected:
179
180 static void static_func_grad(int N, T* x, T* prev_x, T* f, T* g)
181 {
182 (*local_func_grad)(N, x, prev_x, f, g);
183 }
184
185 static void static_newiter_callback(int iter, int call_iter, T *x, T* f, T *g, T* gnorm)
186 {
187 (*local_newiter_callback)(iter, call_iter, x, f, g, gnorm);
188 }
189
190public:
191 // void run(const gsMatrix<T> & initialGuess)
192 void run(std::vector<T> & sol)
193 {
194 // m_curDesign = initialGuess;
195
196 INIT_HLBFGS(m_hlbfgs_pars, m_hlbfgs_info);
197 this->getOptions();
198 // std::function<void(index_t N, T* x, T* prev_x, T* f, T* g)>
199
200 const std::function<void(int N, real_t* x, real_t* prev_x, real_t* f, real_t* g)> wrapfunc =
201 [&](int N, real_t* x, real_t*, real_t* f, real_t* g) {
202 std::vector<real_t> array_x(N), array_g(N);
203
204 gsAsConstVector<real_t> u(x,N);
205 *f = m_op->evalObj(u);
206
207 gsAsVector<real_t> Gvec(g,N);
208 m_op->gradObj_into(u,Gvec);
209 };
210
211 local_func_grad = &wrapfunc; // !not parallel !!
212
213 const std::function<void(int iter, int call_iter, T *x, T* f, T *g, T* gnorm)> wrapcallback =
214 [this](int iter, int call_iter, T *x, T* f, T *g, T* gnorm)
215 {
216 // TODO: give it a flag to decide if we need to print or not.
217 if (m_verbose==2)
218 gsInfo<<"# iter "<< iter << ": #func eval. " << call_iter << ", f = " << *f <<", ||g|| = " << *gnorm << std::endl;
219 };
220
221 local_newiter_callback = &wrapcallback;
222
223 // WHAT ABOUT CONSTRAINTS????
224 HLBFGS(
225 sol.size(),
226 m_M, // hardcoded??? -->>> change to an option of the class
227 sol.data(),
228 static_func_grad,
229// obj,
230 nullptr,
231 HLBFGS_UPDATE_Hessian,
232 static_newiter_callback,
233 m_hlbfgs_pars,
234 m_hlbfgs_info
235 );
236 }
237
238 void solve(const gsMatrix<T> & initialGuess)
239 {
240 GISMO_ASSERT(initialGuess.cols()==1,"The initial guess should have vector format");
241 std::vector<T> sol(initialGuess.rows());
242 gsMatrix<T>::Map(sol.data(), initialGuess.rows(),1) = initialGuess;
243 this->run(sol);
244
245 m_curDesign = gsMatrix<T>::Map(sol.data(), sol.size(),1);
246 m_numIterations = m_hlbfgs_info[2];
247 m_finalObjective = m_op->evalObj(gsAsConstVector<T>(m_curDesign.data(),m_curDesign.rows()));
248
249 if (m_verbose==1)
250 gsInfo<<"HLBFGS finished in "<<m_numIterations<<" iterations, with final objective "<<m_finalObjective<<"\n";
251
252 }
253
255 // own analytic gradient. Author: Ye Ji (jiyess@outlook.com)
256 void gradientCheck(const gsVector<T> &u) {
257 // Get the analytic gradient
258 std::vector<T> sol(u.size());
259 gsAsVector<T> analyticGrad(sol);
260 std::copy(u.begin(), u.end(), sol.begin());
261 m_op->gradObj_into(sol, analyticGrad);
262
263 // Finite difference calculation of gradient using central differences
264 gsVector<T> numericalGrad(u.size());
265 T h = sqrt(std::numeric_limits<T>::epsilon()) * u.cwiseAbs().maxCoeff();
266 T forwardValue, backwardValue;
267
268 std::vector<T> solForNumericalGrad(u.size());
269 std::copy(u.begin(), u.end(), solForNumericalGrad.begin());
270
271 // Iterate through each dimension
272 for (int k = 0; k < u.size(); ++k) {
273 // Compute function value at forward step
274 solForNumericalGrad[k] += h;
275 forwardValue = m_op->evalObj(solForNumericalGrad);
276
277 // Compute function value at backward step
278 solForNumericalGrad[k] -= 2.0 * h;
279 backwardValue = m_op->evalObj(solForNumericalGrad);
280
281 // Compute the numerical gradient using central difference formula
282 numericalGrad(k) = (forwardValue - backwardValue) / (2.0 * h);
283
284 // Reset the k-th component to its original value
285 solForNumericalGrad[k] += h;
286 }
287
288 // Compare the analytic gradient and the numerical gradient
289 gsInfo << "Analytical gradient: Finite difference gradient: \n";
290
291 int numElementsToPrint = std::min(analyticGrad.size(), 30);
292 for (int i = 0; i < numElementsToPrint; ++i) {
293 gsInfo << std::setw(5) << i << std::setw(20) << analyticGrad(i)
294 << std::setw(20) << numericalGrad(i) << "\n";
295 }
296
297 if (u.size() > 30) {
298 gsInfo << "(Displaying the first 30 components only)\n";
299 }
300
301 T relativeError =
302 (analyticGrad - numericalGrad).norm() / analyticGrad.norm();
303 gsInfo << "The relative error between the analytic gradient and the "
304 "numerical gradient is: " << relativeError << "\n\n";
305 }
306
307// Members taken from Base
308protected:
309 using Base::m_op;
310 using Base::m_numIterations;
311 using Base::m_finalObjective;
312 using Base::m_curDesign;
313 using Base::m_options;
314 using Base::m_verbose;
315 using Base::m_maxIterations;
316
317 using Base::defaultOptions;
318 using Base::getOptions;
319
320// Options
321protected:
322 // parameters
323 T m_funcTol;
324 T m_varTol;
325 T m_gradTol;
326 T m_stpMin;
327 T m_stpMax;
328 T m_minGradLen;
329 T m_minStepLen;
330
331 // infos
332 index_t m_maxEval;
333 index_t m_totEval;
334 index_t m_currInt;
335 index_t m_updateHess;
336 index_t m_switchHess;
337 index_t m_icfs;
338 index_t m_lineSearch;
339 index_t m_dissPre;
340 index_t m_betaCG;
341 index_t m_diag;
342
343// HLBFGS options
344protected:
345 index_t m_hlbfgs_info[20] = {0};
346 T m_hlbfgs_pars[20] = {0};
347 index_t m_M;
348
349 static const std::function<void(int N, T* x, T* prev_x, T* f, T* g)> * local_func_grad;
350 static const std::function<void(int iter, int call_iter, T *x, T* f, T *g, T* gnorm)> * local_newiter_callback;
351};
352
// Out-of-class definitions of the static callback slots (one pair per
// instantiated T). These are shared globals — concurrent gsHLBFGS::run()
// calls would race on them, hence the "not parallel" caveat in run().
template<typename T>
const std::function<void(int N, T* x, T* prev_x, T* f, T* g)> * gsHLBFGS<T>::local_func_grad = nullptr;
template<typename T>
const std::function<void(int iter, int call_iter, T *x, T* f, T *g, T* gnorm)> * gsHLBFGS<T>::local_newiter_callback = nullptr;
357
358
359// using gsHLBFGS = gdc::GradientDescent<T, Objective, StepSize, Callback, FiniteDifferences>;
360
361} //namespace gismo
gsOptionList m_options
Options.
Definition gsOptimizer.h:108
gsMatrix< T > m_curDesign
Current design variables (and starting point)
Definition gsOptimizer.h:105
std::string to_string(const C &value)
Converts value to string, assuming "operator<<" defined on C.
Definition gsUtils.h:56
#define index_t
Definition gsConfig.h:32
#define gsInfo
Definition gsDebug.h:43
#define GISMO_ASSERT(cond, message)
Definition gsDebug.h:89
This is the main header file that collects wrappers of Eigen for linear algebra.
Provides declaration of an optimization problem.
This file is part of the G+Smo library.
The G+Smo namespace, containing all definitions for the library.