This post describes the way I currently wrap math functions. These classes can be used to interface solvers or mathematical programming procedures.
Demo code is available in this GitHub repository, blog post itself is better read here.
The goal is to wrap the notion of math functions
template <typename DOMAIN_TYPE, typename CODOMAIN_TYPE>
class Function
{
public:
using domain_type = DOMAIN_TYPE;
using codomain_type = CODOMAIN_TYPE;
struct Interface
{
virtual void f(const domain_type& x,
codomain_type& f) const = 0;
virtual ~Interface() = default;
};
protected:
using pimpl_type = std::shared_ptr<const Interface>;
pimpl_type _pimpl;
std::shared_ptr<size_t> _f_counter;
public:
Function() : _pimpl{} {}
Function(pimpl_type&& pimpl,
std::shared_ptr<size_t> f_counter = {})
: _pimpl{std::move(pimpl)}, _f_counter{f_counter}
{
}
// lambda(domain)->codomain
template <typename F,
std::enable_if_t<std::is_invocable_r_v<
CODOMAIN_TYPE,
std::decay_t<F>,
DOMAIN_TYPE>>* = nullptr>
Function(F&& f)
{
struct Impl final : public Interface
{
std::decay_t<F> _f;
Impl(F&& f) : _f(std::forward<F>(f)) {}
void
f(const DOMAIN_TYPE& x, CODOMAIN_TYPE& y) const
{
y = _f(x);
}
};
_pimpl = std::make_shared<Impl>(std::forward<F>(f));
}
// lambda(domain,codomain&)
template <typename F,
std::enable_if_t<std::is_invocable_r_v<
void,
std::decay_t<F>,
DOMAIN_TYPE,
CODOMAIN_TYPE&>>* = nullptr>
Function(F&& f)
{
struct Impl final : public Interface
{
std::decay_t<F> _f;
Impl(F&& f) : _f(std::forward<F>(f)) {}
void
f(const DOMAIN_TYPE& x, CODOMAIN_TYPE& y) const
{
_f(x, y);
}
};
_pimpl = std::make_shared<Impl>(std::forward<F>(f));
}
// codomain_type foo(const domain_type& x, extra_args... )
template <typename... EXTRA_ARGS>
Function(CODOMAIN_TYPE(f)(const DOMAIN_TYPE&,
EXTRA_ARGS...),
const Identity_t<EXTRA_ARGS>&... args)
: Function{
[=](const DOMAIN_TYPE& x, CODOMAIN_TYPE& y) {
y = f(x, args...);
}}
{
}
// void foo(const domain_type& x,codomain_type& y, extra_args... )
template <typename... EXTRA_ARGS>
Function(void(f)(const DOMAIN_TYPE&,
CODOMAIN_TYPE&,
EXTRA_ARGS...),
const Identity_t<EXTRA_ARGS>&... args)
: Function{
[=](const DOMAIN_TYPE& x, CODOMAIN_TYPE& y) {
f(x, y, args...);
}}
{
}
void
f(const domain_type& x, codomain_type& y) const
{
if (_f_counter) ++(*_f_counter);
(*_pimpl).f(x, y);
}
void
initialize_counter()
{
_f_counter = std::make_shared<size_t>(0);
}
size_t
f_counter() const
{
assert(_f_counter);
return *_f_counter;
}
};
I assume that the functions to be computed are enough complex to make memory allocation and virtual method call penalties negligible.
As I do not want to manipulate pointers and want a value semantic. I use the runtime-polymorphism approach promoted by Sean Parent.
This is the role of this part
{
...
struct Interface
{
virtual void f(const domain_type& x,
codomain_type& f) const = 0;
virtual ~Interface() = default;
};
protected:
using pimpl_type = std::shared_ptr<const Interface>;
pimpl_type _pimpl;
...
}
For convenience, I also define a counter to track how many times the function is evaluated:
std::shared_ptr<size_t> _f_counter;
void initialize_counter()
{
_f_counter = std::make_shared<size_t>(0);
}
size_t f_counter() const
{
assert(_f_counter);
return *_f_counter;
}
As we have a value semantic (Function
objects can be copied) we have
to use a shared counter.
Note: this counter is by no way mandatory. You can remove it from the implementation if you want. However it is quite handy when one wants to compare algorithm performance in term of number of function calls.
template <typename F,
std::enable_if_t<std::is_invocable_r_v<
void, std::decay_t<F>, DOMAIN_TYPE,
CODOMAIN_TYPE&>>* = nullptr>
Function(F&& f)
{
struct Impl final : public Interface
{
std::decay_t<F> _f;
Impl(F&& f) : _f(std::forward<F>(f))
{
}
void f(const DOMAIN_TYPE& x, CODOMAIN_TYPE& y) const
{
_f(x, y);
}
};
_pimpl = std::make_shared<Impl>(std::forward<F>(f));
}
This constructor will allow use to use a lambda to construct a
Function
object, by example:
Function<std::vector<double>, double> func{
[](const std::vector<double>& v, double& sum_v) {
sum_v = std::accumulate(v.begin(), v.end(), 0);
}};
The use of SFINAE
std::enable_if_t<std::is_invocable_r_v<
void, std::decay_t<F>, DOMAIN_TYPE, CODOMAIN_TYPE&>>* =
nullptr
allows use to filter the F
argument in order to define other
specializations, by example:
Function<std::vector<double>, double> func{
[](const std::vector<double>& v) {
return std::accumulate(v.begin(), v.end(), 0);
}};
This constructor role is to move the F f
object into a dynamically
created Interface
instance Impl
. Then it stores it into the _pimpl
shared pointer.
We just have to take care of not forgetting any required decay_t
and
std::forward
.
The next constructor
template <typename... EXTRA_ARGS>
Function(CODOMAIN_TYPE(f)(const DOMAIN_TYPE&,
EXTRA_ARGS...),
const Identity_t<EXTRA_ARGS>&... args)
: Function{[=](const DOMAIN_TYPE& x, CODOMAIN_TYPE& y) {
y = f(x, args...);
}}
{
}
is more interesting.
First, in some cases the c++17 class template argument deduction can be used:
double Rosenbrock(const std::vector<double>& x, double c)
{
assert(x.size() == 2);
return (1 - x[0]) * (1 - x[0]) +
c * (x[1] - x[0] * x[0]) * (x[1] - x[0] * x[0]);
}
// ...
Function f(Rosenbrock, 100); // here c=100
Second, it uses the Identity_t
trick. Without it, this line
Function f(Rosenbrock, 100);
would not compile. You would be forced to write:
Function f(Rosenbrock, 100.); // <- the 100. signs "double" type
Identity_t
is defined as follows
template <typename T>
struct Identity
{
using type = T;
};
template <typename T>
using Identity_t = typename Identity<T>::type;
In the line
template <typename... EXTRA_ARGS>
Function(CODOMAIN_TYPE(f)(const DOMAIN_TYPE&,
EXTRA_ARGS...),
const Identity_t<EXTRA_ARGS>&... args)
its role is to prevent args...
to participate in the template argument
deduction. If this is not done the compiler has a contradictory
information: in one side 100 is a double
as defined by the double c
type of Rosenbrock(const std::vector<double>& x, double c)
but on the
other side it is an int
as define by the 100 of the Function
f(Rosenbrock, 100);
expression. When Identity_t
is used, the compiler
makes its decision only using the Rosenbrock
function prototype .
A more pedagogical explanation can be found here. You can also read cppreference C++20 std::type_identity.
Here is maybe the most controversial part and certainly the point where I have hesitated the most.
To invoke function evaluation at a
point
void f(const domain_type& x, codomain_type& y) const; // (1)
codomain_type operator()(const domain_type& x) const; // (2)
With the first method, we have:
Function func;
// ...
func.f(x,y);
With the second method, we have:
Function func;
// ...
y=func(x);
I do not want to support the two approaches (-> such kind of no-choice is often a bad design decision as you have to support two (possibly incompatible) paradigms).
The latest case has a more familiar syntax, however the former case
has some other advantages. It transfers the responsibility/task of Y
object creation to the caller and it does not impose you useless
copies.
Let’s give an example. This example is a little far-fetched, but it
gives some illustrations. Imagine a non-copyiable, non-movable class
Y
, then you cannot define your function using (2):
struct Y
{
Y() = default;
Y(const Y&) = delete;
Y(Y&&) = delete;
void
set_value(double x)
{
data[10] = x;
};
std::array<double, 100> data
};
Y one_function(const double x) // compile-time error:
{ // use of deleted function
Y y; // ‘Y::Y(Y&&)’
y.set_value(x);
return y;
}
Even if it was possible you would have to copy the 100-double array at
each call (because of the set_value
method I don’t even think we can save
the baby with copy elision).
The approach using (1) does not suffer from these drawbacks:
void one_function(const double x, Y& y)
{
y.set_value(x);
}
compiles without problem and does not require useless copy.
By conclusion our interface for computing function values will be:
void
f(const domain_type& x, codomain_type& y) const
{
if (_f_counter) ++(*_f_counter);
(*_pimpl).f(x, y);
}
The Differentiable_Function
follows exactly the same scheme (see Annex
at the end of this post). It defines 3 main methods:
f(const Domain& x, Codomain& y)
df(const Domain& x, Differential& df)
f_df(const Domain& x, Codomain& y, Differential& df)
and add a df
evaluation counter.
When wrapping a function
void
Rosenbrock(const std::valarray<double>& x,
double* f,
std::valarray<double>* df,
double c)
{
assert(x.size() == 2);
assert(df == nullptr or df->size() == 2);
if (f)
{
*f = (1 - x[0]) * (1 - x[0]) +
c * (x[1] - x[0] * x[0]) * (x[1] - x[0] * x[0]);
}
if (df)
{
(*df)[0] = 2 * (-1 + x[0] + 2 * c * x[0] * x[0] * x[0] -
2 * c * x[0] * x[1]);
(*df)[1] = 2 * c * (x[1] - x[0] * x[0]);
}
}
// ...
Differentiable_Function wrapped(Rosenbrock,
10); // here c=10
std::valarray<double> x(2, 2);
std::valarray<double> df(2, 2);
double y;
wrapped.f_df(x, y, df);
we use the following convention:
- if
f
(ofdouble *f
) =nullptr
do not compute f value - if
df
(ofstd::valarray<double> *df
) =nullptr
do not compute df value
One can transform a Differentiable_Function
object into a Function
object, thank to the conversion operator:
operator Function<DOMAIN_TYPE, CODOMAIN_TYPE>() const
{
return {_pimpl, _f_counter};
}
During the conversion only a shallow copy is performed (thanks to the
use of the _pimp
shared pointer). The f_counter
is also shared to
track function evaluations performed from created Function
instance.
The user can also explicitly perform the conversion thanks to the
as_function()
method:
Function<DOMAIN_TYPE, CODOMAIN_TYPE>
as_function() const
{
return static_cast<Function<DOMAIN_TYPE, CODOMAIN_TYPE>>(*this);
}
This is a convenience method that, compared to a regular static_cast
,
avoids to explicitly define types:
auto f = differentiable_g.as_function();
// versus
auto f = static_cast<Function<DOMAIN_TYPE,
CODOMAIN_TYPE>>(differentiable_g);
In maths the gradient of a scalar function is the
In C++ to make this distinction explicit we would had had to define 2
different types. This is an overkill effort. The compromise is to
interpret the result of f_df(...)
or df(...)
call as a differential or
a gradient according to the context. This context is generally clear:
multidimensional nonlinear solvers (Newton, GMRES in its nonlinear
version…) use differentials, Optimization procedures that minimize a
scalar function use gradient vectors.
The code is available in the GitHub repository.
We solve $$ x^2-c = 0 $$ using two classical iterative methods:
- Newton’s method, requires a differentiable function
- Steffensen’s method, that only requires function values (and not its differential values)
#include "functions.hpp"
#include <iomanip>
#include <iostream>
template <typename T>
void
square_root(const T& x, T* f, T* df, T c)
{
if (f)
{
(*f) = x * x - c;
}
if (df)
{
(*df) = 2 * x;
}
}
template <typename T>
void
show_iteration(size_t iter, T x, T f)
{
constexpr auto max_digits =
std::numeric_limits<T>::max_digits10;
std::cerr << std::setw(4) << iter
<< " x = " << std::setw(max_digits + 5)
<< std::setprecision(max_digits) << x
<< " f = " << std::setw(max_digits + 5)
<< std::setprecision(max_digits) << f
<< std::endl;
}
template <typename T>
bool
Newton(const Differentiable_Function<T, T, T>& f_obj,
T& x,
double epsilon = 1e-10,
size_t max_iter = 20)
{
T f, df; // assumed to be default constructible
bool has_converged = false;
for (size_t iter = 1; iter <= max_iter; ++iter)
{
f_obj.f_df(x, f, df);
auto delta_x = -f / df;
has_converged = std::abs(delta_x) < epsilon;
x = x + delta_x;
show_iteration(iter, x, f);
if (has_converged) break;
}
return has_converged;
}
template <typename T>
bool
Steffensen(const Function<T, T>& f_obj,
T& x,
double epsilon = 1e-10,
size_t max_iter = 20)
{
T f, g; // assumed to be default constructible
bool has_converged = false;
for (size_t iter = 1; iter <= max_iter; ++iter)
{
f_obj.f(x, f);
f_obj.f(x + f, g);
auto delta_x = -f * f / (g - f);
has_converged = std::abs(delta_x) < epsilon;
x = x + delta_x;
show_iteration(iter, x, f);
if (has_converged) break;
}
return has_converged;
}
int
main()
{
Differentiable_Function f(square_root<double>, 2);
bool has_converged;
const double x_init = 2;
double x;
////////////////
std::cerr << std::endl << "Newton" << std::endl;
f.initialize_counter();
x = x_init;
has_converged = Newton(f, x);
std::cerr << "has converged: " << std::boolalpha
<< has_converged << std::endl;
std::cerr << "f counter: " << f.f_counter() << std::endl;
std::cerr << "df counter: " << f.df_counter()
<< std::endl;
////////////////
std::cerr << std::endl << "Steffensen" << std::endl;
f.initialize_counter();
x = x_init;
has_converged = Steffensen(f.as_function(), x);
std::cerr << "has converged: " << std::boolalpha
<< has_converged << std::endl;
std::cerr << "f counter: " << f.f_counter() << std::endl;
std::cerr << "df counter: " << f.df_counter()
<< std::endl;
}
prints:
Newton 1 x = 1.5 f = 2 2 x = 1.4166666666666667 f = 0.25 3 x = 1.4142156862745099 f = 0.0069444444444446418 4 x = 1.4142135623746899 f = 6.0073048828712672e-06 5 x = 1.4142135623730951 f = 4.510614104447086e-12 has converged: true f counter: 5 df counter: 5 Steffensen 1 x = 1.6666666666666667 f = 2 2 x = 1.4774774774774775 f = 0.77777777777777812 3 x = 1.4191773378054482 f = 0.18293969645320995 4 x = 1.414246675030719 f = 0.014064316140559363 5 x = 1.4142135638571252 f = 9.3657835444016513e-05 6 x = 1.4142135623730951 f = 4.1974708153702522e-09 7 x = 1.4142135623730949 f = 4.4408920985006262e-16 has converged: true f counter: 14 df counter: 0
This last example implements the Adam gradient method. It also use Optional Arguments to define its parameters.
#include "Adam.hpp"
using namespace Optimize;
void
Rosenbrock(const std::valarray<double>& x,
double* f,
std::valarray<double>* df,
double c)
{
assert(x.size() == 2);
assert(df == nullptr or df->size() == 2);
if (f)
{
*f = (1 - x[0]) * (1 - x[0]) +
c * (x[1] - x[0] * x[0]) * (x[1] - x[0] * x[0]);
}
if (df)
{
(*df)[0] = 2 * (-1 + x[0] + 2 * c * x[0] * x[0] * x[0] -
2 * c * x[0] * x[1]);
(*df)[1] = 2 * c * (x[1] - x[0] * x[0]);
}
}
int
main()
{
Differentiable_Function f(Rosenbrock, 10);
std::valarray<double> x(2, 2);
double y;
std::valarray<double> grad(2);
f.initialize_counter();
bool has_converged = Adam_optimize(
f,
x,
y,
grad,
_Adam_beta_1_ = 0.6,
_Adam_beta_2_ = 0.6,
_Adam_alpha_schedule_ = [](const size_t t) -> double {
return 1 / sqrt(t);
},
_absolute_epsilon_ = 0.01,
_verbose_ = true);
std::cerr << "has converged: " << std::boolalpha
<< has_converged << std::endl;
std::cerr << "f counter: " << f.f_counter() << std::endl;
std::cerr << "df counter: " << f.df_counter()
<< std::endl;
}
prints:
1 41 166.8652151 11 0.2439827225 6.328434411 21 0.09116840808 2.78026373 31 0.05612056479 2.428380552 41 0.1368252374 5.609748125 51 0.01053314125 0.717170352 61 0.05312840451 3.246977765 71 0.02220712686 2.08327324 81 0.004002723374 0.8795538422 91 0.0471959481 3.173767534 97 2.253224098e-05 0.006729458161 has converged: true f counter: 11 df counter: 97
template <typename DOMAIN_TYPE,
typename CODOMAIN_TYPE,
typename DIFFERENTIAL_TYPE>
class Differentiable_Function
{
public:
using function_type =
Function<DOMAIN_TYPE, CODOMAIN_TYPE>;
using domain_type = typename function_type::domain_type;
using codomain_type =
typename function_type::codomain_type;
using differential_type = DIFFERENTIAL_TYPE;
struct Diff_Interface : public function_type::Interface
{
virtual void f_df(
const domain_type& x,
codomain_type& y,
differential_type& differential) const = 0;
virtual void df(const domain_type& x,
differential_type& df) const = 0;
};
protected:
using pimpl_type = std::shared_ptr<const Diff_Interface>;
public:
pimpl_type _pimpl;
std::shared_ptr<size_t> _f_counter;
std::shared_ptr<size_t> _df_counter;
public:
operator function_type() const
{
return {_pimpl, _f_counter};
}
Differentiable_Function(
pimpl_type&& pimpl,
std::shared_ptr<size_t> f_counter = {},
std::shared_ptr<size_t> df_counter = {})
: _pimpl(std::move(pimpl)),
_f_counter{f_counter},
_df_counter{df_counter}
{
}
// lambda(domain,codomain*,differential*)
template <typename F,
std::enable_if_t<std::is_invocable_r_v<
void,
std::decay_t<F>,
DOMAIN_TYPE,
CODOMAIN_TYPE*,
DIFFERENTIAL_TYPE*>>* = nullptr>
Differentiable_Function(F&& f)
{
struct Impl final : public Diff_Interface
{
std::decay_t<F> _f;
Impl(F&& f) : _f(std::forward<F>(f)) {}
void
f(const DOMAIN_TYPE& x, CODOMAIN_TYPE& y) const
{
_f(x, &y, nullptr);
}
void
f_df(const DOMAIN_TYPE& x,
CODOMAIN_TYPE& y,
DIFFERENTIAL_TYPE& df) const
{
_f(x, &y, &df);
}
void
df(const DOMAIN_TYPE& x, DIFFERENTIAL_TYPE& df) const
{
_f(x, nullptr, &df);
}
};
_pimpl = std::make_shared<Impl>(std::forward<F>(f));
}
// void foo(const std::vector<double>& x, double* f, std::vector<double>* df, extra_args...)
template <typename... EXTRA_ARGS>
Differentiable_Function(
void(f)(const DOMAIN_TYPE&,
CODOMAIN_TYPE*,
DIFFERENTIAL_TYPE*,
EXTRA_ARGS...),
const Identity_t<EXTRA_ARGS>&... args)
: Differentiable_Function{[=](const DOMAIN_TYPE& x,
CODOMAIN_TYPE* y,
DIFFERENTIAL_TYPE* df) {
f(x, y, df, args...);
}}
{
}
function_type
as_function() const
{
return static_cast<function_type>(*this);
}
void
f(const domain_type& x, codomain_type& y) const
{
if (_f_counter) ++(*_f_counter);
(*_pimpl).f(x, y);
}
void
f_df(const domain_type& x,
codomain_type& y,
differential_type& df) const
{
if (_f_counter) ++(*_f_counter);
if (_df_counter) ++(*_df_counter);
(*_pimpl).f_df(x, y, df);
}
void
df(const domain_type& x, differential_type& df) const
{
if (_df_counter) ++(*_df_counter);
(*_pimpl).df(x, df);
}
void
initialize_counter()
{
_f_counter = std::make_shared<size_t>(0);
_df_counter = std::make_shared<size_t>(0);
}
size_t
f_counter() const
{
assert(_f_counter);
return *_f_counter;
}
size_t
df_counter() const
{
assert(_df_counter);
return *_df_counter;
}
};