Visualization of Objective Functions

library(vistool)
library(plotly)

The Visualizer1DObjective and Visualizer2DObjective classes visualize 1- and 2-dimensional objective functions. The package contains predefined objective functions.

as.data.table(dict_objective)
#> Key: <key>
#>                        key               label  xdim limits_lower limits_upper
#>                     <char>              <char> <int>       <list>       <list>
#>  1:           TF_Gfunction           Gfunction    NA           NA           NA
#>  2:      TF_GoldsteinPrice      GoldsteinPrice     2          0,0          1,1
#>  3:   TF_GoldsteinPriceLog   GoldsteinPriceLog     2          0,0          1,1
#>  4:         TF_OTL_Circuit         OTL_Circuit     6           NA           NA
#>  5:          TF_RoosArnold          RoosArnold    NA           NA           NA
#>  6:              TF_ackley              ackley     2          0,0          1,1
#>  7:              TF_banana              banana     2          0,0          1,1
#>  8:               TF_beale               beale     2          0,0          1,1
#>  9:            TF_borehole            borehole     2          0,0      1.5,1.0
#> 10:              TF_branin              branin     2        -2,-2          3,3
#> 11:          TF_currin1991          currin1991     2          0,0          1,1
#> 12:               TF_easom               easom     2          0,0          1,1
#> 13:              TF_franke              franke     2    -0.5,-0.5          1,1
#> 14:           TF_gaussian1           gaussian1    NA           NA           NA
#> 15:            TF_griewank            griewank    NA           NA           NA
#> 16:            TF_hartmann            hartmann     6           NA           NA
#> 17:                TF_hump                hump     2          0,0          1,1
#> 18:                TF_levy                levy    NA           NA           NA
#> 19: TF_linkletter_nosignal linkletter_nosignal    NA           NA           NA
#> 20:         TF_michalewicz         michalewicz    NA           NA           NA
#> 21:              TF_piston              piston     7           NA           NA
#> 22:              TF_powsin              powsin    NA           NA           NA
#> 23:          TF_quad_peaks          quad_peaks     2          0,0          1,1
#> 24:    TF_quad_peaks_slant    quad_peaks_slant     2          0,0          1,1
#> 25:           TF_rastrigin           rastrigin    NA           NA           NA
#> 26:            TF_robotarm            robotarm     8           NA           NA
#> 27:            TF_sinumoid            sinumoid     2          0,0          1,1
#> 28:             TF_sqrtsin             sqrtsin    NA           NA           NA
#> 29:           TF_waterfall           waterfall     2          0,0          1,1
#> 30:          TF_wingweight          wingweight    10           NA           NA
#> 31:            TF_zhou1998            zhou1998     2          0,0          1,1
#>                        key               label  xdim limits_lower limits_upper

To get an objective function from the dictionary, use the obj() function.

obj_branin = obj("TF_branin")

We can evaluate the objective function at a point.

x = c(0.9, 1)
obj_branin$eval(x)
#> [1] 178.3166

The gradient and Hessian at point x can be extracted.

obj_branin$grad(x)
#> [1] -354.3258  395.8377
obj_branin$hess(x)
#>            [,1]      [,2]
#> [1,] -1989.5197 -284.2171
#> [2,]  -284.2171 2162.1162

We create a Visualizer2DObjective to visualize the objective function.

viz = as_visualizer(obj_branin)

The objective can be visualized with a surface

viz$init_layer_surface()
viz$plot()

and contour plot.

viz$init_layer_contour()
viz$plot()

Custom Objectives

Let’s define a loss for a linear model on the iris data with target Sepal.Width and feature Petal.Width. First, an Objective requires a function for evaluation:

# Loss of the linear model: the Euclidean (L2) norm of the residuals,
# i.e. the square root of the sum of squared errors (SSE).
l2norm = function(x) {
  sqrt(sum(crossprod(x)))
}

# Residual norm of the linear predictor `Xmat %*% x` with respect to the
# response `y`; `x` holds the model coefficients.
mylm = function(x, Xmat, y) {
  residuals = y - Xmat %*% x
  l2norm(residuals)
}

To fix the loss for the data, the Objective$new() call allows passing custom arguments that are stored and reused in every call to $eval() to evaluate fun. So, calling $eval(x) internally calls fun(x, ...). These arguments need to be specified only once:

# Use the iris dataset with response `Sepal.Width` and feature `Petal.Width`:
Xmat = model.matrix(~ Petal.Width, data = iris)
y = iris$Sepal.Width

# Create a new object:
obj_lm = Objective$new(id = "iris LM", fun = mylm, xdim = 2,  Xmat = Xmat, y = y, minimize = TRUE)

obj_lm$evalStore(c(1, 2))
obj_lm$evalStore(c(2, 3))
obj_lm$evalStore(coef(lm(Sepal.Width ~ Petal.Width, data = iris)))

obj_lm$archive
#>                        x      fval                      grad        gnorm
#>                   <list>     <num>                    <list>        <num>
#> 1:                   1,2 21.553654        2.375467,11.722838 1.196109e+01
#> 2:                   2,3 43.410022        8.779078,16.929270 1.907020e+01
#> 3:  3.3084256,-0.2093598  4.951004 4.832272e-07,2.664535e-07 5.518206e-07

Visualize lm Objective:

viz_lm = as_visualizer(obj_lm, x1_limits = c(-0.5, 5), x2_limits = c(-3.2, 2.8))
viz_lm$plot()

A more advanced option (see More advanced) is to add points to the plotly object:

# Extract the first and second coordinate of every evaluated point stored in
# the archive. vapply() guarantees a plain numeric vector (and fails loudly
# otherwise), whereas sapply() silently changes its return type, e.g. to a
# list for an empty archive.
x = vapply(obj_lm$archive$x, function(pt) pt[1], numeric(1))
y = vapply(obj_lm$archive$x, function(pt) pt[2], numeric(1))

# Add the evaluated points as scatter3d markers to the plotly object
# returned by $plot().
viz_lm$plot() %>%
  add_trace(x = x, y = y, z = obj_lm$archive$fval, type = "scatter3d", mode = "markers")

Optimizer

The optimizer class defines the optimization strategy and is initialized by taking an objective function, start value, and learning rate. Available optimizers are:

Gradient descent with OptimizerGD
Momentum with OptimizerMomentum
Nesterov's momentum with OptimizerNAG

Creating an optimizer is done by (let’s use an x value that works well):

obj_banana = obj("TF_banana")
opt = OptimizerGD$new(obj_banana, x_start = c(0.8, 0.6), lr = 0.01)

With these values set, optimization is done by calling $optimize() with the number of steps as argument:

opt$optimize(10L)
#> TF_banana: Batch 1 step 1: f(x) = 0.1572, x = c(0.7352, 0.5778)
#> TF_banana: Batch 1 step 2: f(x) = 0.4955, x = c(0.7888, 0.6174)
#> TF_banana: Batch 1 step 3: f(x) = 0.2118, x = c(0.7296, 0.5977)
#> TF_banana: Batch 1 step 4: f(x) = 0.3741, x = c(0.792, 0.6455)
#> TF_banana: Batch 1 step 5: f(x) = 0.069, x = c(0.6932, 0.602)
#> TF_banana: Batch 1 step 6: f(x) = 0.2809, x = c(0.7189, 0.6245)
#> TF_banana: Batch 1 step 7: f(x) = 0.2565, x = c(0.7877, 0.6806)
#> TF_banana: Batch 1 step 8: f(x) = 0.1753, x = c(0.6922, 0.6351)
#> TF_banana: Batch 1 step 9: f(x) = 0.6265, x = c(0.7418, 0.6799)
#> TF_banana: Batch 1 step 10: f(x) = 0.5908, x = c(0.717, 0.6795)

Calling $optimize() writes into the archive of the optimizer and also calls $evalStore() of the objective. Therefore, $optimize() writes into two archives:

opt$archive
#>                   x_out                x_in                      update
#>                  <list>              <list>                      <list>
#>  1: 0.7351819,0.5778020             0.8,0.6     -0.06481810,-0.02219798
#>  2: 0.7888135,0.6173630 0.7351819,0.5778020       0.05363157,0.03956093
#>  3: 0.7296113,0.5977212 0.7888135,0.6173630     -0.05920220,-0.01964177
#>  4: 0.7920259,0.6454892 0.7296113,0.5977212       0.06241458,0.04776799
#>  5: 0.6931781,0.6019537 0.7920259,0.6454892     -0.09884780,-0.04353545
#>  6: 0.7189369,0.6245137 0.6931781,0.6019537       0.02575889,0.02256001
#>  7: 0.7876975,0.6806080 0.7189369,0.6245137       0.06876058,0.05609425
#>  8: 0.6921665,0.6351359 0.7876975,0.6806080     -0.09553100,-0.04547204
#>  9: 0.7417730,0.6798539 0.6921665,0.6351359       0.04960652,0.04471794
#> 10: 0.7170136,0.6795146 0.7417730,0.6798539 -0.0247594837,-0.0003392663
#>       fval_out    fval_in    lr step_size
#>          <num>      <num> <num>     <num>
#>  1: 0.15716889 0.46245779  0.01         1
#>  2: 0.49548040 0.15716889  0.01         1
#>  3: 0.21179976 0.49548040  0.01         1
#>  4: 0.37414391 0.21179976  0.01         1
#>  5: 0.06900854 0.37414391  0.01         1
#>  6: 0.28085919 0.06900854  0.01         1
#>  7: 0.25645358 0.28085919  0.01         1
#>  8: 0.17532134 0.25645358  0.01         1
#>  9: 0.62647891 0.17532134  0.01         1
#> 10: 0.59083776 0.62647891  0.01         1
#>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     objective
#>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <list>
#>  1: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#>  2: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#>  3: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#>  4: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#>  5: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#>  6: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#>  7: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#>  8: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#>  9: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#> 10: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#>     momentum  step batch
#>        <num> <int> <num>
#>  1:        0     1     1
#>  2:        0     2     1
#>  3:        0     3     1
#>  4:        0     4     1
#>  5:        0     5     1
#>  6:        0     6     1
#>  7:        0     7     1
#>  8:        0     8     1
#>  9:        0     9     1
#> 10:        0    10     1
opt$objective$archive
#>                       x       fval                    grad     gnorm
#>                  <list>      <num>                  <list>     <num>
#>  1:             0.8,0.6 0.46245779     -6.481810,-2.219798  6.851377
#>  2: 0.7351819,0.5778020 0.15716889       5.363157,3.956093  6.664393
#>  3: 0.7888135,0.6173630 0.49548040     -5.920220,-1.964177  6.237547
#>  4: 0.7296113,0.5977212 0.21179976       6.241458,4.776799  7.859618
#>  5: 0.7920259,0.6454892 0.37414391     -9.884780,-4.353545 10.801029
#>  6: 0.6931781,0.6019537 0.06900854       2.575889,2.256001  3.424142
#>  7: 0.7189369,0.6245137 0.28085919       6.876058,5.609425  8.873884
#>  8: 0.7876975,0.6806080 0.25645358     -9.553100,-4.547204 10.580113
#>  9: 0.6921665,0.6351359 0.17532134       4.960652,4.471794  6.678699
#> 10: 0.7417730,0.6798539 0.62647891 -2.47594837,-0.03392663  2.476181

We can let the algorithm run for another 10 iterations in a second batch:

opt$optimize(10L)
#> TF_banana: Batch 2 step 1: f(x) = 0.2592, x = c(0.7638, 0.728)
#> TF_banana: Batch 2 step 2: f(x) = 0.2873, x = c(0.6702, 0.679)
#> TF_banana: Batch 2 step 3: f(x) = 0.5391, x = c(0.7292, 0.7404)
#> TF_banana: Batch 2 step 4: f(x) = 0.1765, x = c(0.6349, 0.6896)
#> TF_banana: Batch 2 step 5: f(x) = 0.6858, x = c(0.6718, 0.7368)
#> TF_banana: Batch 2 step 6: f(x) = 0.401, x = c(0.713, 0.7914)
#> TF_banana: Batch 2 step 7: f(x) = 0.3109, x = c(0.6133, 0.7283)
#> TF_banana: Batch 2 step 8: f(x) = 0.8142, x = c(0.657, 0.7963)
#> TF_banana: Batch 2 step 9: f(x) = 0.7474, x = c(0.6208, 0.7806)
#> TF_banana: Batch 2 step 10: f(x) = 0.5501, x = c(0.6575, 0.8468)

Still not very satisfying.

Visualize Optimization Traces

The Visualizer class provides the layer method $add_optimization_trace(), which takes the optimizer as argument and adds its optimization trace to the plot:

viz = as_visualizer(obj_banana)
viz$add_optimization_trace(opt)
viz$plot()

Step size control

When calling $optimize(), the second argument is stepSizeControl that allows to expand or compress the update added to the old value of $x$ . For example, for GD with $x_{\text{new}} = x_{\text{old}} + lr * \Delta_f(x_{\text{old}})$ the update $u = lr * \Delta_f(x_{\text{old}})$ is multiplied with the return value of stepSizeControl(). There are a few pre-implemented control functions like line search or various decaying methods:

stepSizeControlLineSearch(lower, upper): Conduct a line search for $a$ in $x_{\text{new}} = x_{\text{old}} + a * lr * \Delta_f(x_{\text{old}})$.
stepSizeControlDecayTime(decay): Lower the updates by $(1 + decay * iteration)^{-1}$.
stepSizeControlDecayExp(decay): Lower the updates by $\exp(-decay * iteration)$.
stepSizeControlDecayLinear(iter_zero): Lower the updates until iter_zero is reached. Updates with iter > iter_zero are 0.
stepSizeControlDecaySteps(drop_rate, every_iter): Lower the updates every_iter by drop_rate.

Note that these functions are factories: each call returns a function with the required signature:

stepSizeControlDecayTime()
#> function (x, u, obj, opt) 
#> {
#>     assertStepSizeControl(x, u, obj, opt)
#>     epoch = nrow(obj$archive)
#>     return(1/(1 + decay * epoch))
#> }
#> <bytecode: 0x55ddd176fa70>
#> <environment: 0x55ddd176f060>

Let’s define multiple gradient descent optimizers and optimize 100 steps with a step size control:

x0 = c(0.8, 0.6)
lr = 0.01
obj_banana = obj("TF_banana")

oo1 = OptimizerGD$new(obj_banana, x_start = x0, lr = lr, id = "GD without LR Control", print_trace = FALSE)
oo2 = OptimizerGD$new(obj_banana, x_start = x0, lr = lr, id = "GD with Line Search", print_trace = FALSE)
oo3 = OptimizerGD$new(obj_banana, x_start = x0, lr = lr, id = "GD with Time Decay", print_trace = FALSE)
oo4 = OptimizerGD$new(obj_banana, x_start = x0, lr = lr, id = "GD with Exp Decay", print_trace = FALSE)
oo5 = OptimizerGD$new(obj_banana, x_start = x0, lr = lr, id = "GD with Linear Decay", print_trace = FALSE)
oo6 = OptimizerGD$new(obj_banana, x_start = x0, lr = lr, id = "GD with Step Decay", print_trace = FALSE)

oo1$optimize(steps = 100)
oo2$optimize(steps = 100, stepSizeControlLineSearch())
oo3$optimize(steps = 100, stepSizeControlDecayTime())
oo4$optimize(steps = 100, stepSizeControlDecayExp())
oo5$optimize(steps = 100, stepSizeControlDecayLinear())
oo6$optimize(steps = 100, stepSizeControlDecaySteps())

For now we don’t know how well it worked. Let’s collect all archives with mergeOptimArchives() and visualize the step sizes and function values with patchwork magic:

arx = mergeOptimArchives(oo1, oo2, oo3, oo4, oo5, oo6)

library(patchwork)
gg1 = ggplot(arx, aes(x = iteration, y = step_size, color = optim_id))
gg2 = ggplot(arx, aes(x = iteration, y = fval_out, color = optim_id))

(gg1 + ggtitle("Step sizes") |
 gg1 + ylim(0, 1) + ggtitle("Step sizes (zoomed)") |
 gg2 + ggtitle("Objective")) +
  plot_layout(guides = "collect") &
  geom_line() &
  theme_minimal() &
  theme(legend.position = "bottom") &
  ggsci::scale_color_simpsons()

Visualizing the traces is done as before by adding an optimization trace layer. We can do this for all optimizers to add multiple traces to the plot (colors are picked randomly, see the Visualizer section for more details about plotting):

viz = as_visualizer(obj_banana)

viz$add_optimization_trace(oo1)
viz$add_optimization_trace(oo2)
viz$add_optimization_trace(oo3)
viz$add_optimization_trace(oo4)
viz$add_optimization_trace(oo5)
viz$add_optimization_trace(oo6)

viz$plot()

Practically, it should be no issue to also combine multiple control functions. The important thing is to keep the signature of the function by allowing the function to get the arguments x (current value), u (current update), obj (Objective object), and opt (Optimizer object):

# Custom step size control: multiplies the factor found by a line search on
# [0, 10] with a time-based decay factor (decay = 0.1).
# Arguments follow the required signature: x (current value), u (current
# update), obj (Objective object), opt (Optimizer object).
myStepSizeControl = function(x, u, obj, opt) {
  line_search = stepSizeControlLineSearch(0, 10)
  time_decay = stepSizeControlDecayTime(0.1)
  factor_ls = line_search(x, u, obj, opt)
  factor_decay = time_decay(x, u, obj, opt)
  factor_ls * factor_decay
}

# Run gradient descent for 100 steps with the combined control function.
# Use an id that describes the step size control actually applied.
my_oo = OptimizerGD$new(obj_banana, x_start = x0, lr = lr, id = "GD with Line Search and Time Decay", print_trace = FALSE)
my_oo$optimize(100, myStepSizeControl)
# Show the last recorded steps of the optimizer archive.
tail(my_oo$archive)
#>                  x_out                x_in                      update fval_out
#>                 <list>              <list>                      <list>    <num>
#> 1: 0.5000029,0.8666667 0.5000032,0.8666667 -5.170309e-07, 1.110223e-10        1
#> 2: 0.5000026,0.8666667 0.5000029,0.8666667   -4.67848e-07, 0.00000e+00        1
#> 3: 0.5000024,0.8666667 0.5000026,0.8666667 -4.238832e-07,-1.110223e-10        1
#> 4: 0.5000022,0.8666667 0.5000024,0.8666667 -3.843592e-07, 1.110223e-10        1
#> 5: 0.5000020,0.8666667 0.5000022,0.8666667 -3.487211e-07, 0.000000e+00        1
#> 6: 0.5000018,0.8666667 0.5000020,0.8666667 -3.168577e-07, 0.000000e+00        1
#>    fval_in    lr step_size
#>      <num> <num>     <num>
#> 1:       1  0.01 0.5942504
#> 2:       1  0.01 0.5888878
#> 3:       1  0.01 0.5829326
#> 4:       1  0.01 0.5774413
#> 5:       1  0.01 0.5722183
#> 6:       1  0.01 0.5667869
#>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    objective
#>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       <list>
#> 1: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#> 2: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#> 3: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#> 4: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#> 5: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#> 6: <Objective>\n  Public:\n    addLogFun: function (l, label) \n    archive: active binding\n    assertX: function (x, ...) \n    clearArchive: function () \n    clone: function (deep = FALSE) \n    eval: function (x) \n    evalStore: function (x) \n    grad: function (x) \n    hess: function (x) \n    id: TF_banana\n    initialize: function (id, fun, label = "f", xdim, limits_lower = NA, limits_upper = NA, \n    label: banana\n    limits_lower: 0 0\n    limits_upper: 1 1\n    log_funs: active binding\n    minimize: FALSE\n    xdim: active binding\n  Private:\n    p_archive: data.table, data.frame\n    p_fargs: list\n    p_fun: function (x, scale_it = T, scale_low = c(-20, -10), scale_high = c(20, \n    p_gradient: NULL\n    p_gradientFallback: function (x, ...) \n    p_hessian: NULL\n    p_hessianFallback: function (x, ...) \n    p_log_funs: list\n    p_xdim: 2\n    p_xtest: 0 0
#>    momentum  step batch
#>       <num> <int> <num>
#> 1:        0    95     1
#> 2:        0    96     1
#> 3:        0    97     1
#> 4:        0    98     1
#> 5:        0    99     1
#> 6:        0   100     1

Visualizer

Visualizer class

The Visualizer class is initialized by calling as_visualizer(obj) for a given Objective. Further arguments are x1_limits and x2_limits, as well as padding to stretch the limits by a factor and npoints to specify the number of generated points per dimension; hence, npoints^2 points are evaluated to create the initial layers.

Initial layers

An initial layer is always required to which other layers (such as optimization traces) are added step by step. The two available base layers are contour lines $init_layer_contour() and $init_layer_surface().

For both, the first argument is the opacity and the second a colorscale:

# Contour base layer for the Franke objective, drawn at full opacity with a
# custom colorscale whose colors are given as RGB strings.
viz = as_visualizer(obj("TF_franke"))
contour_colors = list(c(0, 1), c("rgb(176,196,222)", "rgb(160,82,45)"))
viz$init_layer_contour(opacity = 1, colorscale = contour_colors)
viz$plot()

# Same contour layer, this time leaving the opacity at its default and using
# named colors for the colorscale.
viz = as_visualizer(obj("TF_franke"))
viz$init_layer_contour(
  colorscale = list(c(0, 1), c("white", "blue"))
)
viz$plot()

# Surface base layer for the Franke objective at full opacity, with a
# white-to-black colorscale.
viz = as_visualizer(obj("TF_franke"))
surface_colors = list(c(0, 1), c("white", "black"))
viz$init_layer_surface(opacity = 1, colorscale = surface_colors)
viz$plot()

Internally, add_trace() from plotly is called. Further arguments are directly passed to it, e.g. by adding contour lines:

viz = as_visualizer(obj("TF_franke"))

# Add 10 grid lines per dimension. The limits can (sometimes) be obtained
# from the objective:
llower = viz$objective$limits_lower
lupper = viz$objective$limits_upper
ssize = (lupper - llower) / 10

# One contour specification per axis, spanning the objective's limits with
# the step size computed above.
contours_x = list(show = TRUE, start = llower[1], end = lupper[1], size = ssize[1], color = "black")
contours_y = list(show = TRUE, start = llower[2], end = lupper[2], size = ssize[2], color = "black")

# `contours` is not consumed by the visualizer itself but forwarded to
# plotly's add_trace().
viz$init_layer_surface(
  opacity = 1,
  colorscale = list(c(0, 1), c("white", "black")),
  contours = list(x = contours_x, y = contours_y)
)
viz$plot()

Optimization traces

As shown previously, optimization traces can be added by $add_optimization_trace(). Let’s optimize our custom linear model objective with the three available optimizers:

# Start from an empty archive so that only the runs below are recorded:
obj_lm$clearArchive()

# One optimizer per available method. All share the same learning rate but
# start from different points.
oo1 = OptimizerGD$new(
  obj_lm,
  x_start = c(0, -0.05),
  lr = 0.001,
  print_trace = FALSE
)
oo2 = OptimizerMomentum$new(
  obj_lm,
  x_start = c(-0.05, 0),
  lr = 0.001,
  print_trace = FALSE
)
oo3 = OptimizerNAG$new(
  obj_lm,
  x_start = c(0, 0),
  lr = 0.001,
  print_trace = FALSE
)

# Run every optimizer for 100 steps.
oo1$optimize(steps = 100)
oo2$optimize(steps = 100)
oo3$optimize(steps = 100)

# Visualize the linear model objective on a manually chosen region.
viz = as_visualizer(obj_lm, x1_limits = c(-0.5, 5), x2_limits = c(-3.2, 2.8))

viz$init_layer_contour()

# Add one trace per optimizer. `add_marker_at` selects the steps that get a
# marker: ten evenly spaced steps for the first trace, three hand-picked steps
# (each with its own shape) for the second, and only the final step for the
# third. `length.out` is spelled out rather than relying on partial argument
# matching.
marker_steps = round(seq(1, 100, length.out = 10L))
viz$add_optimization_trace(oo1, add_marker_at = marker_steps)
viz$add_optimization_trace(
  oo2,
  add_marker_at = c(1, 50, 90),
  marker_shape = c("square", "star-triangle-down", "cross")
)
viz$add_optimization_trace(oo3, add_marker_at = 100, marker_shape = "star")

viz$plot()

Setting the layout and scene (TODO)

# Use a horizontal legend anchored at its center at x = 0.5.
viz$set_layout(
  legend = list(orientation = "h", xanchor = "center", x = 0.5)
)
viz$plot()

Overlaying layers (TODO)

# NOTE: after this assignment `obj` names both the objective instance and the
# `obj()` function called on the right-hand side. Calls of the form `obj(...)`
# keep working because R skips non-function bindings when resolving a call.
obj = obj("TF_banana")
viz = as_visualizer(obj)
viz$init_layer_surface()

# Point that is passed to both the Taylor and the Hessian layer.
x0 = c(0.85, 0.47)

viz$add_layer_taylor(
  x0,
  npoints_per_dim = 5,
  degree = 1,
  x1margin = 0.3,
  x2margin = 0.3,
  contours = list(
    x = list(show = TRUE, start = 0, end = 1, size = 0.03, color = "black"),
    y = list(show = TRUE, start = 0, end = 1, size = 0.03, color = "black")
  )
)
viz$add_layer_hessian(x0)
viz$plot()

Manual layers (TODO)

# Build a semi-transparent surface and keep the returned plot object so that
# further traces can be added to it manually with plotly.
obj = obj("TF_banana")
viz = as_visualizer(obj)
viz$init_layer_surface(opacity = 0.5)
p = viz$plot()
class(p)
#> [1] "plotly"     "htmlwidget"

# Simulate noisy evaluations of the objective at random points.
nsim = 100
grid = data.frame(x = runif(nsim), y = runif(nsim))
# Evaluate row-wise on the x/y columns only. apply() coerces its input to a
# matrix anyway; selecting the columns explicitly keeps this line re-runnable
# once `z` has been added to `grid`.
xy = as.matrix(grid[, c("x", "y")])
grid$z = apply(xy, 1, viz$objective$eval) + rnorm(nsim, sd = 0.05)
p %>% add_trace(data = grid, x = ~x, y = ~y, z = ~z, mode = "markers",
  type = "scatter3d", marker = list(symbol = "cross"))

# List of marker symbols. `jsonedit = FALSE` is named and spelled out because
# `F` is an ordinary variable that can be reassigned.
head(schema(jsonedit = FALSE)$traces$scatter3d$attributes$marker$symbol$values)
#> [1] "circle"       "circle-open"  "cross"        "diamond"      "diamond-open"
#> [6] "square"