Commit ea5c6d18 authored by phit0's avatar phit0
Browse files

[BO] cal_simulation_error part 1

parent 3733e9ce
......@@ -447,6 +447,8 @@ BO <- R6::R6Class("BO",
dp_parameter = character(),
param_min = numeric(),
param_max = numeric())
private$.scale_fun <- function(x) return(x)
private$.unscale_fun <- function(x) return(x)
},
add_parameter = function(...) {
assertthat::assert_that(...length() == 1,
......@@ -480,65 +482,83 @@ BO <- R6::R6Class("BO",
#' \code{\link[lhs]{randomLHS}} and then transformed (if desired) to the
#' respective range via \code{\link[stats]{qunif}} and the minimum- and maximum
#' values specified in `calibration_set`.
#'
#' @param calibration_set *tibble* with columns *file_ext*, *mkey, skey, spec*,
#' *min*, and *max*. Best created via [cal_create_calibration_set()].
#' @param n_samples *integer* for the number of samples for each parameter
#' @param interval_01 *logical* should the sample be from the (0,1) interval?
#' @param scale_which *character* that identifies the parameters in
#' `calibration_set` that should be scaled. Default is *NULL*, then all parameters
#' will be scaled according to *scale_fun*.
#' @param scale_fun *function* that allows sampling from a scaled distribution, e.g.
#' if the values are on a *log* scale. Default is `I()` i.e. no transformation.
#' @param unscale_fun *function* inverse of the previous function to transform
#' parameters back after sampling. Default is `I()` as well.
#'
#' @return *tibble* with as many rows as parameters, 6 columns for parameter keys
#' and the remaining `n_samples` columns containing sampled parameter values.
#' @export
#'
#' @examples \dontrun{
#' calibration_set <- cal_create_calibration_set(
#' c("mmp$MEDIUM_PROPERTIES1$PERMEABILITY_TENSOR", "ISOTROPIC", 1.0e-4, 1.0e-2),
#' c("mmp$MEDIUM_PROPERTIES2$PERMEABILITY_TENSOR", "ISOTROPIC", 1.0e-9, 1.0e-4),
#' c("mmp$MEDIUM_PROPERTIES3$PERMEABILITY_TENSOR", "ISOTROPIC", 1.0e-7, 1.0e-3),
#' c("mmp$MEDIUM_PROPERTIES4$PERMEABILITY_TENSOR", "ISOTROPIC", 1.0e-7, 1.0e-3)
#' )
#'
#' # sample starting parameters from calibration set
#' init <- cal_sample_parameters(calibration_set,
#' n_samples = 4,
#' interval_01 = FALSE,
#' scale_fun = log10,
#' unscale_fun = function(x) 10**x,
#')
cal_sample_parameters = function(n_samples, interval_01 = FALSE) {
  # Draw a Latin hypercube sample for every parameter in `self$cal_set`
  # and store the result in `private$.par_df`.
  #
  # @param n_samples *integer* number of samples per parameter; one
  #   "sample_<i>" column is appended for each sample.
  # @param interval_01 *logical* if TRUE the raw (0,1) LHS values are
  #   stored as-is, otherwise they are mapped back onto the parameter
  #   ranges via private$from01().
  d <- nrow(self$cal_set)
  # lhs::randomLHS returns n_samples x d; transpose so each parameter
  # is a row and each sample a column
  smpl <- t(lhs::randomLHS(n = n_samples, d)) %>%
    'colnames<-' (paste0("sample_", seq_len(n_samples)))
  smpl <- tibble::as_tibble(cbind(self$cal_set, smpl))
  if (interval_01) {
    private$.par_df <- smpl
  } else {
    # transform from unit interval to the actual parameter ranges
    private$.par_df <- private$from01(smpl)
  }
},
cal_simulation_error = function(ensemble_path = NULL,
                                ensemble_cores,
                                ensemble_name
                                ) {
  # Run the simulation(s) for the currently sampled parameters
  # (private$.par_df) and evaluate the objective function on each
  # result. Returns list(error, resids) where `error` is the metric
  # per sample and `resids` a list of residual vectors.
  # NOTE(review): ensemble_path / ensemble_cores / ensemble_name are
  # currently unused in the body -- TODO confirm intended wiring.
  # NOTE(review): `exp_data` is not defined in this scope; presumably
  # it should be a field of the BO object -- confirm.
  #TODO: add input sanity checks
  # number of samples = value columns of .par_df (keys are in nv)
  n_samples <- ncol(private$.par_df[-private$nv])
  # check if ensemble
  if (n_samples > 1) {
    ens1 <- private$cal_change_parameters(n_samples)
    # run ensemble and get output
    ens1$run_simulation(parallel = TRUE)
    lapply(ens1$ensemble, ogs6_read_output_files)
    # compute objective function metric and residuals per member
    resids <- vector("list", n_samples)
    error <- rep(NA, n_samples)
    for (i in seq_len(n_samples)) {
      # use the configured objective function consistently with the
      # single-run branch below (was a bare `objective_function`)
      output <- private$.objective_function(ens1$ensemble[[i]], exp_data)
      error[[i]] <- output[[1]]
      resids[[i]] <- output[[2]]
    }
  } else {
    private$cal_change_parameters(n_samples)
    # single run on the object's own ogs6 model
    e <- ogs6_run_simulation(self$ogs6_obj,
                             write_logfile = private$.log_output)
    # === read output ===
    # tryCatch handlers must be functions; retry once after a short
    # wait if reading the output files fails (e.g. file not flushed yet)
    tryCatch(
      ogs6_read_output_files(self$ogs6_obj),
      error = function(err) {
        Sys.sleep(1)
        ogs6_read_output_files(self$ogs6_obj)
      }
    )
    # compute error
    output <- private$.objective_function(self$ogs6_obj, exp_data)
    error <- output[[1]]
    resids <- list(output[[2]])
  }
  return(list(error, resids))
},
run = function(max_it, verbose) {
  # Entry point for the main Bayesian-optimization loop.
  # BO "updates" will be performed until max_it is reached.
  # Each update evaluates the ogs simulation and computes the
  # objective function via cal_simulation_error().
  # NOTE(review): `max_it` and `verbose` are currently unused, and
  # cal_simulation_error() is called without its required ensemble_*
  # arguments -- this looks like work in progress; confirm.
  self$cal_simulation_error()
}
......@@ -596,6 +616,29 @@ BO <- R6::R6Class("BO",
private$.objective_function <- value
}
self
},
scale_fun = function(value) {
  # Active binding: get or set the forward scaling function used when
  # sampling (e.g. log10). Setting validates that a function is given.
  if (!missing(value)) {
    assertthat::assert_that(is.function(value))
    private$.scale_fun <- value
  } else {
    private$.scale_fun
  }
},
unscale_fun = function(value) {
  # Active binding: get or set the inverse of `scale_fun`
  # (e.g. function(x) 10**x). Setting validates the input.
  if (!missing(value)) {
    assertthat::assert_that(is.function(value))
    private$.unscale_fun <- value
  } else {
    private$.unscale_fun
  }
},
par_df = function(value) { # field for read only
  # Active binding exposing the sampled parameter data frame.
  if (missing(value)) {
    return(private$.par_df)
  } else {
    # Read-only: fail loudly instead of silently discarding the value
    # (previous code only cat()ed a message, with a typo, and ignored
    # the assignment).
    stop("`par_df` is read-only; create it with BO$cal_sample_parameters()",
         call. = FALSE)
  }
}
),
......@@ -612,7 +655,11 @@ BO <- R6::R6Class("BO",
residuals_df = NULL,
.kappa = NULL, # no default can be added(?)
.objective_function = NULL,
par_df = NULL,
.par_df = NULL,
.scale_fun = NULL,
.unscale_fun = NULL,
.log_output = FALSE,
nv = c(1:4),
#' @field to01
#' Transform to unit interval
......@@ -628,27 +675,24 @@ BO <- R6::R6Class("BO",
#' in the unit interval
#'
#' @examples
to01 = function(par_df, scale_which = NULL) {
  # Map sampled parameter values onto the unit interval with punif,
  # applying private$.scale_fun (e.g. log10) to both the values and
  # the min/max bounds first.
  #
  # @param par_df tibble: key columns (positions private$nv) plus one
  #   column per sample.
  # @param scale_which optional character pattern; rows whose key
  #   columns do not match it are left unscaled (identity).
  for (k in seq_len(nrow(par_df))) {
    scf <- private$.scale_fun # reset per row
    if (!is.null(scale_which)) {
      # skip parameters that should not be scaled; match the pattern
      # against the KEY columns (flattened for str_detect), not the
      # numeric sample columns -- TODO confirm nv marks the key columns
      if (!any(stringr::str_detect(unlist(par_df[k, private$nv]),
                                   scale_which))) {
        scf <- I
      }
    }
    par_df[k, -private$nv] <-
      par_df[k, -private$nv] %>%
      unlist %>%
      scf %>% # e.g. log10()
      stats::punif(min = scf(par_df[[k, "param_min"]]),
                   max = scf(par_df[[k, "param_max"]])) %>%
      t # tell tibble this is a row
  }
  invisible(par_df)
},
#' @field from01
#' Transform from unit interval
......@@ -664,26 +708,25 @@ BO <- R6::R6Class("BO",
#' in the actual parameter space
#'
#' @examples
from01 = function(par_df, scale_which = NULL) {
  # Inverse of to01(): map unit-interval samples back onto the
  # parameter ranges with qunif, then undo the scaling via
  # private$.unscale_fun (e.g. function(x) 10**x).
  #
  # @param par_df tibble: key columns (positions private$nv) plus
  #   unit-interval sample columns.
  # @param scale_which optional character pattern; rows whose key
  #   columns do not match it are left unscaled (identity).
  for (k in seq_len(nrow(par_df))) {
    # reset per row so one non-matching row does not affect the rest
    scf <- private$.scale_fun
    uscf <- private$.unscale_fun
    if (!is.null(scale_which)) {
      # skip parameters that should not be scaled; match the pattern
      # against the KEY columns (flattened for str_detect), not the
      # numeric sample columns -- TODO confirm nv marks the key columns
      if (!any(stringr::str_detect(unlist(par_df[k, private$nv]),
                                   scale_which))) {
        scf <- I
        uscf <- I
      }
    }
    par_df[k, -private$nv] <- par_df[k, -private$nv] %>%
      unlist %>%
      stats::qunif(min = scf(par_df[[k, "param_min"]]),
                   max = scf(par_df[[k, "param_max"]])) %>%
      uscf %>% # e.g. 10**x
      t # tell tibble this is a row
  }
  invisible(par_df)
},
#' @field cal_change_parameters
......@@ -701,34 +744,28 @@ BO <- R6::R6Class("BO",
#' @export
#'
#' @examples
cal_change_parameters = function(par_df,
ensemble_path = NULL,
ensemble_name = NULL,
quiet) {
n_samples <- ncol(par_df) - 3
# check if ensemble
is_ens <- n_samples > 1
cal_change_parameters = function(n_samples) {
# transfrom par_df to list that can be read by OGS6 and OGS6_Ensemble
par_list = apply(par_df, 1,
par_list <- apply(private$.par_df, 1,
function(x){
list(x[[1]], as.numeric(x[4:length(par_df)]))
list(x[[1]],
as.numeric(x[-private$nv]))
})
if (is_ens) {
# check if ensemble
if (n_samples > 1) {
# create ensemble ogs6 objects with sampled parameters
ens1 <- OGS6_Ensemble$new(
ogs6_obj = self$ogs6_obj,
parameters = par_list)
if (!quiet) {
message(paste("Preparing ensemble run of", ncol(par_df) - 6))
}
# if (!quiet) {
message(paste("Preparing ensemble run of", n_samples))
# }
} else { # change parameters of existing ogs6_obj
ogs6_obj$update_component(cmpts=par_list)
self$ogs6_obj$update_component(cmpts = par_list)
}
if (is_ens) {
if (n_samples > 1) {
return(ens1)
} else {
return(self)
......@@ -741,67 +778,6 @@ BO <- R6::R6Class("BO",
#' Transform to unit interval
#'
#' @param par_df *tibble* such as explained in [cal_bayesOpt()] with original values
#' @param scale_which *character* (optional) that identifies the parameters to be scaled
#' @param scale_fun *function* (optional) to scale the values, e.g. `log10()`
#' @param unscale_fun *function* (optional) inverse of `scale_fun`, e.g. `10**x`
#'   (kept for interface symmetry; not used in this direction)
#'
#' @return *tibble* with parameter specification and transformed values in the unit interval
#'
#' @examples
to01 <- function(par_df, scale_which = NULL, scale_fun = I, unscale_fun = I) {
  # converts to unit interval
  for (k in seq_len(nrow(par_df))) {
    # Per-row copy of the scale function: previously a single
    # non-matching row overwrote `scale_fun` with I and silently
    # disabled scaling for all subsequent rows.
    scf <- scale_fun
    if (!is.null(scale_which) &&
        !any(stringr::str_detect(unlist(par_df[k, c(1:3)]), scale_which))) {
      # this parameter should not be scaled
      scf <- I
    }
    vals <- scf(unlist(par_df[k, -c(1:3)])) # e.g. log10()
    par_df[k, -c(1:3)] <- t(stats::punif(
      vals,
      min = scf(par_df[[k, "param_min"]]),
      max = scf(par_df[[k, "param_max"]])
    )) # t(): tell the tibble this is a row
  }
  par_df
}
#' Transform from unit interval
#'
#' @param par_df *tibble* such as explained in [cal_bayesOpt()] with values in the unit interval
#' @param scale_which *character* (optional) that identifies the parameters to be scaled
#' @param scale_fun *function* (optional) to scale the values, e.g. `log10()`
#' @param unscale_fun *function* (optional) inverse of `scale_fun`, e.g. `10**x`
#'
#' @return *tibble* with parameter specification and transformed values in the actual parameter space
#'
#' @examples
from01 <- function(par_df, scale_which = NULL, scale_fun = I, unscale_fun = I) {
  # converts from unit interval
  for (k in seq_len(nrow(par_df))) {
    # Per-row copies: previously a single non-matching row overwrote
    # `scale_fun`/`unscale_fun` with I and silently disabled scaling
    # for all subsequent rows.
    scf <- scale_fun
    uscf <- unscale_fun
    if (!is.null(scale_which) &&
        !any(stringr::str_detect(unlist(par_df[k, c(1:3)]), scale_which))) {
      # this parameter should not be scaled
      scf <- I
      uscf <- I
    }
    q <- stats::qunif(unlist(par_df[k, -c(1:3)]),
                      min = scf(par_df[[k, "param_min"]]),
                      max = scf(par_df[[k, "param_max"]]))
    par_df[k, -c(1:3)] <- t(uscf(q)) # e.g. 10**x; t(): assign as a row
  }
  par_df
}
select_diff <- function(xmin_sampled) {
sel_i <- 1
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment