Skip to content
Snippets Groups Projects
Commit 9831f292 authored by Ruben Heinrich's avatar Ruben Heinrich
Browse files

[feature] closes #19 , percentages mode for ensembles added

parent 8a97cd8c
No related branches found
No related tags found
1 merge request!910 chains
......@@ -16,13 +16,17 @@ OGS6_Ensemble <- R6::R6Class(
#' vector of values. Note that the second elements of the sublists must
#' have the same length.
#'@param sequential_mode flag: Defaults to `FALSE`
#'@param percentages_mode flag: Defaults to `TRUE`
#'@importFrom foreach %dopar%
public = list(
initialize = function(ogs6_obj,
parameters,
sequential_mode = FALSE) {
sequential_mode = FALSE,
percentages_mode = TRUE) {
assertthat::assert_that(inherits(ogs6_obj, "OGS6"))
assertthat::assert_that(assertthat::is.flag(sequential_mode))
assertthat::assert_that(assertthat::is.flag(percentages_mode))
private$.ens_path <- ogs6_obj$sim_path
ogs6_obj$sim_path <- paste0(ogs6_obj$sim_path, ogs6_obj$sim_name)
......@@ -41,7 +45,6 @@ OGS6_Ensemble <- R6::R6Class(
})
assertthat::assert_that(is.list(parameters))
assertthat::assert_that(assertthat::is.flag(sequential_mode))
# If not in sequential mode, value vectors must have same length
if(!sequential_mode){
......@@ -54,9 +57,15 @@ OGS6_Ensemble <- R6::R6Class(
assertthat::assert_that(!any(names(parameters) == ""))
}
private$.parameter_values <- lapply(parameters, function(x){
x[[2]]
})
second_elements <- lapply(parameters, function(x){x[[2]]})
if(percentages_mode){
private$.parameter_percs <- second_elements
private$calc_values_by_percs(ogs6_obj)
names(private$.parameter_values) <- names(parameters)
}else{
private$.parameter_values <- second_elements
}
private$make_ensemble(ogs6_obj$clone(deep = TRUE),
sequential_mode)
......@@ -164,6 +173,12 @@ OGS6_Ensemble <- R6::R6Class(
private$.dp_parameters
},
#'@field parameter_percs
#'Getter for private parameter '.parameter_percs'
parameter_percs = function() {
private$.parameter_percs
},
#'@field parameter_values
#'Getter for private parameter '.parameter_values'
parameter_values = function() {
......@@ -185,15 +200,32 @@ OGS6_Ensemble <- R6::R6Class(
private = list(
# Calculates values based on given percentages
calc_values_by_percs = function(ogs6_obj){
for(i in seq_len(length(private$.parameter_percs))){
print(self$dp_parameters[[i]])
val <- eval(parse(text = self$dp_parameters[[i]]))
val_vec <- lapply(private$.parameter_percs[[i]], function(x){
val + (val * (x / 100))
})
private$.parameter_values <- c(self$parameter_values,
list(val_vec))
}
},
#@description
#Creates the actual ensemble.
#@param ogs6_obj OGS6: Simulation object
#@param sequential_mode flag: Optional:
make_ensemble = function(ogs6_obj,
sequential_mode) {
sequential_mode,
percentages_mode) {
orig_sim_name <- ogs6_obj$sim_name
orig_sim_path <- ogs6_obj$sim_path
parameter_values <- self$parameter_values
# If sequential, put parameters behind each other
......@@ -280,6 +312,7 @@ OGS6_Ensemble <- R6::R6Class(
.ens_path = NULL,
.ensemble = list(),
.dp_parameters = list(),
.parameter_percs = list(),
.parameter_values = list()
)
)
......
......@@ -89,8 +89,6 @@ run_simulation <- function(ogs6_obj,
args = ogs6_args)
}
closeAllConnections()
# Read in generated .pvd file and add it to ogs6_obj
pvd_paths <- list.files(ogs6_obj$sim_path,
"\\.pvd$",
......
......@@ -20,17 +20,46 @@ test_that("OGS6_Ensemble initialization works", {
value = 4
))
ogs6_ens <- OGS6_Ensemble$new(
# Test with sequential_mode and percentages_mode off
ogs6_ens_noseq_noper <- OGS6_Ensemble$new(
ogs6_obj = ogs6_obj,
parameters = list(list(ogs6_obj$parameters[[1]]$value, c(2, 3)),
list(ogs6_obj$parameters[[2]]$value, c(5, 6)))
list(ogs6_obj$parameters[[2]]$value, c(5, 6))),
sequential_mode = FALSE,
percentages_mode = FALSE
)
expect_equal(length(ogs6_ens$ensemble), 2)
expect_equal(ogs6_ens$ensemble[[1]]$parameters[[1]]$value, 2)
expect_equal(ogs6_ens$ensemble[[2]]$parameters[[1]]$value, 3)
expect_equal(ogs6_ens$ensemble[[1]]$parameters[[2]]$value, 5)
expect_equal(ogs6_ens$ensemble[[2]]$parameters[[2]]$value, 6)
expect_equal(length(ogs6_ens_noseq_noper$ensemble), 2)
expect_equal(ogs6_ens_noseq_noper$ensemble[[1]]$parameters[[1]]$value, 2)
expect_equal(ogs6_ens_noseq_noper$ensemble[[2]]$parameters[[1]]$value, 3)
expect_equal(ogs6_ens_noseq_noper$ensemble[[1]]$parameters[[2]]$value, 5)
expect_equal(ogs6_ens_noseq_noper$ensemble[[2]]$parameters[[2]]$value, 6)
# Test with sequential_mode on and percentages_mode off
ogs6_ens_seq_noper <- OGS6_Ensemble$new(
ogs6_obj = ogs6_obj,
parameters = list(a = list(ogs6_obj$parameters[[1]]$value, c(2, 3)),
b = list(ogs6_obj$parameters[[2]]$value, c(5, 6))),
sequential_mode = TRUE,
percentages_mode = FALSE
)
expect_equal(length(ogs6_ens_seq_noper$ensemble), 4)
expect_equal(ogs6_ens_seq_noper$ensemble[[4]]$parameters[[2]]$value, 6)
# Test with sequential_mode off and percentages_mode on
ogs6_ens_noseq_per <- OGS6_Ensemble$new(
ogs6_obj = ogs6_obj,
parameters = list(list(ogs6_obj$parameters[[1]]$value, c(50, -75)),
list(ogs6_obj$parameters[[2]]$value, c(50, -75))),
sequential_mode = FALSE,
percentages_mode = TRUE
)
expect_equal(length(ogs6_ens_noseq_per$ensemble), 2)
expect_equal(ogs6_ens_noseq_per$ensemble[[1]]$parameters[[1]]$value, 1.5)
expect_equal(ogs6_ens_noseq_per$ensemble[[2]]$parameters[[2]]$value, 1)
})
......
---
title: "Ensemble Guide"
title: "r2ogs6 Ensemble Guide"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Ensemble Guide}
%\VignetteIndexEntry{r2ogs6 Ensemble Guide}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
```{r, include = FALSE}
knitr::opts_chunk$set(
fig.width = 6,
fig.align = "center",
collapse = TRUE,
comment = "#>"
comment = "#>",
message = FALSE
)
devtools::load_all(".")
......@@ -18,6 +21,7 @@ devtools::load_all(".")
```{r setup}
library(r2ogs6)
library(ggplot2)
library(dplyr)
```
......@@ -65,63 +69,31 @@ prj_path <- system.file("examples/Theis_problem/benchmark_files/",
read_in_prj(ogs6_obj, prj_path)
```
We already know *which* parameters we want to change between the different simulations. But before we can create an ensemble, we need to know what their respective *values* should be. Say we don't want to hardcode each value, but instead examine the effects of changing our parameters by 1%, 10% and 50%. Below I've supplied a little function that simply takes a value and creates a tibble based on the given `deviations`.
```{r}
# This returns a df so we can keep the data together for plotting
get_values <- function(value,
deviations = c(-0.5, -0.1, -0.01, 0, 0.01, 0.1, 0.5),
zero_to_one = FALSE){
values_and_devs <- tibble::tibble(value = numeric(),
dev = numeric())
for(i in seq_len(length(deviations))){
if(zero_to_one &&
!dplyr::between((value + (value * deviations[[i]])), 0, 1)){
next
}
values_and_devs <-
add_row(values_and_devs,
value = value + (value * deviations[[i]]),
dev = deviations[[i]])
}
return(invisible(values_and_devs))
}
### Single-parameter sequential run
# demo
paste(get_values(100)$value, collapse = " ")
```
We can now use this function for a minimal working example. Let's set up an ensemble with only the `storage` parameter. First we create the corresponding vector...
Let's create a small ensemble where we only alter the value of `storage`. Say we don't want to hardcode the values, but instead examine the effects of changing `storage` by 1%, 10% and 50%. We can use the `percentages_mode` parameter of `OGS6_Ensemble` for this. It already defaults to `TRUE`, below I'm merely being explicit for demonstration purposes.
```{r}
# Create the tibble based on the original value
storage <- get_values(ogs6_obj$media[[1]]$properties[[4]]$value)
paste(storage$value, collapse = " ")
```
... then we pass the original reference as well as the vector to an ensemble object.
# Assign percentages to a variable in case we want to reuse them later
percentages <- c(-50, -10, -1, 0, 1, 10, 50)
```{r}
# First we define an ensemble object
# Define an ensemble object
ogs6_ens <-
OGS6_Ensemble$new(
ogs6_obj = ogs6_obj,
parameters = list(list(ogs6_obj$media[[1]]$properties[[4]]$value,
storage$value)))
percentages)),
percentages_mode = TRUE)
# Now we run the simulations
exit_codes <- ogs6_ens$run_simulation()
paste(exit_codes, collapse = " ")
```
### Plot results
#### Single simulation
#### Plot results for single simulation
Our simulations (hopefully) ran successfully - great! Now it'd be nice to see some results. Say we're interested in the `pressure` data of the first simulation.
Our simulations (hopefully) ran successfully - great! Now it'd be nice to see some results. Say we're interested in the `pressure` data.
```{r}
# This will get a list of tibbles - one for each simulation.
......@@ -132,7 +104,7 @@ storage_tbls <-
})
```
You may leave out the `point_ids` parameter. It defaults to all points in the dataset - I specify it here because there's about 500 which would slow down building this vignette significantly.
You may leave out the `point_ids` parameter. It defaults to all points in the dataset - I specify it here because there's about 500 which would slow down building this vignette.
```{r}
# Let's look at the first 10 rows of the dataset for the first simulation.
......@@ -141,9 +113,8 @@ head(storage_tbls[[1]], 10)
You can see there's one row per timestep for each point ID. Say we want to plot the *average* pressure for each `timestep`, that is, the mean of the `pressure` values where the rows have the same `timestep`.
```{r}
# Get average pressure from first dataframe
# Get average pressure from first dataframe (= from first simulation)
avg_pr_df <- storage_tbls[[1]] %>%
group_by(timestep) %>%
summarise(avg_pressure = mean(pressure))
......@@ -159,29 +130,30 @@ ggplot(data = avg_pr_df) +
```
#### Multiple simulations
#### Plot results for multiple simulations
```{r}
# Get list of dataframes that each look like avg_pr_df
avg_pr_dfs <- list()
# Prepare dataframes for plotting
reformatted_dfs <- list()
for(i in seq_len(length(storage_tbls))){
old_df <- storage_tbls[[i]]
new_df <- old_df %>%
group_by(timestep) %>%
tbl <- storage_tbls[[i]] %>%
group_by(timestep) %>%
summarise(avg_pressure = mean(pressure))
# Add column so we can separate the values based on their old df
new_df$df_id <- rep(i, nrow(new_df))
# Add column so we can separate values based on their old df
tbl$df_id <- rep(i, nrow(tbl))
# Add column so we can plot based on old df rownames
new_df$orig_rwns <- as.numeric(row.names(new_df))
tbl$orig_rwns <- as.numeric(row.names(tbl))
avg_pr_dfs <- c(avg_pr_dfs, list(new_df))
reformatted_dfs <- c(reformatted_dfs, list(tbl))
}
# Combine dataframes into single dataframe for plot
avg_pr_combined_df <- bind_rows(avg_pr_dfs)
avg_pr_combined_df <- bind_rows(reformatted_dfs)
# Make plot
ggplot(avg_pr_combined_df,
......@@ -207,20 +179,11 @@ ggplot(avg_pr_combined_df, aes(x = orig_rwns,
xlab("Timestep")
```
### Multi-parameter sequential run
#### Matrix plot
So far we've only considered the `storage` parameter. Now we want to have a look at how the other two parameters influence the simulation. We will use our `get_values` function again and keep the default deviations.
```{r}
permeability <- get_values(ogs6_obj$media[[1]]$properties[[1]]$value)
# porosity will have length 4 because its value range is between 0 and 1
porosity <- get_values(ogs6_obj$media[[1]]$properties[[3]]$value,
zero_to_one = TRUE)
```
So far we've only considered the `storage` parameter. Now we want to have a look at how the other two parameters influence the simulation, so let's put them into an ensemble together. For `permeability` we can reuse the `percentages` we already have. For `porosity`, this doesn't work - the original value is 1 and its values can only range between 0 and 1, so we'll supply a shorter vector.
Now we'll put them into an ensemble together. This time it's important we set the `OGS6_Ensemble` parameter `sequential_mode` to `TRUE` as this will change the supplied parameters *sequentially* which means in the end we have 19 (7 + 7 + 4) simulations which is equal to the sum of elements in the value vectors you supply (I've named them `values` below for clarity, naming them is optional though).
This time it's important we set the `OGS6_Ensemble` parameter `sequential_mode` to `TRUE` as this will change the supplied parameters *sequentially* which means in the end we have 18 (7 + 7 + 4) simulations which is equal to the sum of elements in the value vectors you supply (I've named them `values` below for clarity, naming them is optional though).
The default `FALSE` would give an error message because our value vectors do not have the same length and even if they had, it wouldn't do what we want - the number of simulations would equal the length of *one* value vector (thus requiring them to be of the same length). Generally, set `sequential_mode` to `TRUE` if you want to examine the influence of parameters on a simulation *independently*. If you want to examine how the parameters influence *each other* as in wanting to test parameter combinations, the default mode is the way to go.
......@@ -235,11 +198,11 @@ ogs6_ens_big <-
OGS6_Ensemble$new(
ogs6_obj = ogs6_obj,
parameters = list(per = list(ogs6_obj$media[[1]]$properties[[1]]$value,
values = permeability$value),
values = percentages),
por = list(ogs6_obj$media[[1]]$properties[[3]]$value,
values = porosity$value),
c(-50, -10, -1, 0)),
sto = list(ogs6_obj$media[[1]]$properties[[4]]$value,
values = storage$value)),
values = percentages)),
sequential_mode = TRUE)
exit_codes <- ogs6_ens_big$run_simulation()
......@@ -257,12 +220,9 @@ per_por_sto_tbls <-
})
```
Plotting time? Not quite. We can't really differentiate what values were used where yet. Since we set `sequential_mode` to `TRUE`, `OGS_Ensemble` allows us to utilize `relevant_parameter_at()` to fix this. It looks up which value vector the parameter we changed for each simulation came from. We will also put in an extra column `dev` so we can group by deviations.
Plotting time? Not quite. We can't really differentiate what values were used where yet. Since we set `sequential_mode` to `TRUE`, `OGS_Ensemble` allows us to utilize `relevant_parameter_at()` to fix this. It looks up which value vector the parameter we changed for each simulation came from. We will also put in an extra column `percs` so we can group by percentages.
```{r}
# Combine deviation columns
deviations_comb <- c(permeability$dev, porosity$dev, storage$dev)
# Get list of dataframes
x_pr_dfs <- list()
......@@ -271,7 +231,7 @@ for(i in seq_len(length(per_por_sto_tbls))){
# Add name and deviation column
new_df$name <- ogs6_ens_big$relevant_parameter_at(i)
new_df$dev <- deviations_comb[[i]]
new_df$percs <- unlist(ogs6_ens_big$parameter_percs)[[i]]
x_pr_dfs <- c(x_pr_dfs, list(new_df))
}
......@@ -287,12 +247,13 @@ x_pr_combined_df <- bind_rows(x_pr_dfs)
ggplot(x_pr_combined_df,
aes(x = x,
y = pressure,
group = dev)) +
geom_point(aes(color = as.factor(dev))) +
group = percs)) +
geom_point(aes(color = as.factor(percs))) +
xlab("x point coordinate") +
labs(color = "%") +
facet_grid(cols = vars(name))
```
Ta-Daa! All nicely grouped. We can see `permeability` has the most influence on the pressure. Though they may seem suspicious, `porosity` and `storage` are being plotted correctly - the points are just being placed right on top of each other. Since `porosity` can't go over the value `1` which was the original value, our value vector only went from -50% to 0% which is why the line colors of `porosity` and `storage` differ.
......@@ -311,3 +272,7 @@ setup
plot single
```{r, include = FALSE}
# Cleanup created folders
do.call(file.remove, list(list.files(testdir_path, full.names = TRUE)))
```
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment