From 9831f2923af1f30d564d39da708200c22c9a7d0c Mon Sep 17 00:00:00 2001 From: aheinri5 <Anna@netzkritzler.de> Date: Sat, 6 Feb 2021 21:46:04 +0100 Subject: [PATCH] [feature] closes #19 , percentages mode for ensembles added --- R/ogs6_ensemble.R | 49 +++++++-- R/sim_utils.R | 2 - tests/testthat/test-ogs6_ensemble.R | 43 ++++++-- vignettes/ensemble_workflow_vignette.Rmd | 125 ++++++++--------------- 4 files changed, 122 insertions(+), 97 deletions(-) diff --git a/R/ogs6_ensemble.R b/R/ogs6_ensemble.R index a3279c3..a2077ba 100644 --- a/R/ogs6_ensemble.R +++ b/R/ogs6_ensemble.R @@ -16,13 +16,17 @@ OGS6_Ensemble <- R6::R6Class( #' vector of values. Note that the second elements of the sublists must #' have the same length. #'@param sequential_mode flag: Defaults to `FALSE` + #'@param percentages_mode flag: Defaults to `TRUE` #'@importFrom foreach %dopar% public = list( initialize = function(ogs6_obj, parameters, - sequential_mode = FALSE) { + sequential_mode = FALSE, + percentages_mode = TRUE) { assertthat::assert_that(inherits(ogs6_obj, "OGS6")) + assertthat::assert_that(assertthat::is.flag(sequential_mode)) + assertthat::assert_that(assertthat::is.flag(percentages_mode)) private$.ens_path <- ogs6_obj$sim_path ogs6_obj$sim_path <- paste0(ogs6_obj$sim_path, ogs6_obj$sim_name) @@ -41,7 +45,6 @@ OGS6_Ensemble <- R6::R6Class( }) assertthat::assert_that(is.list(parameters)) - assertthat::assert_that(assertthat::is.flag(sequential_mode)) # If not in sequential mode, value vectors must have same length if(!sequential_mode){ @@ -54,9 +57,15 @@ OGS6_Ensemble <- R6::R6Class( assertthat::assert_that(!any(names(parameters) == "")) } - private$.parameter_values <- lapply(parameters, function(x){ - x[[2]] - }) + second_elements <- lapply(parameters, function(x){x[[2]]}) + + if(percentages_mode){ + private$.parameter_percs <- second_elements + private$calc_values_by_percs(ogs6_obj) + names(private$.parameter_values) <- names(parameters) + }else{ + private$.parameter_values <- second_elements + } private$make_ensemble(ogs6_obj$clone(deep = TRUE), sequential_mode) @@ -164,6 +173,12 @@ OGS6_Ensemble <- R6::R6Class( private$.dp_parameters }, + #'@field parameter_percs + #'Getter for private parameter '.parameter_percs' + parameter_percs = function() { + private$.parameter_percs + }, + #'@field parameter_values #'Getter for private parameter '.parameter_values' parameter_values = function() { @@ -185,15 +200,32 @@ OGS6_Ensemble <- R6::R6Class( private = list( + # Calculates values based on given percentages + calc_values_by_percs = function(ogs6_obj){ + + for(i in seq_len(length(private$.parameter_percs))){ + + print(self$dp_parameters[[i]]) + val <- eval(parse(text = self$dp_parameters[[i]])) + + val_vec <- lapply(private$.parameter_percs[[i]], function(x){ + val + (val * (x / 100)) + }) + + private$.parameter_values <- c(self$parameter_values, + list(val_vec)) + } + }, + #@description #Creates the actual ensemble. - #@param ogs6_obj OGS6: Simulation object - #@param sequential_mode flag: Optional: make_ensemble = function(ogs6_obj, - sequential_mode) { + sequential_mode, + percentages_mode) { orig_sim_name <- ogs6_obj$sim_name orig_sim_path <- ogs6_obj$sim_path + parameter_values <- self$parameter_values # If sequential, put parameters behind each other @@ -280,6 +312,7 @@ OGS6_Ensemble <- R6::R6Class( .ens_path = NULL, .ensemble = list(), .dp_parameters = list(), + .parameter_percs = list(), .parameter_values = list() ) ) diff --git a/R/sim_utils.R b/R/sim_utils.R index 3047ccd..268ea68 100644 --- a/R/sim_utils.R +++ b/R/sim_utils.R @@ -89,8 +89,6 @@ run_simulation <- function(ogs6_obj, args = ogs6_args) } - closeAllConnections() - # Read in generated .pvd file and add it to ogs6_obj pvd_paths <- list.files(ogs6_obj$sim_path, "\\.pvd$", diff --git a/tests/testthat/test-ogs6_ensemble.R b/tests/testthat/test-ogs6_ensemble.R index 857b0ca..9c90c51 100644 --- a/tests/testthat/test-ogs6_ensemble.R +++ b/tests/testthat/test-ogs6_ensemble.R @@ -20,17 +20,46 @@ test_that("OGS6_Ensemble initialization works", { value = 4 )) - ogs6_ens <- OGS6_Ensemble$new( + # Test with sequential_mode and percentages_mode off + ogs6_ens_noseq_noper <- OGS6_Ensemble$new( ogs6_obj = ogs6_obj, parameters = list(list(ogs6_obj$parameters[[1]]$value, c(2, 3)), - list(ogs6_obj$parameters[[2]]$value, c(5, 6))) + list(ogs6_obj$parameters[[2]]$value, c(5, 6))), + sequential_mode = FALSE, + percentages_mode = FALSE ) - expect_equal(length(ogs6_ens$ensemble), 2) - expect_equal(ogs6_ens$ensemble[[1]]$parameters[[1]]$value, 2) - expect_equal(ogs6_ens$ensemble[[2]]$parameters[[1]]$value, 3) - expect_equal(ogs6_ens$ensemble[[1]]$parameters[[2]]$value, 5) - expect_equal(ogs6_ens$ensemble[[2]]$parameters[[2]]$value, 6) + expect_equal(length(ogs6_ens_noseq_noper$ensemble), 2) + expect_equal(ogs6_ens_noseq_noper$ensemble[[1]]$parameters[[1]]$value, 2) + expect_equal(ogs6_ens_noseq_noper$ensemble[[2]]$parameters[[1]]$value, 3) + expect_equal(ogs6_ens_noseq_noper$ensemble[[1]]$parameters[[2]]$value, 5) + expect_equal(ogs6_ens_noseq_noper$ensemble[[2]]$parameters[[2]]$value, 6) + + # Test with sequential_mode on and percentages_mode off + ogs6_ens_seq_noper <- OGS6_Ensemble$new( + ogs6_obj = ogs6_obj, + parameters = list(a = list(ogs6_obj$parameters[[1]]$value, c(2, 3)), + b = list(ogs6_obj$parameters[[2]]$value, c(5, 6))), + sequential_mode = TRUE, + percentages_mode = FALSE + ) + + expect_equal(length(ogs6_ens_seq_noper$ensemble), 4) + expect_equal(ogs6_ens_seq_noper$ensemble[[4]]$parameters[[2]]$value, 6) + + + # Test with sequential_mode off and percentages_mode on + ogs6_ens_noseq_per <- OGS6_Ensemble$new( + ogs6_obj = ogs6_obj, + parameters = list(list(ogs6_obj$parameters[[1]]$value, c(50, -75)), + list(ogs6_obj$parameters[[2]]$value, c(50, -75))), + sequential_mode = FALSE, + percentages_mode = TRUE + ) + + expect_equal(length(ogs6_ens_noseq_per$ensemble), 2) + expect_equal(ogs6_ens_noseq_per$ensemble[[1]]$parameters[[1]]$value, 1.5) + expect_equal(ogs6_ens_noseq_per$ensemble[[2]]$parameters[[2]]$value, 1) }) diff --git a/vignettes/ensemble_workflow_vignette.Rmd b/vignettes/ensemble_workflow_vignette.Rmd index f012fa7..35c664b 100644 --- a/vignettes/ensemble_workflow_vignette.Rmd +++ b/vignettes/ensemble_workflow_vignette.Rmd @@ -1,16 +1,19 @@ --- -title: "Ensemble Guide" +title: "r2ogs6 Ensemble Guide" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Ensemble Guide} + %\VignetteIndexEntry{r2ogs6 Ensemble Guide} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( + fig.width = 6, + fig.align = "center", collapse = TRUE, - comment = "#>" + comment = "#>", + message = FALSE ) devtools::load_all(".") @@ -18,6 +21,7 @@ devtools::load_all(".") ```{r setup} library(r2ogs6) +library(ggplot2) library(dplyr) ``` @@ -65,63 +69,31 @@ prj_path <- system.file("examples/Theis_problem/benchmark_files/", read_in_prj(ogs6_obj, prj_path) ``` -We already know *which* parameters we want to change between the different simulations. But before we can create an ensemble, we need to know what their respective *values* should be. Say we don't want to hardcode each value, but instead examine the effects of changing our parameters by 1%, 10% and 50%. Below I've supplied a little function that simply takes a value and creates a tibble based on the given `deviations`. -```{r} -# This returns a df so we can keep the data together for plotting -get_values <- function(value, - deviations = c(-0.5, -0.1, -0.01, 0, 0.01, 0.1, 0.5), - zero_to_one = FALSE){ - - values_and_devs <- tibble::tibble(value = numeric(), - dev = numeric()) - - for(i in seq_len(length(deviations))){ - if(zero_to_one && - !dplyr::between((value + (value * deviations[[i]])), 0, 1)){ - next - } - - values_and_devs <- - add_row(values_and_devs, - value = value + (value * deviations[[i]]), - dev = deviations[[i]]) - } - - return(invisible(values_and_devs)) -} +### Single-parameter sequential run -# demo -paste(get_values(100)$value, collapse = " ") -``` -We can now use this function for a minimal working example. Let's set up an ensemble with only the `storage` parameter. First we create the corresponding vector... +Let's create a small ensemble where we only alter the value of `storage`. Say we don't want to hardcode the values, but instead examine the effects of changing `storage` by 1%, 10% and 50%. We can use the `percentages_mode` parameter of `OGS6_Ensemble` for this. It already defaults to `TRUE`, below I'm merely being explicit for demonstration purposes. ```{r} -# Create the tibble based on the original value -storage <- get_values(ogs6_obj$media[[1]]$properties[[4]]$value) - -paste(storage$value, collapse = " ") -``` -... then we pass the original reference as well as the vector to an ensemble object. +# Assign percentages to a variable in case we want to reuse them later +percentages <- c(-50, -10, -1, 0, 1, 10, 50) -```{r} -# First we define an ensemble object +# Define an ensemble object ogs6_ens <- OGS6_Ensemble$new( ogs6_obj = ogs6_obj, parameters = list(list(ogs6_obj$media[[1]]$properties[[4]]$value, - storage$value))) + percentages)), + percentages_mode = TRUE) # Now we run the simulations exit_codes <- ogs6_ens$run_simulation() paste(exit_codes, collapse = " ") ``` - -### Plot results -#### Single simulation +#### Plot results for single simulation -Our simulations (hopefully) ran successfully - great! Now it'd be nice to see some results. Say we're interested in the `pressure` data of the first simulation. +Our simulations (hopefully) ran successfully - great! Now it'd be nice to see some results. Say we're interested in the `pressure` data. ```{r} # This will get a list of tibbles - one for each simulation. @@ -132,7 +104,7 @@ storage_tbls <- }) ``` -You may leave out the `point_ids` parameter. It defaults to all points in the dataset - I specify it here because there's about 500 which would slow down building this vignette significantly. +You may leave out the `point_ids` parameter. It defaults to all points in the dataset - I specify it here because there's about 500 which would slow down building this vignette. ```{r} # Let's look at the first 10 rows of the dataset for the first simulation. @@ -141,9 +113,8 @@ head(storage_tbls[[1]], 10) You can see there's one row per timestep for each point ID. Say we want to plot the *average* pressure for each `timestep`, that is, the mean of the `pressure` values where the rows have the same `timestep`. - ```{r} -# Get average pressure from first dataframe +# Get average pressure from first dataframe (= from first simulation) avg_pr_df <- storage_tbls[[1]] %>% group_by(timestep) %>% summarise(avg_pressure = mean(pressure)) @@ -159,29 +130,30 @@ ggplot(data = avg_pr_df) + ``` -#### Multiple simulations + +#### Plot results for multiple simulations ```{r} -# Get list of dataframes that each look like avg_pr_df -avg_pr_dfs <- list() +# Prepare dataframes for plotting +reformatted_dfs <- list() for(i in seq_len(length(storage_tbls))){ - old_df <- storage_tbls[[i]] - new_df <- old_df %>% - group_by(timestep) %>% + + tbl <- storage_tbls[[i]] %>% + group_by(timestep) %>% summarise(avg_pressure = mean(pressure)) - # Add column so we can separate the values based on their old df - new_df$df_id <- rep(i, nrow(new_df)) + # Add column so we can separate values based on their old df + tbl$df_id <- rep(i, nrow(tbl)) # Add column so we can plot based on old df rownames - new_df$orig_rwns <- as.numeric(row.names(new_df)) + tbl$orig_rwns <- as.numeric(row.names(tbl)) - avg_pr_dfs <- c(avg_pr_dfs, list(new_df)) + reformatted_dfs <- c(reformatted_dfs, list(tbl)) } # Combine dataframes into single dataframe for plot -avg_pr_combined_df <- bind_rows(avg_pr_dfs) +avg_pr_combined_df <- bind_rows(reformatted_dfs) # Make plot ggplot(avg_pr_combined_df, @@ -207,20 +179,11 @@ ggplot(avg_pr_combined_df, aes(x = orig_rwns, xlab("Timestep") ``` +### Multi-parameter sequential run -#### Matrix plot - -So far we've only considered the `storage` parameter. Now we want to have a look at how the other two parameters influence the simulation. We will use our `get_values` function again and keep the default deviations. - -```{r} -permeability <- get_values(ogs6_obj$media[[1]]$properties[[1]]$value) - -# porosity will have length 4 because its value range is between 0 and 1 -porosity <- get_values(ogs6_obj$media[[1]]$properties[[3]]$value, - zero_to_one = TRUE) -``` +So far we've only considered the `storage` parameter. Now we want to have a look at how the other two parameters influence the simulation, so let's put them into an ensemble together. For `permeability` we can reuse the `percentages` we already have. For `porosity`, this doesn't work - the original value is 1 and its values can only range between 0 and 1, so we'll supply a shorter vector. -Now we'll put them into an ensemble together. This time it's important we set the `OGS6_Ensemble` parameter `sequential_mode` to `TRUE` as this will change the supplied parameters *sequentially* which means in the end we have 19 (7 + 7 + 4) simulations which is equal to the sum of elements in the value vectors you supply (I've named them `values` below for clarity, naming them is optional though). +This time it's important we set the `OGS6_Ensemble` parameter `sequential_mode` to `TRUE` as this will change the supplied parameters *sequentially* which means in the end we have 18 (7 + 7 + 4) simulations which is equal to the sum of elements in the value vectors you supply (I've named them `values` below for clarity, naming them is optional though). The default `FALSE` would give an error message because our value vectors do not have the same length and even if they had, it wouldn't do what we want - the number of simulations would equal the length of *one* value vector (thus requiring them to be of the same length). Generally, set `sequential_mode` to `TRUE` if you want to examine the influence of parameters on a simulation *independently*. If you want to examine how the parameters influence *each other* as in wanting to test parameter combinations, the default mode is the way to go. @@ -235,11 +198,11 @@ ogs6_ens_big <- OGS6_Ensemble$new( ogs6_obj = ogs6_obj, parameters = list(per = list(ogs6_obj$media[[1]]$properties[[1]]$value, - values = permeability$value), + values = percentages), por = list(ogs6_obj$media[[1]]$properties[[3]]$value, - values = porosity$value), + c(-50, -10, -1, 0)), sto = list(ogs6_obj$media[[1]]$properties[[4]]$value, - values = storage$value)), + values = percentages)), sequential_mode = TRUE) exit_codes <- ogs6_ens_big$run_simulation() @@ -257,12 +220,9 @@ per_por_sto_tbls <- }) ``` -Plotting time? Not quite. We can't really differentiate what values were used where yet. Since we set `sequential_mode` to `TRUE`, `OGS_Ensemble` allows us to utilize `relevant_parameter_at()` to fix this. It looks up which value vector the parameter we changed for each simulation came from. We will also put in an extra column `dev` so we can group by deviations. +Plotting time? Not quite. We can't really differentiate what values were used where yet. Since we set `sequential_mode` to `TRUE`, `OGS_Ensemble` allows us to utilize `relevant_parameter_at()` to fix this. It looks up which value vector the parameter we changed for each simulation came from. We will also put in an extra column `percs` so we can group by percentages. ```{r} -# Combine deviation columns -deviations_comb <- c(permeability$dev, porosity$dev, storage$dev) - # Get list of dataframes x_pr_dfs <- list() @@ -271,7 +231,7 @@ for(i in seq_len(length(per_por_sto_tbls))){ # Add name and deviation column new_df$name <- ogs6_ens_big$relevant_parameter_at(i) - new_df$dev <- deviations_comb[[i]] + new_df$percs <- unlist(ogs6_ens_big$parameter_percs)[[i]] x_pr_dfs <- c(x_pr_dfs, list(new_df)) } @@ -287,12 +247,13 @@ x_pr_combined_df <- bind_rows(x_pr_dfs) ggplot(x_pr_combined_df, aes(x = x, y = pressure, - group = dev)) + - geom_point(aes(color = as.factor(dev))) + + group = percs)) + + geom_point(aes(color = as.factor(percs))) + xlab("x point coordinate") + labs(color = "%") + facet_grid(cols = vars(name)) ``` + Ta-Daa! All nicely grouped. We can see `permeability` has the most influence on the pressure. Though they may seem suspicious, `porosity` and `storage` are being plotted correctly - the points are just being placed right on top of each other. Since `porosity` can't go over the value `1` which was the original value, our value vector only went from -50% to 0% which is why the line colors of `porosity` and `storage` differ. @@ -311,3 +272,7 @@ setup plot single +```{r, include = FALSE} +# Cleanup created folders +do.call(file.remove, list(list.files(testdir_path, full.names = TRUE))) +``` \ No newline at end of file -- GitLab