[feature] closes #19 , percentages mode for ensembles added

9831f292 · Ruben Heinrich · 8a97cd8c · 9831f292 · 9831f292 · 9831f292
Commit 9831f292 authored 4 years ago by Ruben Heinrich
--- a/R/ogs6_ensemble.R
+++ b/R/ogs6_ensemble.R
@@ -16,13 +16,17 @@ OGS6_Ensemble <- R6::R6Class(
    #' vector of values. Note that the second elements of the sublists must
    #' have the same length.
    #'@param sequential_mode flag: Defaults to `FALSE`
+    #'@param percentages_mode flag: Defaults to `TRUE`
    #'@importFrom foreach %dopar%
    public = list(
        initialize = function(ogs6_obj,
                              parameters,
-                              sequential_mode = FALSE) {
+                              sequential_mode = FALSE,
+                              percentages_mode = TRUE) {

            assertthat::assert_that(inherits(ogs6_obj, "OGS6"))
+            assertthat::assert_that(assertthat::is.flag(sequential_mode))
+            assertthat::assert_that(assertthat::is.flag(percentages_mode))

            private$.ens_path <- ogs6_obj$sim_path
            ogs6_obj$sim_path <- paste0(ogs6_obj$sim_path, ogs6_obj$sim_name)
@@ -41,7 +45,6 @@ OGS6_Ensemble <- R6::R6Class(
            })

            assertthat::assert_that(is.list(parameters))
-            assertthat::assert_that(assertthat::is.flag(sequential_mode))

            # If not in sequential mode, value vectors must have same length
            if(!sequential_mode){
@@ -54,9 +57,15 @@ OGS6_Ensemble <- R6::R6Class(
                assertthat::assert_that(!any(names(parameters) == ""))
            }

-            private$.parameter_values <- lapply(parameters, function(x){
-                x[[2]]
-            })
+            second_elements <- lapply(parameters, function(x){x[[2]]})
+
+            if(percentages_mode){
+                private$.parameter_percs <- second_elements
+                private$calc_values_by_percs(ogs6_obj)
+                names(private$.parameter_values) <- names(parameters)
+            }else{
+                private$.parameter_values <- second_elements
+            }

            private$make_ensemble(ogs6_obj$clone(deep = TRUE),
                                  sequential_mode)
@@ -164,6 +173,12 @@ OGS6_Ensemble <- R6::R6Class(
            private$.dp_parameters
        },

+        #'@field parameter_percs
+        #'Getter for private parameter '.parameter_percs'
+        parameter_percs = function() {
+            private$.parameter_percs
+        },
+
        #'@field parameter_values
        #'Getter for private parameter '.parameter_values'
        parameter_values = function() {
@@ -185,15 +200,32 @@ OGS6_Ensemble <- R6::R6Class(

    private = list(

+        # Calculates values based on given percentages
+        calc_values_by_percs = function(ogs6_obj){
+
+            for(i in seq_len(length(private$.parameter_percs))){
+
+                print(self$dp_parameters[[i]])
+                val <- eval(parse(text = self$dp_parameters[[i]]))
+
+                val_vec <- lapply(private$.parameter_percs[[i]], function(x){
+                    val + (val * (x / 100))
+                    })
+
+                private$.parameter_values <- c(self$parameter_values,
+                                               list(val_vec))
+            }
+        },
+
        #@description
        #Creates the actual ensemble.
-        #@param ogs6_obj OGS6: Simulation object
-        #@param sequential_mode flag: Optional:
        make_ensemble = function(ogs6_obj,
-                                 sequential_mode) {
+                                 sequential_mode,
+                                 percentages_mode) {

            orig_sim_name <- ogs6_obj$sim_name
            orig_sim_path <- ogs6_obj$sim_path
+
            parameter_values <- self$parameter_values

            # If sequential, put parameters behind each other
@@ -280,6 +312,7 @@ OGS6_Ensemble <- R6::R6Class(
        .ens_path = NULL,
        .ensemble = list(),
        .dp_parameters = list(),
+        .parameter_percs = list(),
        .parameter_values = list()
    )
 )

--- a/R/sim_utils.R
+++ b/R/sim_utils.R
@@ -89,8 +89,6 @@ run_simulation <- function(ogs6_obj,
                             args = ogs6_args)
    }

-    closeAllConnections()
-
    # Read in generated .pvd file and add it to ogs6_obj
    pvd_paths <- list.files(ogs6_obj$sim_path,
                           "\\.pvd$",

--- a/tests/testthat/test-ogs6_ensemble.R
+++ b/tests/testthat/test-ogs6_ensemble.R
@@ -20,17 +20,46 @@ test_that("OGS6_Ensemble initialization works", {
        value = 4
    ))

-    ogs6_ens <- OGS6_Ensemble$new(
+    # Test with sequential_mode and percentages_mode off
+    ogs6_ens_noseq_noper <- OGS6_Ensemble$new(
        ogs6_obj = ogs6_obj,
        parameters = list(list(ogs6_obj$parameters[[1]]$value, c(2, 3)),
-                          list(ogs6_obj$parameters[[2]]$value, c(5, 6)))
+                          list(ogs6_obj$parameters[[2]]$value, c(5, 6))),
+        sequential_mode = FALSE,
+        percentages_mode = FALSE
    )

-    expect_equal(length(ogs6_ens$ensemble), 2)
-    expect_equal(ogs6_ens$ensemble[[1]]$parameters[[1]]$value, 2)
-    expect_equal(ogs6_ens$ensemble[[2]]$parameters[[1]]$value, 3)
-    expect_equal(ogs6_ens$ensemble[[1]]$parameters[[2]]$value, 5)
-    expect_equal(ogs6_ens$ensemble[[2]]$parameters[[2]]$value, 6)
+    expect_equal(length(ogs6_ens_noseq_noper$ensemble), 2)
+    expect_equal(ogs6_ens_noseq_noper$ensemble[[1]]$parameters[[1]]$value, 2)
+    expect_equal(ogs6_ens_noseq_noper$ensemble[[2]]$parameters[[1]]$value, 3)
+    expect_equal(ogs6_ens_noseq_noper$ensemble[[1]]$parameters[[2]]$value, 5)
+    expect_equal(ogs6_ens_noseq_noper$ensemble[[2]]$parameters[[2]]$value, 6)
+
+    # Test with sequential_mode on and percentages_mode off
+    ogs6_ens_seq_noper <- OGS6_Ensemble$new(
+        ogs6_obj = ogs6_obj,
+        parameters = list(a = list(ogs6_obj$parameters[[1]]$value, c(2, 3)),
+                          b = list(ogs6_obj$parameters[[2]]$value, c(5, 6))),
+        sequential_mode = TRUE,
+        percentages_mode = FALSE
+    )
+
+    expect_equal(length(ogs6_ens_seq_noper$ensemble), 4)
+    expect_equal(ogs6_ens_seq_noper$ensemble[[4]]$parameters[[2]]$value, 6)
+
+
+    # Test with sequential_mode off and percentages_mode on
+    ogs6_ens_noseq_per <- OGS6_Ensemble$new(
+        ogs6_obj = ogs6_obj,
+        parameters = list(list(ogs6_obj$parameters[[1]]$value, c(50, -75)),
+                          list(ogs6_obj$parameters[[2]]$value, c(50, -75))),
+        sequential_mode = FALSE,
+        percentages_mode = TRUE
+    )
+
+    expect_equal(length(ogs6_ens_noseq_per$ensemble), 2)
+    expect_equal(ogs6_ens_noseq_per$ensemble[[1]]$parameters[[1]]$value, 1.5)
+    expect_equal(ogs6_ens_noseq_per$ensemble[[2]]$parameters[[2]]$value, 1)
 })



--- a/vignettes/ensemble_workflow_vignette.Rmd
+++ b/vignettes/ensemble_workflow_vignette.Rmd
 ---
-title: "Ensemble Guide"
+title: "r2ogs6 Ensemble Guide"
 output: rmarkdown::html_vignette
 vignette: >
-  %\VignetteIndexEntry{Ensemble Guide}
+  %\VignetteIndexEntry{r2ogs6 Ensemble Guide}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
 ---

 ```{r, include = FALSE}
 knitr::opts_chunk$set(
+  fig.width = 6,
+  fig.align = "center",
  collapse = TRUE,
-  comment = "#>"
+  comment = "#>",
+  message = FALSE
 )

 devtools::load_all(".")
@@ -18,6 +21,7 @@ devtools::load_all(".")

 ```{r setup}
 library(r2ogs6)
+library(ggplot2)
 library(dplyr)
 ```

@@ -65,63 +69,31 @@ prj_path <- system.file("examples/Theis_problem/benchmark_files/",
 read_in_prj(ogs6_obj, prj_path)
 ```

-We already know *which* parameters we want to change between the different simulations. But before we can create an ensemble, we need to know what their respective *values* should be. Say we don't want to hardcode each value, but instead examine the effects of changing our parameters by 1%, 10% and 50%. Below I've supplied a little function that simply takes a value and creates a tibble based on the given `deviations`. 

-```{r}
-# This returns a df so we can keep the data together for plotting
-get_values <- function(value, 
-                       deviations = c(-0.5, -0.1, -0.01, 0, 0.01, 0.1, 0.5),
-                       zero_to_one = FALSE){
-  
-  values_and_devs <- tibble::tibble(value = numeric(),
-                                    dev = numeric())
-  
-  for(i in seq_len(length(deviations))){
-    if(zero_to_one &&
-       !dplyr::between((value + (value * deviations[[i]])), 0, 1)){
-      next
-    }
-    
-    values_and_devs <- 
-      add_row(values_and_devs,
-              value = value + (value * deviations[[i]]),
-              dev = deviations[[i]])
-  }
-  
-  return(invisible(values_and_devs))
-}
+### Single-parameter sequential run

-# demo
-paste(get_values(100)$value, collapse = " ")
-```
-We can now use this function for a minimal working example. Let's set up an ensemble with only the `storage` parameter. First we create the corresponding vector...
+Let's create a small ensemble where we only alter the value of `storage`. Say we don't want to hardcode the values, but instead examine the effects of changing `storage` by 1%, 10% and 50%. We can use the `percentages_mode` parameter of `OGS6_Ensemble` for this. It already defaults to `TRUE`, below I'm merely being explicit for demonstration purposes.

 ```{r}
-# Create the tibble based on the original value
-storage <- get_values(ogs6_obj$media[[1]]$properties[[4]]$value)
-
-paste(storage$value, collapse = " ")
-```
-... then we pass the original reference as well as the vector to an ensemble object.
+# Assign percentages to a variable in case we want to reuse them later
+percentages <- c(-50, -10, -1, 0, 1, 10, 50)

-```{r}
-# First we define an ensemble object
+# Define an ensemble object
 ogs6_ens <- 
  OGS6_Ensemble$new(
    ogs6_obj = ogs6_obj,
    parameters = list(list(ogs6_obj$media[[1]]$properties[[4]]$value,
-                           storage$value)))
+                           percentages)),
+    percentages_mode = TRUE)

 # Now we run the simulations
 exit_codes <- ogs6_ens$run_simulation()
 paste(exit_codes, collapse = " ")
 ```

- 
-### Plot results
-#### Single simulation
+#### Plot results for single simulation

-Our simulations (hopefully) ran successfully - great! Now it'd be nice to see some results. Say we're interested in the `pressure` data of the first simulation.
+Our simulations (hopefully) ran successfully - great! Now it'd be nice to see some results. Say we're interested in the `pressure` data.

 ```{r}
 # This will get a list of tibbles - one for each simulation.
@@ -132,7 +104,7 @@ storage_tbls <-
  })
 ```

-You may leave out the `point_ids` parameter. It defaults to all points in the dataset - I specify it here because there's about 500 which would slow down building this vignette significantly.
+You may leave out the `point_ids` parameter. It defaults to all points in the dataset - I specify it here because there's about 500 which would slow down building this vignette.

 ```{r}
 # Let's look at the first 10 rows of the dataset for the first simulation.
@@ -141,9 +113,8 @@ head(storage_tbls[[1]], 10)

 You can see there's one row per timestep for each point ID. Say we want to plot the *average* pressure for each `timestep`, that is, the mean of the `pressure` values where the rows have the same `timestep`.

-
 ```{r}
-# Get average pressure from first dataframe
+# Get average pressure from first dataframe (= from first simulation)
 avg_pr_df <- storage_tbls[[1]] %>% 
  group_by(timestep) %>%
  summarise(avg_pressure = mean(pressure))
@@ -159,29 +130,30 @@ ggplot(data = avg_pr_df) +


 ```
-#### Multiple simulations
+
+#### Plot results for multiple simulations

 ```{r}
-# Get list of dataframes that each look like avg_pr_df
-avg_pr_dfs <- list()
+# Prepare dataframes for plotting
+reformatted_dfs <- list()

 for(i in seq_len(length(storage_tbls))){
-  old_df <- storage_tbls[[i]]
-  new_df <- old_df %>% 
-    group_by(timestep) %>% 
+
+  tbl <- storage_tbls[[i]] %>% 
+    group_by(timestep) %>%
    summarise(avg_pressure = mean(pressure))
  
-  # Add column so we can separate the values based on their old df 
-  new_df$df_id <- rep(i, nrow(new_df))
+  # Add column so we can separate values based on their old df 
+  tbl$df_id <- rep(i, nrow(tbl))
  
  # Add column so we can plot based on old df rownames
-  new_df$orig_rwns <- as.numeric(row.names(new_df))
+  tbl$orig_rwns <- as.numeric(row.names(tbl))
  
-  avg_pr_dfs <- c(avg_pr_dfs, list(new_df))
+  reformatted_dfs <- c(reformatted_dfs, list(tbl))
 }

 # Combine dataframes into single dataframe for plot
-avg_pr_combined_df <- bind_rows(avg_pr_dfs)
+avg_pr_combined_df <- bind_rows(reformatted_dfs)

 # Make plot
 ggplot(avg_pr_combined_df,
@@ -207,20 +179,11 @@ ggplot(avg_pr_combined_df, aes(x = orig_rwns,
  xlab("Timestep")
 ```

+### Multi-parameter sequential run

-#### Matrix plot
-
-So far we've only considered the `storage` parameter. Now we want to have a look at how the other two parameters influence the simulation. We will use our `get_values` function again and keep the default deviations.
-
-```{r}
-permeability <- get_values(ogs6_obj$media[[1]]$properties[[1]]$value)
-
-# porosity will have length 4 because its value range is between 0 and 1
-porosity <- get_values(ogs6_obj$media[[1]]$properties[[3]]$value,
-                            zero_to_one = TRUE)
-```
+So far we've only considered the `storage` parameter. Now we want to have a look at how the other two parameters influence the simulation, so let's put them into an ensemble together. For `permeability` we can reuse the `percentages` we already have. For `porosity`, this doesn't work - the original value is 1 and its values can only range between 0 and 1, so we'll supply a shorter vector.

-Now we'll put them into an ensemble together. This time it's important we set the `OGS6_Ensemble` parameter `sequential_mode` to `TRUE` as this will change the supplied parameters *sequentially* which means in the end we have 19 (7 + 7 + 4) simulations which is equal to the sum of elements in the value vectors you supply (I've named them `values` below for clarity, naming them is optional though).
+This time it's important we set the `OGS6_Ensemble` parameter `sequential_mode` to `TRUE` as this will change the supplied parameters *sequentially* which means in the end we have 18 (7 + 7 + 4) simulations which is equal to the sum of elements in the value vectors you supply (I've named them `values` below for clarity, naming them is optional though).

 The default `FALSE` would give an error message because our value vectors do not have the same length and even if they had, it wouldn't do what we want - the number of simulations would equal the length of *one* value vector (thus requiring them to be of the same length). Generally, set `sequential_mode` to `TRUE` if you want to examine the influence of parameters on a simulation *independently*. If you want to examine how the parameters influence *each other* as in wanting to test parameter combinations, the default mode is the way to go. 

@@ -235,11 +198,11 @@ ogs6_ens_big <-
  OGS6_Ensemble$new(
    ogs6_obj = ogs6_obj,
    parameters = list(per = list(ogs6_obj$media[[1]]$properties[[1]]$value, 
-                                 values = permeability$value),
+                                 values = percentages),
                      por = list(ogs6_obj$media[[1]]$properties[[3]]$value, 
-                                 values = porosity$value),
+                                 c(-50, -10, -1, 0)),
                      sto = list(ogs6_obj$media[[1]]$properties[[4]]$value, 
-                                 values = storage$value)),
+                                 values = percentages)),
    sequential_mode = TRUE)

 exit_codes <- ogs6_ens_big$run_simulation()
@@ -257,12 +220,9 @@ per_por_sto_tbls <-
  })
 ```

-Plotting time? Not quite. We can't really differentiate what values were used where yet. Since we set `sequential_mode` to `TRUE`, `OGS_Ensemble` allows us to utilize `relevant_parameter_at()` to fix this. It looks up which value vector the parameter we changed for each simulation came from. We will also put in an extra column `dev` so we can group by deviations.
+Plotting time? Not quite. We can't really differentiate what values were used where yet. Since we set `sequential_mode` to `TRUE`, `OGS_Ensemble` allows us to utilize `relevant_parameter_at()` to fix this. It looks up which value vector the parameter we changed for each simulation came from. We will also put in an extra column `percs` so we can group by percentages.

 ```{r}
-# Combine deviation columns
-deviations_comb <- c(permeability$dev, porosity$dev, storage$dev)
-
 # Get list of dataframes
 x_pr_dfs <- list()

@@ -271,7 +231,7 @@ for(i in seq_len(length(per_por_sto_tbls))){
  
  # Add name and deviation column
  new_df$name <- ogs6_ens_big$relevant_parameter_at(i)
-  new_df$dev <- deviations_comb[[i]]
+  new_df$percs <- unlist(ogs6_ens_big$parameter_percs)[[i]]
  
  x_pr_dfs <- c(x_pr_dfs, list(new_df))
 }
@@ -287,12 +247,13 @@ x_pr_combined_df <- bind_rows(x_pr_dfs)
 ggplot(x_pr_combined_df,
       aes(x = x,
           y = pressure,
-           group = dev)) +
-  geom_point(aes(color = as.factor(dev))) +
+           group = percs)) +
+  geom_point(aes(color = as.factor(percs))) +
  xlab("x point coordinate") +
  labs(color = "%") +
  facet_grid(cols = vars(name))
 ```
+
 Ta-Daa! All nicely grouped. We can see `permeability` has the most influence on the pressure. Though they may seem suspicious, `porosity` and `storage` are being plotted correctly - the points are just being placed right on top of each other. Since `porosity` can't go over the value `1` which was the original value, our value vector only went from -50% to 0% which is why the line colors of `porosity` and `storage` differ.


@@ -311,3 +272,7 @@ setup

 plot single

+```{r, include = FALSE}
+# Cleanup created folders
+do.call(file.remove, list(list.files(testdir_path, full.names = TRUE)))
+```
\ No newline at end of file