From 9831f2923af1f30d564d39da708200c22c9a7d0c Mon Sep 17 00:00:00 2001
From: aheinri5 <Anna@netzkritzler.de>
Date: Sat, 6 Feb 2021 21:46:04 +0100
Subject: [PATCH] [feature] closes #19 , percentages mode for ensembles added

---
 R/ogs6_ensemble.R                        |  49 +++++++--
 R/sim_utils.R                            |   2 -
 tests/testthat/test-ogs6_ensemble.R      |  43 ++++++--
 vignettes/ensemble_workflow_vignette.Rmd | 125 ++++++++---------------
 4 files changed, 122 insertions(+), 97 deletions(-)

diff --git a/R/ogs6_ensemble.R b/R/ogs6_ensemble.R
index a3279c3..a2077ba 100644
--- a/R/ogs6_ensemble.R
+++ b/R/ogs6_ensemble.R
@@ -16,13 +16,17 @@ OGS6_Ensemble <- R6::R6Class(
     #' vector of values. Note that the second elements of the sublists must
     #' have the same length.
     #'@param sequential_mode flag: Defaults to `FALSE`
+    #'@param percentages_mode flag: Defaults to `TRUE`
     #'@importFrom foreach %dopar%
     public = list(
         initialize = function(ogs6_obj,
                               parameters,
-                              sequential_mode = FALSE) {
+                              sequential_mode = FALSE,
+                              percentages_mode = TRUE) {
 
             assertthat::assert_that(inherits(ogs6_obj, "OGS6"))
+            assertthat::assert_that(assertthat::is.flag(sequential_mode))
+            assertthat::assert_that(assertthat::is.flag(percentages_mode))
 
             private$.ens_path <- ogs6_obj$sim_path
             ogs6_obj$sim_path <- paste0(ogs6_obj$sim_path, ogs6_obj$sim_name)
@@ -41,7 +45,6 @@ OGS6_Ensemble <- R6::R6Class(
             })
 
             assertthat::assert_that(is.list(parameters))
-            assertthat::assert_that(assertthat::is.flag(sequential_mode))
 
             # If not in sequential mode, value vectors must have same length
             if(!sequential_mode){
@@ -54,9 +57,15 @@ OGS6_Ensemble <- R6::R6Class(
                 assertthat::assert_that(!any(names(parameters) == ""))
             }
 
-            private$.parameter_values <- lapply(parameters, function(x){
-                x[[2]]
-            })
+            second_elements <- lapply(parameters, function(x){x[[2]]})
+
+            if(percentages_mode){
+                private$.parameter_percs <- second_elements
+                private$calc_values_by_percs(ogs6_obj)
+                names(private$.parameter_values) <- names(parameters)
+            }else{
+                private$.parameter_values <- second_elements
+            }
 
             private$make_ensemble(ogs6_obj$clone(deep = TRUE),
                                   sequential_mode)
@@ -164,6 +173,12 @@ OGS6_Ensemble <- R6::R6Class(
             private$.dp_parameters
         },
 
+        #'@field parameter_percs
+        #'Getter for private parameter '.parameter_percs'
+        parameter_percs = function() {
+            private$.parameter_percs
+        },
+
         #'@field parameter_values
         #'Getter for private parameter '.parameter_values'
         parameter_values = function() {
@@ -185,15 +200,32 @@ OGS6_Ensemble <- R6::R6Class(
 
     private = list(
 
+        # Calculates values based on given percentages
+        calc_values_by_percs = function(ogs6_obj){
+
+            for(i in seq_len(length(private$.parameter_percs))){
+
+                print(self$dp_parameters[[i]])
+                val <- eval(parse(text = self$dp_parameters[[i]]))
+
+                val_vec <- lapply(private$.parameter_percs[[i]], function(x){
+                    val + (val * (x / 100))
+                    })
+
+                private$.parameter_values <- c(self$parameter_values,
+                                               list(val_vec))
+            }
+        },
+
         #@description
         #Creates the actual ensemble.
-        #@param ogs6_obj OGS6: Simulation object
-        #@param sequential_mode flag: Optional:
         make_ensemble = function(ogs6_obj,
-                                 sequential_mode) {
+                                 sequential_mode,
+                                 percentages_mode) {
 
             orig_sim_name <- ogs6_obj$sim_name
             orig_sim_path <- ogs6_obj$sim_path
+
             parameter_values <- self$parameter_values
 
             # If sequential, put parameters behind each other
@@ -280,6 +312,7 @@ OGS6_Ensemble <- R6::R6Class(
         .ens_path = NULL,
         .ensemble = list(),
         .dp_parameters = list(),
+        .parameter_percs = list(),
         .parameter_values = list()
     )
 )
diff --git a/R/sim_utils.R b/R/sim_utils.R
index 3047ccd..268ea68 100644
--- a/R/sim_utils.R
+++ b/R/sim_utils.R
@@ -89,8 +89,6 @@ run_simulation <- function(ogs6_obj,
                              args = ogs6_args)
     }
 
-    closeAllConnections()
-
     # Read in generated .pvd file and add it to ogs6_obj
     pvd_paths <- list.files(ogs6_obj$sim_path,
                            "\\.pvd$",
diff --git a/tests/testthat/test-ogs6_ensemble.R b/tests/testthat/test-ogs6_ensemble.R
index 857b0ca..9c90c51 100644
--- a/tests/testthat/test-ogs6_ensemble.R
+++ b/tests/testthat/test-ogs6_ensemble.R
@@ -20,17 +20,46 @@ test_that("OGS6_Ensemble initialization works", {
         value = 4
     ))
 
-    ogs6_ens <- OGS6_Ensemble$new(
+    # Test with sequential_mode and percentages_mode off
+    ogs6_ens_noseq_noper <- OGS6_Ensemble$new(
         ogs6_obj = ogs6_obj,
         parameters = list(list(ogs6_obj$parameters[[1]]$value, c(2, 3)),
-                          list(ogs6_obj$parameters[[2]]$value, c(5, 6)))
+                          list(ogs6_obj$parameters[[2]]$value, c(5, 6))),
+        sequential_mode = FALSE,
+        percentages_mode = FALSE
     )
 
-    expect_equal(length(ogs6_ens$ensemble), 2)
-    expect_equal(ogs6_ens$ensemble[[1]]$parameters[[1]]$value, 2)
-    expect_equal(ogs6_ens$ensemble[[2]]$parameters[[1]]$value, 3)
-    expect_equal(ogs6_ens$ensemble[[1]]$parameters[[2]]$value, 5)
-    expect_equal(ogs6_ens$ensemble[[2]]$parameters[[2]]$value, 6)
+    expect_equal(length(ogs6_ens_noseq_noper$ensemble), 2)
+    expect_equal(ogs6_ens_noseq_noper$ensemble[[1]]$parameters[[1]]$value, 2)
+    expect_equal(ogs6_ens_noseq_noper$ensemble[[2]]$parameters[[1]]$value, 3)
+    expect_equal(ogs6_ens_noseq_noper$ensemble[[1]]$parameters[[2]]$value, 5)
+    expect_equal(ogs6_ens_noseq_noper$ensemble[[2]]$parameters[[2]]$value, 6)
+
+    # Test with sequential_mode on and percentages_mode off
+    ogs6_ens_seq_noper <- OGS6_Ensemble$new(
+        ogs6_obj = ogs6_obj,
+        parameters = list(a = list(ogs6_obj$parameters[[1]]$value, c(2, 3)),
+                          b = list(ogs6_obj$parameters[[2]]$value, c(5, 6))),
+        sequential_mode = TRUE,
+        percentages_mode = FALSE
+    )
+
+    expect_equal(length(ogs6_ens_seq_noper$ensemble), 4)
+    expect_equal(ogs6_ens_seq_noper$ensemble[[4]]$parameters[[2]]$value, 6)
+
+
+    # Test with sequential_mode off and percentages_mode on
+    ogs6_ens_noseq_per <- OGS6_Ensemble$new(
+        ogs6_obj = ogs6_obj,
+        parameters = list(list(ogs6_obj$parameters[[1]]$value, c(50, -75)),
+                          list(ogs6_obj$parameters[[2]]$value, c(50, -75))),
+        sequential_mode = FALSE,
+        percentages_mode = TRUE
+    )
+
+    expect_equal(length(ogs6_ens_noseq_per$ensemble), 2)
+    expect_equal(ogs6_ens_noseq_per$ensemble[[1]]$parameters[[1]]$value, 1.5)
+    expect_equal(ogs6_ens_noseq_per$ensemble[[2]]$parameters[[2]]$value, 1)
 })
 
 
diff --git a/vignettes/ensemble_workflow_vignette.Rmd b/vignettes/ensemble_workflow_vignette.Rmd
index f012fa7..35c664b 100644
--- a/vignettes/ensemble_workflow_vignette.Rmd
+++ b/vignettes/ensemble_workflow_vignette.Rmd
@@ -1,16 +1,19 @@
 ---
-title: "Ensemble Guide"
+title: "r2ogs6 Ensemble Guide"
 output: rmarkdown::html_vignette
 vignette: >
-  %\VignetteIndexEntry{Ensemble Guide}
+  %\VignetteIndexEntry{r2ogs6 Ensemble Guide}
   %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
 ---
 
 ```{r, include = FALSE}
 knitr::opts_chunk$set(
+  fig.width = 6,
+  fig.align = "center",
   collapse = TRUE,
-  comment = "#>"
+  comment = "#>",
+  message = FALSE
 )
 
 devtools::load_all(".")
@@ -18,6 +21,7 @@ devtools::load_all(".")
 
 ```{r setup}
 library(r2ogs6)
+library(ggplot2)
 library(dplyr)
 ```
 
@@ -65,63 +69,31 @@ prj_path <- system.file("examples/Theis_problem/benchmark_files/",
 read_in_prj(ogs6_obj, prj_path)
 ```
 
-We already know *which* parameters we want to change between the different simulations. But before we can create an ensemble, we need to know what their respective *values* should be. Say we don't want to hardcode each value, but instead examine the effects of changing our parameters by 1%, 10% and 50%. Below I've supplied a little function that simply takes a value and creates a tibble based on the given `deviations`. 
 
-```{r}
-# This returns a df so we can keep the data together for plotting
-get_values <- function(value, 
-                       deviations = c(-0.5, -0.1, -0.01, 0, 0.01, 0.1, 0.5),
-                       zero_to_one = FALSE){
-  
-  values_and_devs <- tibble::tibble(value = numeric(),
-                                    dev = numeric())
-  
-  for(i in seq_len(length(deviations))){
-    if(zero_to_one &&
-       !dplyr::between((value + (value * deviations[[i]])), 0, 1)){
-      next
-    }
-    
-    values_and_devs <- 
-      add_row(values_and_devs,
-              value = value + (value * deviations[[i]]),
-              dev = deviations[[i]])
-  }
-  
-  return(invisible(values_and_devs))
-}
+### Single-parameter sequential run
 
-# demo
-paste(get_values(100)$value, collapse = " ")
-```
-We can now use this function for a minimal working example. Let's set up an ensemble with only the `storage` parameter. First we create the corresponding vector...
+Let's create a small ensemble where we only alter the value of `storage`. Say we don't want to hardcode the values, but instead examine the effects of changing `storage` by 1%, 10% and 50%. We can use the `percentages_mode` parameter of `OGS6_Ensemble` for this. It already defaults to `TRUE`, below I'm merely being explicit for demonstration purposes.
 
 ```{r}
-# Create the tibble based on the original value
-storage <- get_values(ogs6_obj$media[[1]]$properties[[4]]$value)
-
-paste(storage$value, collapse = " ")
-```
-... then we pass the original reference as well as the vector to an ensemble object.
+# Assign percentages to a variable in case we want to reuse them later
+percentages <- c(-50, -10, -1, 0, 1, 10, 50)
 
-```{r}
-# First we define an ensemble object
+# Define an ensemble object
 ogs6_ens <- 
   OGS6_Ensemble$new(
     ogs6_obj = ogs6_obj,
     parameters = list(list(ogs6_obj$media[[1]]$properties[[4]]$value,
-                           storage$value)))
+                           percentages)),
+    percentages_mode = TRUE)
 
 # Now we run the simulations
 exit_codes <- ogs6_ens$run_simulation()
 paste(exit_codes, collapse = " ")
 ```
 
- 
-### Plot results
-#### Single simulation
+#### Plot results for single simulation
 
-Our simulations (hopefully) ran successfully - great! Now it'd be nice to see some results. Say we're interested in the `pressure` data of the first simulation.
+Our simulations (hopefully) ran successfully - great! Now it'd be nice to see some results. Say we're interested in the `pressure` data.
 
 ```{r}
 # This will get a list of tibbles - one for each simulation.
@@ -132,7 +104,7 @@ storage_tbls <-
   })
 ```
 
-You may leave out the `point_ids` parameter. It defaults to all points in the dataset - I specify it here because there's about 500 which would slow down building this vignette significantly.
+You may leave out the `point_ids` parameter. It defaults to all points in the dataset - I specify it here because there's about 500 which would slow down building this vignette.
 
 ```{r}
 # Let's look at the first 10 rows of the dataset for the first simulation.
@@ -141,9 +113,8 @@ head(storage_tbls[[1]], 10)
 
 You can see there's one row per timestep for each point ID. Say we want to plot the *average* pressure for each `timestep`, that is, the mean of the `pressure` values where the rows have the same `timestep`.
 
-
 ```{r}
-# Get average pressure from first dataframe
+# Get average pressure from first dataframe (= from first simulation)
 avg_pr_df <- storage_tbls[[1]] %>% 
   group_by(timestep) %>%
   summarise(avg_pressure = mean(pressure))
@@ -159,29 +130,30 @@ ggplot(data = avg_pr_df) +
 
 
 ```
-#### Multiple simulations
+
+#### Plot results for multiple simulations
 
 ```{r}
-# Get list of dataframes that each look like avg_pr_df
-avg_pr_dfs <- list()
+# Prepare dataframes for plotting
+reformatted_dfs <- list()
 
 for(i in seq_len(length(storage_tbls))){
-  old_df <- storage_tbls[[i]]
-  new_df <- old_df %>% 
-    group_by(timestep) %>% 
+
+  tbl <- storage_tbls[[i]] %>% 
+    group_by(timestep) %>%
     summarise(avg_pressure = mean(pressure))
   
-  # Add column so we can separate the values based on their old df 
-  new_df$df_id <- rep(i, nrow(new_df))
+  # Add column so we can separate values based on their old df 
+  tbl$df_id <- rep(i, nrow(tbl))
   
   # Add column so we can plot based on old df rownames
-  new_df$orig_rwns <- as.numeric(row.names(new_df))
+  tbl$orig_rwns <- as.numeric(row.names(tbl))
   
-  avg_pr_dfs <- c(avg_pr_dfs, list(new_df))
+  reformatted_dfs <- c(reformatted_dfs, list(tbl))
 }
 
 # Combine dataframes into single dataframe for plot
-avg_pr_combined_df <- bind_rows(avg_pr_dfs)
+avg_pr_combined_df <- bind_rows(reformatted_dfs)
 
 # Make plot
 ggplot(avg_pr_combined_df,
@@ -207,20 +179,11 @@ ggplot(avg_pr_combined_df, aes(x = orig_rwns,
   xlab("Timestep")
 ```
 
+### Multi-parameter sequential run
 
-#### Matrix plot
-
-So far we've only considered the `storage` parameter. Now we want to have a look at how the other two parameters influence the simulation. We will use our `get_values` function again and keep the default deviations.
-
-```{r}
-permeability <- get_values(ogs6_obj$media[[1]]$properties[[1]]$value)
-
-# porosity will have length 4 because its value range is between 0 and 1
-porosity <- get_values(ogs6_obj$media[[1]]$properties[[3]]$value,
-                            zero_to_one = TRUE)
-```
+So far we've only considered the `storage` parameter. Now we want to have a look at how the other two parameters influence the simulation, so let's put them into an ensemble together. For `permeability` we can reuse the `percentages` we already have. For `porosity`, this doesn't work - the original value is 1 and its values can only range between 0 and 1, so we'll supply a shorter vector.
 
-Now we'll put them into an ensemble together. This time it's important we set the `OGS6_Ensemble` parameter `sequential_mode` to `TRUE` as this will change the supplied parameters *sequentially* which means in the end we have 19 (7 + 7 + 4) simulations which is equal to the sum of elements in the value vectors you supply (I've named them `values` below for clarity, naming them is optional though).
+This time it's important we set the `OGS6_Ensemble` parameter `sequential_mode` to `TRUE` as this will change the supplied parameters *sequentially* which means in the end we have 18 (7 + 7 + 4) simulations which is equal to the sum of elements in the value vectors you supply (I've named them `values` below for clarity, naming them is optional though).
 
 The default `FALSE` would give an error message because our value vectors do not have the same length and even if they had, it wouldn't do what we want - the number of simulations would equal the length of *one* value vector (thus requiring them to be of the same length). Generally, set `sequential_mode` to `TRUE` if you want to examine the influence of parameters on a simulation *independently*. If you want to examine how the parameters influence *each other* as in wanting to test parameter combinations, the default mode is the way to go. 
 
@@ -235,11 +198,11 @@ ogs6_ens_big <-
   OGS6_Ensemble$new(
     ogs6_obj = ogs6_obj,
     parameters = list(per = list(ogs6_obj$media[[1]]$properties[[1]]$value, 
-                                 values = permeability$value),
+                                 values = percentages),
                       por = list(ogs6_obj$media[[1]]$properties[[3]]$value, 
-                                 values = porosity$value),
+                                 c(-50, -10, -1, 0)),
                       sto = list(ogs6_obj$media[[1]]$properties[[4]]$value, 
-                                 values = storage$value)),
+                                 values = percentages)),
     sequential_mode = TRUE)
 
 exit_codes <- ogs6_ens_big$run_simulation()
@@ -257,12 +220,9 @@ per_por_sto_tbls <-
   })
 ```
 
-Plotting time? Not quite. We can't really differentiate what values were used where yet. Since we set `sequential_mode` to `TRUE`, `OGS_Ensemble` allows us to utilize `relevant_parameter_at()` to fix this. It looks up which value vector the parameter we changed for each simulation came from. We will also put in an extra column `dev` so we can group by deviations.
+Plotting time? Not quite. We can't really differentiate what values were used where yet. Since we set `sequential_mode` to `TRUE`, `OGS_Ensemble` allows us to utilize `relevant_parameter_at()` to fix this. It looks up which value vector the parameter we changed for each simulation came from. We will also put in an extra column `percs` so we can group by percentages.
 
 ```{r}
-# Combine deviation columns
-deviations_comb <- c(permeability$dev, porosity$dev, storage$dev)
-
 # Get list of dataframes
 x_pr_dfs <- list()
 
@@ -271,7 +231,7 @@ for(i in seq_len(length(per_por_sto_tbls))){
   
   # Add name and deviation column
   new_df$name <- ogs6_ens_big$relevant_parameter_at(i)
-  new_df$dev <- deviations_comb[[i]]
+  new_df$percs <- unlist(ogs6_ens_big$parameter_percs)[[i]]
   
   x_pr_dfs <- c(x_pr_dfs, list(new_df))
 }
@@ -287,12 +247,13 @@ x_pr_combined_df <- bind_rows(x_pr_dfs)
 ggplot(x_pr_combined_df,
        aes(x = x,
            y = pressure,
-           group = dev)) +
-  geom_point(aes(color = as.factor(dev))) +
+           group = percs)) +
+  geom_point(aes(color = as.factor(percs))) +
   xlab("x point coordinate") +
   labs(color = "%") +
   facet_grid(cols = vars(name))
 ```
+
 Ta-Daa! All nicely grouped. We can see `permeability` has the most influence on the pressure. Though they may seem suspicious, `porosity` and `storage` are being plotted correctly - the points are just being placed right on top of each other. Since `porosity` can't go over the value `1` which was the original value, our value vector only went from -50% to 0% which is why the line colors of `porosity` and `storage` differ.
 
 
@@ -311,3 +272,7 @@ setup
 
 plot single
 
+```{r, include = FALSE}
+# Cleanup created folders
+do.call(file.remove, list(list.files(testdir_path, full.names = TRUE)))
+```
\ No newline at end of file
-- 
GitLab