Commit 7fc29f77 authored by Ruben Heinrich

[base] WIP updating ensemble class

parent 9f634fa7
#===== OGS6_gml =====
#'OGS6_gml
#'@description Constructor for the OGS6_gml base class
#'@export
OGS6_gml <- R6::R6Class(
"OGS6_gml",
public = list(
#'@description
#'Creates new OGS6_gml object
#'@param gml_path string: Optional: Path to .gml file
#'@param name string: Geometry name
#'@param points tibble: Must have 3 vectors named 'x', 'y' and 'z', may
#' have optional 'name' vector
#'@param polylines list(list("foo", c(1, 2))):
#'@param surfaces list(list("foo", c(1, 2, 3), c(2, 3, 4))):
initialize = function(gml_path = NULL,
name = NULL,
points = NULL,
polylines = NULL,
surfaces = NULL) {
if(is.null(gml_path)){
self$name <- name
self$points <- points
self$polylines <- polylines
self$surfaces <- surfaces
}else{
if(!is.null(name) ||
!is.null(points) ||
!is.null(polylines) ||
!is.null(surfaces)){
warning(paste("`gml_path` was specified for OGS6_gml",
"initialization, so all other parameters",
"will be ignored!"), call. = FALSE)
}
xml_doc <- validate_read_in_xml(gml_path)
self$name <- xml2::xml_text(xml2::xml_find_first(xml_doc,
"//name"))
self$points <- read_in_points(xml_doc)
self$polylines <- read_in_polylines(xml_doc)
self$surfaces <- read_in_surfaces(xml_doc)
}
private$.gml_path <- gml_path
private$validate()
}
),
active = list(
#'@field gml_path
#'Getter for private parameter '.gml_path'
gml_path = function(value) {
private$.gml_path
},
#'@field name
#'Access to private parameter '.name'
name = function(value) {
if(missing(value)) {
private$.name
}else{
assertthat::assert_that(assertthat::is.string(value))
private$.name <- value
}
},
#'@field points
#'Access to private parameter '.points'
points = function(value) {
if(missing(value)) {
private$.points
}else{
private$.points <- validate_points(value)
}
},
#'@field polylines
#'Access to private parameter '.polylines'
polylines = function(value) {
if(missing(value)) {
private$.polylines
}else{
if(!is.null(value)){
value <- validate_polylines(value)
}
private$.polylines <- value
}
},
#'@field surfaces
#'Access to private parameter '.surfaces'
surfaces = function(value) {
if(missing(value)) {
private$.surfaces
}else{
if(!is.null(value)){
value <- validate_surfaces(value)
}
private$.surfaces <- value
}
},
#'@field is_subclass
#'Getter for private parameter '.is_subclass'
is_subclass = function(value) {
private$.is_subclass
},
#'@field attr_names
#'Getter for private parameter '.attr_names'
attr_names = function(value) {
private$.attr_names
},
#'@field flatten_on_exp
#'Getter for private parameter '.flatten_on_exp'
flatten_on_exp = function(value) {
private$.flatten_on_exp
}
),
private = list(
validate = function(){
maximal_point_id <- length(self$points[[1]]) - 1
check_pnt <- function(pnt){
if(pnt > maximal_point_id ||
pnt < 0){
stop(paste("Point with ID", pnt, "does not exist"),
call. = FALSE)
}
}
#Check if polylines reference existing points
lapply(self$polylines, function(x){
lapply(x[[2]], check_pnt)
})
#Check if surfaces reference existing points
lapply(self$surfaces, function(x){
lapply(x[[2]], check_pnt)
if(length(x) == 3){
lapply(x[[3]], check_pnt)
}
})
},
.gml_path = NULL,
.name = NULL,
.points = NULL,
.polylines = NULL,
.surfaces = NULL,
.is_subclass = TRUE,
.attr_names = c("point", "name", "id", "element"),
.flatten_on_exp = character()
)
)
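# Usage sketch (not part of this commit): constructing an OGS6_gml object from
# in-memory data instead of reading a .gml file. Point IDs in polylines and
# surfaces refer to the 0-based row index of the points tibble.
#
# gml <- OGS6_gml$new(
#     name = "unit_square",
#     points = tibble::tibble(x = c(0, 1, 1, 0),
#                             y = c(0, 0, 1, 1),
#                             z = c(0, 0, 0, 0),
#                             name = c("origin", "", "", "")),
#     polylines = list(list("bottom", c(0, 1))),
#     surfaces = list(list("square", c(0, 1, 2), c(0, 2, 3))))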
#===== Validation utility =====
#'validate_points
#'@description Checks if the input is a tibble, if this tibble has the right
#' number of elements, if those elements are named correctly and if there are
#' any overlapping points or duplicate point names
#'@param points tibble: Must have 3 vectors named 'x', 'y' and 'z', may have
#' optional 'name' vector
validate_points <- function(points) {
assertthat::assert_that(inherits(points, "tbl_df"))
names <- names(points)
if (!((length(points) == 4 && names[[1]] == "x" && names[[2]] == "y" &&
names[[3]] == "z" && names[[4]] == "name") ||
(length(points) == 3 && names[[1]] == "x" && names[[2]] == "y" &&
names[[3]] == "z"))){
stop("Column names of `points` do not fit to 'x, y, z, (name)'",
call. = FALSE)
}
assertthat::assert_that(is.numeric(points$x))
assertthat::assert_that(is.numeric(points$y))
assertthat::assert_that(is.numeric(points$z))
has_names <- (length(points) == 4)
# #Find overlapping points and duplicate names
# for(i in 1:(length(points[[1]])-1)){
# for(j in (i+1):length(points[[1]])){
# if(points[[1]][[i]] == points[[1]][[j]] &&
# points[[2]][[i]] == points[[2]][[j]] &&
# points[[3]][[i]] == points[[3]][[j]]){
# stop("Overlapping .gml points detected", call. = FALSE)
# }
#
# if(has_names){
# if(points[[4]][[i]] == points[[4]][[j]] &&
# points[[4]][[i]] != ""){
# warning("Duplicate .gml point names detected",
# call. = FALSE)
# }
# }
# }
# }
return(invisible(points))
}
#'validate_polylines
#'@description Checks if the input is a list, if this list consists of other
#' lists and if those lists have the correct structure (length of 2, first
#' element is a string named 'name', second element is a numeric vector)
#'@param polylines list(list("foo", c(1, 2))):
validate_polylines <- function(polylines) {
assertthat::assert_that(is.list(polylines))
polylines <- lapply(polylines, function(x){
assertthat::assert_that(is.list(x))
assertthat::assert_that(length(x) == 2)
assertthat::assert_that(assertthat::is.string(x[[1]]))
assertthat::assert_that(is.numeric(x[[2]]))
names(x)[[1]] <- c("name")
names(x[[2]]) <- rep("pnt", length(x[[2]]))
return(x)
})
names(polylines) <- rep("polyline", length(polylines))
return(invisible(polylines))
}
#'validate_surfaces
#'@description Checks if the input is a list, if this list consists of other
#' lists and if those lists have the correct structure (length of 2 or 3, first
#' element is a string named 'name', second and third element are numeric
#' vectors)
#'@param surfaces list(list("foo", c(1, 2, 3), c(2, 3, 4))):
validate_surfaces <- function(surfaces) {
assertthat::assert_that(is.list(surfaces))
validate_element <- function(element){
assertthat::assert_that(is.numeric(element))
assertthat::assert_that(length(element) == 3)
names(element) <- c("p1", "p2", "p3")
return(invisible(element))
}
surfaces <- lapply(surfaces, function(x){
assertthat::assert_that(is.list(x))
assertthat::assert_that(length(x) == 2 ||
length(x) == 3)
names(x) <- c("name", rep("element", (length(x)-1)))
x[[2]] <- validate_element(x[[2]])
if(length(x) == 3){
x[[3]] <- validate_element(x[[3]])
# validate_pnt_values(x[[2]], x[[3]])
}
return(x)
})
names(surfaces) <- rep("surface", length(surfaces))
return(invisible(surfaces))
}
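# Usage sketch (not part of this commit): after validation the nested lists are
# named, presumably so they can be mapped onto .gml XML tags on export:
#
# s <- validate_surfaces(list(list("square", c(0, 1, 2), c(0, 2, 3))))
# names(s)          # "surface"
# names(s$surface)  # "name" "element" "element"
# s$surface$element # c(p1 = 0, p2 = 1, p3 = 2)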
#'validate_pnt_values
#'@description Checks if two numerical vectors of length 3
#' (two surface elements) each consist of 3 different elements and have
#' exactly 2 matching elements between them. Think of the two vectors as
#' triangles, and the triangles together form a square which is our surface.
#'@param element_1 numeric, length = 3
#'@param element_2 numeric, length = 3
validate_pnt_values <- function(element_1, element_2) {
if(element_1[[1]] == element_1[[2]] ||
element_1[[1]] == element_1[[3]] ||
element_1[[2]] == element_1[[3]] ||
element_2[[1]] == element_2[[2]] ||
element_2[[1]] == element_2[[3]] ||
element_2[[2]] == element_2[[3]]) {
stop("A surface element must consist of 3 different points",
call. = FALSE)
}
equal_count <- 0
for(i in 1:length(element_1)) {
for(j in 1:length(element_2)) {
if(element_1[[i]] == element_2[[j]]) {
equal_count <- equal_count + 1
break
}
}
}
if(equal_count != 2) {
stop("Invalid surface detected", call. = FALSE)
}
}
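# Usage sketch (not part of this commit; note the call in validate_surfaces is
# currently commented out): two triangle elements that share exactly one edge
# (points 0 and 2) describe a valid quad surface and pass silently:
#
# validate_pnt_values(c(0, 1, 2), c(0, 2, 3))
#
# Triangles sharing all three points are rejected with "Invalid surface detected":
#
# validate_pnt_values(c(0, 1, 2), c(2, 1, 0))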
@@ -67,9 +67,12 @@ OGS6_Ensemble <- R6::R6Class(
#' For ensembles, output will be written to logfiles.
#'@param parallel flag: Should the function be run in parallel?
#' This is implemented via the 'parallel' package.
#'@param verbose flag: Should additional status output be printed?
run_simulation = function(parallel = FALSE,
                          verbose = FALSE){
assertthat::assert_that(assertthat::is.flag(parallel))
assertthat::assert_that(assertthat::is.flag(verbose))
# Create ensemble directory
assertthat::assert_that(!dir.exists(self$ens_path))
@@ -103,7 +106,8 @@ OGS6_Ensemble <- R6::R6Class(
foreach::foreach(i = seq_along(ensemble)) %dopar% {
ogs6_obj <- ensemble[[i]]
r2ogs6::run_simulation(ogs6_obj,
                       write_logfile = TRUE,
                       verbose = verbose)
}
# Cleanup
@@ -117,14 +121,17 @@ OGS6_Ensemble <- R6::R6Class(
parallel::mclapply(self$ensemble,
run_simulation,
write_logfile = TRUE,
verbose = verbose,
mc.cores = n_cores)
}
}else{
# For serial ensembles, we can use lapply
exit_codes <- lapply(self$ensemble,
                     run_simulation,
                     write_logfile = TRUE,
                     verbose = verbose)
return(exit_codes)
}
},
......
@@ -9,13 +9,16 @@
#'@param write_logfile flag: Should output be written to a logfile?
#'@param ogs_bin_path string: Optional: OpenGeoSys 6 bin folder path. Defaults
#' to `options("r2ogs6.default_ogs_bin_path")`
#'@param verbose flag: Should additional status output be printed?
#'@export
run_simulation <- function(ogs6_obj,
                           write_logfile = TRUE,
                           ogs_bin_path,
                           verbose = FALSE) {
assertthat::assert_that(inherits(ogs6_obj, "OGS6"))
assertthat::assert_that(assertthat::is.flag(write_logfile))
assertthat::assert_that(assertthat::is.flag(verbose))
# Call all validators
if(!ogs6_obj$get_status(print_status = FALSE)){
@@ -74,7 +77,9 @@ run_simulation <- function(ogs6_obj,
assertthat::assert_that(!file.exists(ogs6_obj$logfile))
file.create(ogs6_obj$logfile)
if(verbose){
    cat("\nRunning simulation '", ogs6_obj$sim_name, "'\n", sep = "")
}
exit_code <- system2(command = ogs6_command_str,
args = ogs6_args,
......
@@ -12,26 +12,29 @@ knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
devtools::load_all(".")
```
```{r setup}
library(r2ogs6)
library(tidyverse)
library(dplyr)
```
## Introduction

Hi there! This is a practical guide on the `OGS6_Ensemble` class from the `r2ogs6` package. I will show you how you can use this class to set up ensemble runs for OpenGeoSys 6, extract the results and plot them.

If you want to follow this tutorial, you'll need to have `r2ogs6` installed and loaded. The prerequisites for `r2ogs6` are described in detail in the `r2ogs6 User Guide` vignette.

Additionally, you'll need the following benchmark files:

* [Theis' Problem](https://gitlab.opengeosys.org/ogs/ogs/-/tree/master/Tests/Data/Parabolic/LiquidFlow/AxiSymTheis) as described [here](https://www.opengeosys.org/docs/benchmarks/liquid-flow/liquid-flow-theis-problem/)
* [Theis solution for well pumping](https://gitlab.opengeosys.org/ogs/ogs/-/tree/master/Tests/Data/Parabolic/ComponentTransport/Theis) as described [here](https://www.opengeosys.org/docs/benchmarks/hydro-component/theis/hc_theis/)

General instructions on how to download OpenGeoSys 6 benchmark files can be found [here](https://www.opengeosys.org/docs/userguide/basics/introduction/#download-benchmarks).

## Theis' problem
We will consider the following parameters for our sensitivity analysis:
@@ -62,33 +65,42 @@ prj_path <- system.file("examples/Theis_problem/benchmark_files/",
read_in_prj(ogs6_obj, prj_path)
```
We already know *which* parameters we want to change between the different simulations. But before we can create an ensemble, we need to know what their respective *values* should be. Say we don't want to hardcode each value, but instead examine the effects of changing our parameters by 1%, 10% and 50%. Below I've supplied a little function that simply takes a value and creates a tibble based on the given `deviations`.
```{r}
# This returns a df so we can keep the data together for plotting
get_values <- function(value,
                       deviations = c(-0.5, -0.1, -0.01, 0, 0.01, 0.1, 0.5),
                       zero_to_one = FALSE){

    values_and_devs <- tibble::tibble(value = numeric(),
                                      dev = numeric())

    for(i in seq_len(length(deviations))){

        if(zero_to_one &&
           !dplyr::between((value + (value * deviations[[i]])), 0, 1)){
            next
        }

        values_and_devs <-
            add_row(values_and_devs,
                    value = value + (value * deviations[[i]]),
                    dev = deviations[[i]])
    }

    return(invisible(values_and_devs))
}
# demo
paste(get_values(100)$value, collapse = " ")
```
We can now use this function for a minimal working example. Let's set up an ensemble with only the `storage` parameter. First we create the corresponding tibble...
```{r}
# Create the tibble based on the original value
storage <- get_values(ogs6_obj$media[[1]]$properties[[4]]$value)

paste(storage$value, collapse = " ")
```
... then we pass the original reference as well as the values to an ensemble object.
@@ -97,25 +109,26 @@ paste(storage, collapse = " ")
ogs6_ens <-
    OGS6_Ensemble$new(
        ogs6_obj = ogs6_obj,
        parameters = list(list(ogs6_obj$media[[1]]$properties[[4]]$value,
                               storage$value)))

# Now we run the simulations
exit_codes <- ogs6_ens$run_simulation()
paste(exit_codes, collapse = " ")
```
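If everything worked, the ensemble should contain one `OGS6` object per value in `storage` (a quick sanity check; this chunk is a sketch and not part of the original vignette):

```{r}
# One simulation per storage value, i.e. 7 with the default deviations
length(ogs6_ens$ensemble)
```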
### Plot results
#### Single simulation
Our simulations (hopefully) ran successfully - great! Now it'd be nice to see some results. Say we're interested in the `pressure` data of the first simulation.
```{r}
# This will get a list of tibbles - one for each simulation.
storage_tbls <-
    lapply(ogs6_ens$ensemble, function(x){
        x$pvds[[1]]$get_point_data(point_ids = c(0, 1, 2),
                                   Names = c("pressure"))
    })
```
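Before aggregating, it can help to glance at what `get_point_data()` returned (a quick sketch; the exact column set depends on the output files, but `pressure` and `timestep` are the columns used below):

```{r}
# Peek at the point data of the first simulation
head(storage_tbls[[1]])
```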
@@ -163,6 +176,7 @@ for(i in seq_len(length(storage_tbls))){
# Add column so we can plot based on old df rownames
new_df$orig_rwns <- as.numeric(row.names(new_df))
avg_pr_dfs <- c(avg_pr_dfs, list(new_df))
}
@@ -173,9 +187,10 @@ avg_pr_combined_df <- bind_rows(avg_pr_dfs)
ggplot(avg_pr_combined_df,
aes(x = orig_rwns,
y = avg_pressure)) +
geom_point(aes(color = as.factor(df_id))) +
geom_line(aes(color = as.factor(df_id))) +
xlab("Timestep") +
labs(color = "sim id") +
facet_grid(rows = vars(df_id))
```
@@ -186,106 +201,103 @@ Now we have the average `pressure` over time for each of the simulations (rows).
ggplot(avg_pr_combined_df, aes(x = orig_rwns,
y = avg_pressure,
group = df_id)) +
geom_point(aes(color = as.factor(df_id))) +
geom_line(aes(color = as.factor(df_id))) +
labs(color = "sim id") +
xlab("Timestep")
```
#### Matrix plot
So far we've only considered the `storage` parameter. Now we want to have a look at how the other two parameters influence the simulation. We will use our `get_values` function again and keep the default deviations.
```{r}
permeability <- get_values(ogs6_obj$media[[1]]$properties[[1]]$value)

# porosity will have length 4 because its value range is between 0 and 1
porosity <- get_values(ogs6_obj$media[[1]]$properties[[3]]$value,
                       zero_to_one = TRUE)

storage <- get_values(ogs6_obj$media[[1]]$properties[[4]]$value)
```
Now we'll put them into an ensemble together. This time it's important we set the `OGS6_Ensemble` parameter `sequential_mode` to `TRUE`: this changes the supplied parameters *sequentially*, so in the end we have 18 (7 + 4 + 7) simulations, which is equal to the sum of elements in the value vectors you supply (I've named them `values` below for clarity, though naming them is optional).

The default `FALSE` would give an error here because our value vectors do not have the same length, and even if they did, it wouldn't do what we want: the number of simulations would equal the length of *one* value vector (which is why they all have to be of the same length in that mode). Generally, set `sequential_mode` to `TRUE` if you want to examine the influence of parameters on a simulation *independently*. If you want to examine how the parameters influence *each other*, as in testing parameter combinations, the default mode is the way to go.
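To make the arithmetic concrete, here is a small sketch (not part of the original vignette) of the expected ensemble size in sequential mode:

```{r}
# In sequential mode, one simulation is run per supplied value, so the
# ensemble size is the sum of the value vector lengths (7 + 4 + 7 = 18)
length(permeability$value) + length(porosity$value) + length(storage$value)
```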
```{r}
# Change this to fit your system
testdir_path <- system.file("extdata/test_tempdirs/", package = "r2ogs6")
sim_path <- paste0(testdir_path, "/axisym_theis_sim_big")
ogs6_obj$sim_path <- sim_path
ogs6_ens_big <-
    OGS6_Ensemble$new(
        ogs6_obj = ogs6_obj,
        parameters = list(per = list(ogs6_obj$media[[1]]$properties[[1]]$value,
                                     values = permeability$value),
                          por = list(ogs6_obj$media[[1]]$properties[[3]]$value,
                                     values = porosity$value),
                          sto = list(ogs6_obj$media[[1]]$properties[[4]]$value,
                                     values = storage$value)),
        sequential_mode = TRUE)

exit_codes <- ogs6_ens_big$run_simulation()
paste(exit_codes)
```
This will take a short time. As soon as the simulations are done, we can extract the point data much like we did before. This time we want to plot the point x coordinates on the x axis, so we're leaving out `point_ids` to get all points. Also, we just want the data from the last timestep.
```{r}
# This will get a list of tibbles - one for each simulation.
per_por_sto_tbls <-
    lapply(ogs6_ens_big$ensemble, function(x){
        x$pvds[[1]]$get_point_data(Names = c("pressure"),
                                   start_at_timestep = x$pvds[[1]]$last_timestep)
    })
```
Plotting time? Not quite. We can't really differentiate yet which values were used where. Since we set `sequential_mode` to `TRUE`, `OGS6_Ensemble` allows us to utilize `relevant_parameter_at()` to fix this: it looks up which value vector the parameter changed in a given simulation came from. We will also add an extra column `dev` so we can group by deviations.
```{r}
# Combine deviation columns
deviations_comb <- c(permeability$dev, porosity$dev, storage$dev)
# Get list of dataframes
x_pr_dfs <- list()

for(i in seq_len(length(per_por_sto_tbls))){

    new_df <- per_por_sto_tbls[[i]]

    # Add column so we can separate the values based on their old df
    new_df$df_id <- rep(i, nrow(new_df))

    # Add name and deviation column
    new_df$name <- ogs6_ens_big$relevant_parameter_at(i)
    new_df$dev <- deviations_comb[[i]]

    x_pr_dfs <- c(x_pr_dfs, list(new_df))
}

# Combine dataframes into single dataframe
x_pr_combined_df <- bind_rows(x_pr_dfs)
```
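Before plotting, we can sanity-check the grouping columns: in sequential mode, each combination of `name` and `dev` should correspond to exactly one simulation (this chunk is a sketch, not part of the original vignette):

```{r}
# 18 rows, one per simulation; n is the number of points per simulation
dplyr::count(x_pr_combined_df, name, dev)
```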
*Now* it's plotting time. We group the data by deviation and then use a facet grid on the `name` column we added to the dataframes.
```{r}
# Make plot
ggplot(x_pr_combined_df,
       aes(x = x,
           y = pressure,
           group = dev)) +
    geom_point(aes(color = as.factor(dev))) +
    xlab("x point coordinate") +
    labs(color = "%") +
    facet_grid(cols = vars(name))
```
Ta-daa! All nicely grouped. We can see that `permeability` has the most influence on the pressure. Though they may look suspicious, the `porosity` and `storage` panels are plotted correctly - the points are simply placed right on top of each other. Since `porosity` can't exceed the value `1`, which was its original value, its value vector only went from -50% to 0%, which is why the colors in the `porosity` and `storage` panels differ.
## Theis solution for well pumping
....
This problem is described in detail [here](https://www.opengeosys.org/docs/benchmarks/hydro-component/theis/hc_theis/). At the top of the page, there is a link which will take you to the benchmark files you need.
We will consider the following parameters for our sensitivity analysis:
* `permeability`
......