-
Ruben Heinrich authoredRuben Heinrich authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
read_in_utils.R 14.38 KiB
#===== VALIDATION UTILITY =====
#'validate_read_in_xml
#'@description Utility function, tries parsing the provided file as an XML
#' document
#'@param file A file to be parsed as XML
#'@return The parsed XML file (as class object of type xml2::xml_document)
validate_read_in_xml <- function(file){
assertthat::assert_that(assertthat::is.string(file))
#Attempt to read in file
xml_doc <- tryCatch(
{
return(invisible(xml2::read_xml(file, encoding="ISO-8859-1")))
},
error = function(e){
print(e)
stop(paste("Could not find file (see error message above),",
"aborting call."), call. = FALSE)
}
)
}
#===== GENERAL READ IN UTILITY =====
#'read_in
#'@description Reads in elements from a file
#'@param ogs6_obj A OGS6 class object
#'@param prj_path string: Path to project file the elements should be read from
#'@param xpath_expr string: An XPath expression (should be absolute!)
read_in <- function(ogs6_obj,
prj_path,
xpath_expr){
assertthat::assert_that("OGS6" %in% class(ogs6_obj))
xml_doc <- validate_read_in_xml(prj_path)
assertthat::assert_that(assertthat::is.string(xpath_expr))
split_path <- unlist(strsplit(xpath_expr, "/", fixed = TRUE))
child_name <- split_path[[length(split_path)]]
subclasses_names <- get_subclass_names(paste0("r2ogs6_", child_name))
for(i in seq_len(length(subclasses_names))){
names(subclasses_names)[[i]] <-
get_class_tag_name(subclasses_names[[i]])
}
nodes <- xml2::xml_find_all(xml_doc, xpath_expr)
if(length(nodes) == 0){
return(invisible(FALSE))
}
r2ogs6_obj <- NULL
#Code to be parsed when r2ogs6_obj has been defined
add_call <- paste0("ogs6_obj$add_", child_name, "(r2ogs6_obj)")
#If selection_vector was NULL, parse all children
for (i in seq_along(nodes)) {
new_xpath_expr <- paste0(xpath_expr,
"/",
xml2::xml_name(nodes[[i]]))
r2ogs6_obj <- node_to_r2ogs6_obj(nodes[[i]],
new_xpath_expr,
subclasses_names)
#Add r2ogs6_obj with code snippet
eval(parse(text = add_call))
}
return(invisible(TRUE))
}
#'node_to_r2ogs6_obj
#'@description Takes an XML node and turns it into a class object
#'@param xml_node An XML node (of class xml2::xml_node)
#'@param xpath_expr An XPath expression (for subclass differentiation)
#'@param subclasses_names Optional: A character vector containing the names of
#' r2ogs6 subclasses (r2ogs6 classes without a method for input_add)
node_to_r2ogs6_obj <- function(xml_node,
xpath_expr,
subclasses_names = character()){
assertthat::assert_that(class(xml_node) == "xml_node")
parameter_nodes <- xml2::xml_children(xml_node)
parameters <- list()
if(length(xml2::xml_attrs(xml_node)) != 0){
parameters <- c(parameters, xml2::xml_attrs(xml_node))
}
for(i in seq_along(parameter_nodes)){
new_xpath_expr <- paste0(xpath_expr,
"/",
xml2::xml_name(parameter_nodes[[i]]))
#Guess R representation of node, add it to parameter list
parameters <- c(parameters, list(guess_structure(parameter_nodes[[i]],
new_xpath_expr,
subclasses_names)))
#Name parameter after the xml_node child name
names(parameters)[[length(parameters)]] <-
xml2::xml_name(parameter_nodes[[i]])
}
class_name <- ""
#If node represented by subclass, get class name
if(xml2::xml_name(xml_node) %in% names(subclasses_names)){
class_name <- paste0(subclasses_names[[xml2::xml_name(xml_node)]])
class_name <- select_fitting_subclass(xpath_expr, subclasses_names)
#Else assume class name is r2ogs6_ + node name
}else{
class_name <- paste0("r2ogs6_", xml2::xml_name(xml_node))
}
ordered_parameters <- order_parameters(parameters, class_name)
#Construct the call to the r2ogs6_object helper
class_constructor_call <-
paste0(class_name,
"(",
paste(
names(parameters),
lapply(parameters,
function(x) {
paste(utils::capture.output(dput(x)),
collapse = "\n")
}),
sep = " = ",
collapse = ", "
),
")")
#Evaluate the constructed call
r2ogs6_obj <- eval(parse(text = class_constructor_call))
return(invisible(r2ogs6_obj))
}
#'order_parameters
#'@description Orders a list of parameters corresponding to the argument order
#' of a class
#'@param parameter_list A list of parameters
#'@param class_name The name of a class
order_parameters <- function(parameter_list, class_name){
assertthat::assert_that(is.list(parameter_list))
assertthat::assert_that(assertthat::is.string(class_name))
ordered_parameters <- list()
#Gets the class parameters in the correct order
class_args <- names(as.list(formals(class_name)))
#Check for length mismatches
if(length(parameter_list) > length(class_args)){
stop(paste0("order_parameters: More parameters in parameter_list",
"than parameters in definition of class '", class_name,
"'. Please check the class definition!"), call. = FALSE)
}
#Check for value mismatches
for(i in seq_len(length(parameter_list))){
if(!names(parameter_list)[[i]] %in% class_args){
stop(paste0("order_parameters: Found element named '",
names(parameter_list)[[i]],
"', in parameter_list.",
" This element is not a parameter of class '",
class_name,
"'. Please check the class definition!"),
call. = FALSE)
}
}
for(i in seq_len(length(class_args))){
if(!class_args[[i]] %in% names(parameter_list)){
ordered_parameters[[class_args[[i]]]] <- NULL
}else{
ordered_parameters[[class_args[[i]]]] <-
parameter_list[[class_args[[i]]]]
}
}
return(invisible(ordered_parameters))
}
#===== GUESS STRUCTURE FUNCTIONALITY =====
#'guess_structure
#'@description Guesses the R representation of an XML node and adds it to
#' parameter list
#'ASSUMPTIONS:
#'1) Leaf nodes will have EITHER a value OR attributes (and will not be missing
#' both, e.g. '<a/>').
#'2) Leaf nodes will never be r2ogs6_* objects
#'3) If there are multiple occurrences of r2ogs6_* class (and subclass)
#' elements on the same level, they have a wrapper node as their parent
#' (e.g. <processes>, <properties>) which will contain ONLY elements of this
#' type
#'4) Wrapper nodes are represented as lists
#'5) Parent nodes whose children have no children are represented as lists
#'@param xml_node An XML node (of class xml2::xml_node)
#'@param xpath_expr An XPath expression (for subclass differentiation)
#'@param subclasses_names Optional: A character vector containing the names of
#' r2ogs6 subclasses (r2ogs6 classes without a method for input_add)
guess_structure <- function(xml_node,
xpath_expr,
subclasses_names = character()){
assertthat::assert_that(class(xml_node) == "xml_node")
#Node is a leaf node
if(length(xml2::xml_children(xml_node)) == 0){
if(xml2::xml_text(xml_node) != ""){
return(invisible(xml2::xml_text(xml_node)))
}else{
return(invisible(xml2::xml_attrs(xml_node)))
}
#Node is represented by subclass
}else if(xml2::xml_name(xml_node) %in% names(subclasses_names)){
return(invisible(node_to_r2ogs6_obj(xml_node,
xpath_expr,
subclasses_names)))
#Node has children that are represented by a subclass
}else if(is_subclass_wrapper(xml_node, subclasses_names) ||
is_het_wrapper(xml_node, subclasses_names)){
wrapper_list <- list()
for(i in seq_along(xml2::xml_children(xml_node))){
child_node <- xml2::xml_children(xml_node)[[i]]
list_content <- NULL
new_xpath_expr <- paste0(xpath_expr,
"/",
xml2::xml_name(child_node))
if(xml2::xml_name(child_node) %in% names(subclasses_names)){
list_content <- node_to_r2ogs6_obj(child_node,
new_xpath_expr,
subclasses_names)
}else{
list_content <- guess_structure(child_node)
}
wrapper_list <- c(wrapper_list, list(list_content))
}
return(invisible(wrapper_list))
#Return values for parent nodes whose children have no children
}else if(all(get_grandchild_length_vector(xml_node) == 0)){
return(invisible(list_from_nodeset(xml2::xml_children(xml_node))))
#If the children of the parent node do have children, look for custom function
}else{
read_in_func_call <- find_read_in_func_call(xml2::xml_name(xml_node))
if(read_in_func_call != ""){
obj <- eval(parse(text = read_in_func_call))
return(invisible(obj))
#If the node does NOT have its own function, abort mission
}else{
stop(paste0("Could not guess the structure of node with name '",
xml2::xml_name(xml_node), "', please read the ",
"documentation for guess_structure() and which ",
"assumptions it is based on."), call. = FALSE)
}
}
}
is_subclass_wrapper <- function(xml_node, subclasses_names){
child_nodes <- xml2::xml_children(xml_node)
for(i in seq_along(child_nodes)){
if(!xml2::xml_name(child_nodes[[i]]) %in% names(subclasses_names)){
return(invisible(FALSE))
}
}
return(invisible(TRUE))
}
#'is_het_wrapper
#'@description Tests if a node is a heterogenous wrapper
#'@param xml_node An XML node (of class xml2::xml_node)
#'@param subclasses_names Optional: A character vector containing the names of
#' r2ogs6 subclasses (r2ogs6 classes without a method for input_add)
is_het_wrapper <- function(xml_node, subclasses_names = character()){
child_nodes <- xml2::xml_children(xml_node)
leaves_found <- FALSE
parent_found <- FALSE
for(i in seq_along(child_nodes)){
if(length(xml2::xml_children(child_nodes[[i]])) != 0 &&
!xml2::xml_name(child_nodes[[i]]) %in% names(subclasses_names) &&
find_read_in_func_call(xml2::xml_name(child_nodes[[i]])) == ""){
return(invisible(FALSE))
}
if(length(xml2::xml_children(child_nodes[[i]])) == 0){
leaves_found <- TRUE
}
if(xml2::xml_name(child_nodes[[i]]) %in% names(subclasses_names) ||
find_read_in_func_call(xml2::xml_name(child_nodes[[i]])) != ""){
parent_found <- TRUE
}
}
if(leaves_found && parent_found){
return(invisible(TRUE))
}
return(invisible(FALSE))
}
#'get_grandchild_length_vector
#'@description Helper function to check the number of children of children of
#' an XML node (i.e. grandchildren)
#'@param xml_node An XML node
#'@return A numeric vector containing the number of children of each child of
#' xml_node
get_grandchild_length_vector <- function(xml_node){
length_vector <- c()
for(i in seq_along(xml2::xml_children(xml_node))){
child_node <- xml2::xml_children(xml_node)[[i]]
length_vector <- c(length_vector,
length(xml2::xml_children(child_node)))
}
return(invisible(length_vector))
}
#'list_from_nodeset
#'@description Creates a named vector from a nodeset
#'@param xml_nodeset An XML nodeset (of class xml2::xml_nodeset)
list_from_nodeset <- function(xml_nodeset){
assertthat::assert_that(class(xml_nodeset) == "xml_nodeset")
my_list <- list()
for(i in seq_along(xml_nodeset)){
if(xml2::xml_text(xml_nodeset[[i]]) != ""){
my_list <- c(my_list, list(xml2::xml_text(xml_nodeset[[i]])))
}else{
my_list <- c(my_list, list(xml2::xml_attrs(xml_nodeset[[i]])))
}
names(my_list)[[length(my_list)]] <- xml2::xml_name(xml_nodeset[[i]])
}
return(invisible(my_list))
}
#===== FIND CUSTOM READ IN FUNCTIONS UTILITY =====
#'find_read_in_func_call
#'@description Checks if a custom read_in function was defined for nodes
#' with the given name
#'@param node_name The name of an XML node
#'@return A ready-to-call string if a corresponding function was found, an
#' empty string otherwise
find_read_in_func_call <- function(node_name){
assertthat::assert_that(assertthat::is.string(node_name))
func_name <- paste0("read_in_", node_name, "_node")
if(exists(func_name, where=asNamespace("r2ogs6"), mode="function")){
func_call <- paste0(func_name, "(xml_node)")
return(invisible(func_call))
}
return(invisible(""))
}
#===== FILE HANDLING UTILITY =====
#'check_file_extension
#'@description Helper function to check the extension of a file
#'@param file A file
#'@param expected_extension The expected file extension
check_file_extension <- function(file, expected_extension){
assertthat::assert_that(assertthat::is.string(file))
assertthat::assert_that(assertthat::is.string(expected_extension))
if(tools::file_ext(file) != expected_extension){
stop(paste("File must have extension", expected_extension),
call. = FALSE)
}
}
#Source: https://stackoverflow.com/questions/48218491/os-independent-way-to-
# select-directory-interactively-in-r/48296736
#Helper function for choosing a directory (platform independent!)
choose_directory = function(ini_dir = getwd(),
caption = 'Select data directory') {
if (exists('utils::choose.dir')) {
utils::choose.dir(default = ini_dir, caption = caption)
} else {
tcltk::tk_choose.dir(default = ini_dir, caption = caption)
}
}