diff --git a/scripts/doc/append-xml-tags.py b/scripts/doc/append-xml-tags.py
new file mode 100755
index 0000000000000000000000000000000000000000..2b242e3ced4ddc88c55daf9a953d820b66e34c18
--- /dev/null
+++ b/scripts/doc/append-xml-tags.py
@@ -0,0 +1,169 @@
+#!/usr/bin/python
+
+# prevent broken pipe error: let SIGPIPE end the script silently
+from signal import signal, SIGPIPE, SIG_DFL
+signal(SIGPIPE,SIG_DFL)
+
+import os
+import xml.etree.ElementTree as ET  # the cElementTree alias was removed in Python 3.9
+import argparse
+
+github_src_url  = "https://github.com/ufz/ogs/tree/master"
+github_data_url = "https://github.com/ufz/ogs-data/tree/master"
+
+parser = argparse.ArgumentParser(description="Print XML tags")
+
+parser.add_argument("ext",       help="Extension of files to consider")
+parser.add_argument("datadir",   help="data directory")
+parser.add_argument("docauxdir", help="directory of auxiliary doc files")
+
+args = parser.parse_args()
+extension = '.' + args.ext                      # file name suffix to look for
+datadir   = os.path.abspath(args.datadir)
+docauxdir = os.path.abspath(args.docauxdir)
+docdir    = os.path.join(docauxdir, "dox", "ProjectFile")  # root of the doc pages
+
+# Maps the first component of a documented tag path to the dotted tag path
+# that it abbreviates; the empty string means there is nothing to prepend.
+tag_path_expansion_table = dict(
+    initial_condition="process_variables.process_variable.initial_condition",
+    boundary_condition="process_variables.process_variable.boundary_conditions.boundary_condition",
+    linear_solver="linear_solvers.linear_solver",
+    process="processes.process",
+    parameter="parameters.parameter",
+    prj="",
+)
+
+# maps tags to the set of xml files they appear in
+dict_tag_files = {}
+dict_tag_info = {}  # maps tags to parameter info obtained prior to this script
+
+def dict_of_set_append(dict_, key, value):
+    """Add value to the set stored at dict_[key].
+
+    The set is created first if key is not in dict_ yet."""
+    dict_.setdefault(key, set()).add(value)
+
+def dict_of_list_append(dict_, key, value):
+    """Append value to the list stored at dict_[key].
+
+    The list is created first if key is not in dict_ yet."""
+    dict_.setdefault(key, []).append(value)
+
+
+def print_tags(node, path, level, filepath):
+    """Record in dict_tag_files that node's tag and attributes occur in filepath.
+
+    path is the dotted tag path of node's parent, level the depth of node
+    below the XML root (level 0). All descendants of node are visited, too.
+    """
+    # the root tag never becomes part of a tag path
+    tagpath = node.tag if level <= 1 else path + "." + node.tag
+
+    if level > 0:  # the root node itself is not recorded
+        dict_of_set_append(dict_tag_files, (True, tagpath), filepath)
+        for attr_name in node.attrib:
+            dict_of_set_append(dict_tag_files, (False, tagpath + "." + attr_name), filepath)
+
+    for subnode in node:
+        print_tags(subnode, tagpath, level + 1, filepath)
+
+# gather info from xml files: which tags and attributes occur in which file
+for (curdir, _, names) in os.walk(datadir):
+    for name in names:
+        if name.endswith(extension):
+            filepath = os.path.join(curdir, name)
+            # files are recorded by their path relative to datadir
+            relpath = filepath[len(datadir)+1:]
+            print_tags(ET.parse(filepath).getroot(), "", 0, relpath)
+
+if False:  # disabled debugging aid: dump the tag -> files mapping to stdout
+    entries = sorted(dict_tag_files.items())
+    for (idx, (tag, files)) in enumerate(entries):
+        # a blank line separates consecutive entries
+        if idx > 0:
+            print()
+
+        (is_tag, name) = tag
+        print("T |" if is_tag else "A |", name)
+        for f in sorted(files):
+            print("   ", f)
+
+# read parameter cache: keep only the "OK" records, grouped by tag path
+cache_path = os.path.join(docauxdir, "documented-parameters-cache.txt")
+with open(cache_path) as fh:
+    for line in (raw.strip().split("@@@") for raw in fh):
+        if line[0] == "OK":
+            tagpath = line[3]  # field 3 is used as the key of the record
+            dict_of_list_append(dict_tag_info, tagpath, line)
+
+# traverse dox file hierarchy and append the gathered info to every doc page
+for (dirpath, _, filenames) in os.walk(docdir):
+    reldirpath = dirpath[len(docdir)+1:]
+
+    for f in filenames:
+        if not f.endswith(".dox"): continue
+
+        # The file name prefix tells what the page documents. istag is set anew
+        # for every file: i_/c_ pages must not inherit the attribute state of an
+        # a_ page that happened to be processed before them in this directory.
+        if f.startswith("i_") or f.startswith("c_"):
+            tagpath = reldirpath
+            istag = True
+        elif f.startswith("t_"):
+            tagpath = os.path.join(reldirpath, f[2:-len(".dox")])
+            istag = True
+        elif f.startswith("a_"):
+            tagpath = os.path.join(reldirpath, f[2:-len(".dox")])
+            istag = False
+        else:
+            continue # unknown page type: left untouched, no stale tag path reused
+
+        tagpath = tagpath.replace(os.sep, ".")
+
+        with open(os.path.join(dirpath, f), "a") as fh:
+            # TODO this can currently only expand the top level
+            tagpathparts = tagpath.split(".")
+            tagpathhead = tag_path_expansion_table.get(tagpathparts[0], "NONEXISTENT")
+            tagpath_expanded = ".".join((tagpathhead, *tagpathparts[1:])).lstrip(".")
+
+            if tagpath:
+                fh.write("\n\n# Additional info\n")
+                if tagpath in dict_tag_info:
+                    for info in dict_tag_info[tagpath]:
+                        # fields used: [1] source file, [2] line, [5] data type, [6] method
+                        src_path = info[1]; src_line = info[2]
+                        fh.write(("\n## From {0} line {1}\n\n")
+                                .format(src_path, src_line))
+
+                        method = info[6]
+                        if method.endswith("Optional"):
+                            fh.write("- This is an optional parameter.\n")
+                        elif method.endswith("List"):
+                            fh.write("- This parameter can be given arbitrarily many times.\n")
+                        elif method: # method not empty
+                            fh.write("- This is a required parameter.\n")
+
+                        datatype = info[5]
+                        if datatype: fh.write("- Data type: <tt>{}</tt>\n".format(datatype))
+
+                        fh.write("- Expanded tag path: {}\n".format(tagpath_expanded))
+
+                        fh.write("- Go to source code: [&rarr; ufz/ogs/master]({2}/{0}#L{1})\n"
+                                .format(src_path, src_line, github_src_url))
+                else:
+                    fh.write("\nNo additional info.\n")
+
+            # an empty expanded path denotes the main doc page: no usage list there
+            if tagpath_expanded:
+                fh.write("\n\n# Used in the following test data files\n\n")
+                try:
+                    datafiles = dict_tag_files[(istag, tagpath_expanded)]
+                except KeyError:
+                    fh.write("Used in no end-to-end test cases.\n")
+                else:
+                    for df in sorted(datafiles):
+                        fh.write("- \\[[&rarr; ogs-data/master]({1}/{0})\\]&emsp;{0}\n"
+                                .format(df, github_data_url))
+
+            fh.write("\n*/\n")
diff --git a/scripts/doc/check-project-params.py b/scripts/doc/check-project-params.py
new file mode 100755
index 0000000000000000000000000000000000000000..e7e7781bd1cbdd5e851bcd495eb75fc33f53e886
--- /dev/null
+++ b/scripts/doc/check-project-params.py
@@ -0,0 +1,103 @@
+#!/usr/bin/python
+
+import sys
+import re
+import os.path
+
+github_src_url = "https://github.com/ufz/ogs/tree/master"
+
+def debug(msg):  # diagnostics go to stderr, keeping stdout for the report
+    sys.stderr.write("".join((msg, "\n")))
+
+# the only command line argument is the directory holding the doc pages
+try:
+    (docauxdir,) = sys.argv[1:]
+except ValueError:
+    print("USAGE: {} DOCAUXDIR".format(sys.argv[0]))
+    sys.exit(1)
+
+if not os.path.isdir(docauxdir):
+    print("error: `{}' is not a directory".format(docauxdir))
+    sys.exit(1)
+
+# one list per kind of finding: filled from stdin, reported afterwards
+undocumented, unneeded_comments, wrong_input, no_doc_page = [], [], [], []
+
+# sort the records of the parameter cache (stdin) into the finding lists
+for inline in sys.stdin:
+    inline = inline.strip().split("@@@")
+    status, fields = inline[0], inline[1:]
+
+    if status == "OK":
+        tag_path_comment = inline[3]
+        *dirs, tag_name_comment = tag_path_comment.split(".")
+        p = os.path.join(docauxdir, *dirs)
+        # candidate pages: t_/a_ inside p, i_/c_ inside a subdirectory of p
+        pages = [os.path.join(p, pre + tag_name_comment + ".dox")
+                 for pre in ("t_", "a_")]
+        pages += [os.path.join(p, tag_name_comment, pre + tag_name_comment + ".dox")
+                  for pre in ("i_", "c_")]
+        if not any(os.path.isfile(page) for page in pages):
+            no_doc_page.append((tag_path_comment, inline[1], inline[2]))
+
+    elif status == "WRONGIN":
+        wrong_input.append(fields)
+    elif status == "NODOC":
+        # ignored parameters need not be documented
+        if not inline[6].startswith("ignore"):
+            undocumented.append(fields)
+    elif status == "UNNEEDED":
+        unneeded_comments.append(fields)
+    elif status == "SPECIAL":
+        # TODO implement proper handling
+        debug("SPECIAL: " + " ".join(fields))
+    else:
+        debug("ERROR: unrecognized status {}".format(status))
+
+
+# these characters get a backslash prepended before they are put into a
+# table cell (presumably because they are special to Doxygen/Markdown)
+special_chars = re.compile(r'([\\@&$#<>%".|])')
+
+if undocumented:
+    print()
+    print("# Undocumented parameters")
+    print("| File | Line | Parameter | Type | Method | Link |")
+    print("| ---- | ---: | --------- | ---- | ------ | ---- |")
+    row = ("| {0} | {1} | {3} | <tt>{4}</tt> | <tt>{5}</tt> "
+           "| [&rarr; ufz/ogs/master]({6}/{0}#L{1})")
+    for u in sorted(undocumented):
+        print(row.format(*u, github_src_url))
+
+if unneeded_comments:
+    print()
+    print("# Comments not documenting anything")
+    print("| File | Line | Comment | Link |")
+    print("| ---- | ---: | ------- | ---- |")
+    row = "| {0} | {1} | {2} | [&rarr; ufz/ogs/master]({3}/{0}#L{1}) |"
+    for u in sorted(unneeded_comments):
+        u2 = u[:2] + [special_chars.sub(r"\\\1", u[2])] + u[3:]
+        print(row.format(*u2, github_src_url))
+
+if wrong_input:
+    print()
+    print("# Lines of input to that script that have not been recognized")
+    print("| File | Line | Content | Link |")
+    print("| ---- | ---: | ------- | ---- |")
+    row = "| {0} | {1} | {2} | [&rarr; ufz/ogs/master]({3}/{0}#L{1}) |"
+    for w in sorted(wrong_input):
+        w2 = w[:2] + [special_chars.sub(r"\\\1", w[2])] + w[3:]
+        print(row.format(*w2, github_src_url))
+
+if no_doc_page:
+    print()
+    print("# No documentation page")
+    print("| Parameter | File | Line | Link |")
+    print("| --------- | ---- | ---: | ---- |")
+    row = "| {0} | {1} | {2} | [&rarr; ufz/ogs/master]({3}/{1}#L{2}) |"
+    for n in sorted(no_doc_page):
+        print(row.format(*n, github_src_url))
+
+# exit with error status if something was not documented.
+findings = undocumented or unneeded_comments or wrong_input or no_doc_page
+sys.exit(1 if findings else 0)
diff --git a/scripts/doc/create-docu-file-stubs.sh b/scripts/doc/create-docu-file-stubs.sh
new file mode 100755
index 0000000000000000000000000000000000000000..5a38fada92ee53de94ed880d8b1f408e397083ba
--- /dev/null
+++ b/scripts/doc/create-docu-file-stubs.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# expect input from get-project-params.sh, i.e., lines FILE:LINE:CONTENT
+
+base="Documentation/ProjectFile"
+
+while IFS=":" read -r fn lno content; do
+    [ "$content" = "${content#*//!}" ] && continue
+    tag_name="$(echo "$content" \
+        | sed -n -e 'sX^\s*//! \\ogs_file_\(param\|attr\)\(_special\)\?{\([A-Za-z_0-9]\+\)}$X\1 \3Xp')"
+    [ -z "$tag_name" ] && continue
+    param_or_attr="${tag_name%% *}"
+    tag_name="${tag_name#* }"
+    tag_name="${tag_name//__/\/}" # bash-only expansion: "__" separates path components
+    echo "$param_or_attr $base/$tag_name"
+done \
+| sort -r \
+| while read -r param_or_attr path; do
+    dn="`dirname "$path"`"
+    bn="`basename "$path"`"
+    # directories get an i_ page, or a c_ page if their name starts uppercase
+
+    if [ ! -d "$dn" ]; then
+        mkdir -p "$dn"
+
+        bdn="`basename "$dn"`"
+        if [ "`expr match "$bdn" '^[A-Z]'`" -eq 0 ] && [ ! -f "$dn/i_$bdn.md" ]; then
+            echo "creating $dn/i_$bdn.md"
+            echo '\todo document' >"$dn/i_$bdn.md"
+        elif [ "`expr match "$bdn" '^[A-Z]'`" -ne 0 ] && [ ! -f "$dn/c_$bdn.md" ]; then
+            echo "creating $dn/c_$bdn.md"
+            echo '\todo document' >"$dn/c_$bdn.md"
+        fi
+    fi
+
+    if [ -d "$path" ]; then
+        if [ "`expr match "$bn" '^[A-Z]'`" -eq 0 ] && [ ! -f "$path/i_$bn.md" ]; then
+            echo "creating $path/i_$bn.md"
+            echo '\todo document' >"$path/i_$bn.md"
+        elif [ "`expr match "$bn" '^[A-Z]'`" -ne 0 ] && [ ! -f "$path/c_$bn.md" ]; then
+            echo "creating $path/c_$bn.md"
+            echo '\todo document' >"$path/c_$bn.md"
+        fi
+    elif [ "$param_or_attr" = param ] && [ ! -f "$dn/t_$bn.md" ]; then
+        echo "creating $dn/t_$bn.md"
+        echo '\todo document' >"$dn/t_$bn.md"
+    elif [ "$param_or_attr" = attr ] && [ ! -f "$dn/a_$bn.md" ]; then
+        echo "creating $dn/a_$bn.md"
+        echo '\todo document' >"$dn/a_$bn.md"
+    # else
+    #     echo "OK $path"
+    fi
+
+    # if [ -d "$path" ] && [ -f "$path.md" ]; then
+    #     echo "ERROR: both $path and $path.md exist!" >&2
+    # fi
+done
diff --git a/scripts/doc/generate-project-file-doc-qa.sh b/scripts/doc/generate-project-file-doc-qa.sh
new file mode 100644
index 0000000000000000000000000000000000000000..77c3a226346a0c865af4e0fd0c5b49370d3596e7
--- /dev/null
+++ b/scripts/doc/generate-project-file-doc-qa.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# Writes the parameter cache and the QA page of the project file
+# documentation, then completes the parameter doc pages.
+
+echo "======== $*"
+
+if (( $# != 3 )); then
+    echo "USAGE: $0 SRCDIR BUILDDIR DATADIR" >&2
+    exit 1
+fi
+
+srcdir="$1"
+builddir="$2"
+datadir="$3"
+
+toolsdir="$srcdir/scripts/doc"
+docauxdir="$builddir/DocAux"
+doxdir="$docauxdir/dox"
+param_cache="$docauxdir/documented-parameters-cache.txt"
+qafile="$doxdir/project-file-doc-qa.dox"
+check_quality_script="$toolsdir/check-project-params.py"
+
+mkdir -p "$doxdir"
+
+# gather information about documented parameters
+"$toolsdir/get-project-params.sh" "$srcdir" \
+    | "$toolsdir/normalize-param-cache.py" >"$param_cache"
+
+# write QA information: page header, output of the checker, closing "*/"
+{
+    cat <<"EOF"
+/*! \page project_file_doc_qa OGS Input File Parameters&mdash;Quality Assurance
+
+This is the QA page
+
+EOF
+
+    cat "$param_cache" | "$check_quality_script" "$doxdir/ProjectFile"
+
+    printf '\n*/\n'
+} >"$qafile"
+
+# finish parameter documentation dox files
+"$toolsdir/append-xml-tags.py" prj "$datadir" "$docauxdir"
diff --git a/scripts/doc/get-project-params.sh b/scripts/doc/get-project-params.sh
new file mode 100755
index 0000000000000000000000000000000000000000..09cd9e99e90102c056f5da5bbff296f1c40f8cc4
--- /dev/null
+++ b/scripts/doc/get-project-params.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+if [ $# -ne 1 ]; then
+    echo "USAGE: ${0##*/} SRCDIR" >&2
+    exit 1
+fi
+
+srcdir="`readlink -f "$1"`"
+
+# set to "--color=always" to highlight the matches; empty adds no option
+color=""
+# grep reads the patterns of the here-document (-f -); cut strips "$srcdir/"
+cat <<"EOF" \
+| grep -r "$srcdir" \
+    --include '*.h' \
+    --include '*.cpp' \
+    --exclude-dir '.git' \
+    --exclude-dir 'Tests' \
+    --exclude 'ConfigTree*.*' \
+    -f - -r -n -o $color \
+| cut -c $((${#srcdir} + 2))-
+^\s*//! \\ogs_file_\(param\|attr\){[A-Za-z_0-9]\+}\( \\todo .*\)\?$
+^\s*//! \\ogs_file_special$
+^\s*//! \\ogs_file_\(param\|attr\)_special{[A-Za-z_0-9]\+}\( \\todo .*\)\?$
+checkConfParam.*)
+getConfAttribute.*)
+getConfParam.*)
+getConfSubtree.*)
+ignoreConfAttribute.*)
+ignoreConfParam.*)
+peekConfParam.*)
+EOF
+
+# format as table:
+# | sed -e 's_::_@@_g' -e's_:\s\+_:_' | column -t -s: | sed -e 's_@@_::_g'
diff --git a/scripts/doc/get-undocumented-project-params.py b/scripts/doc/get-undocumented-project-params.py
new file mode 100755
index 0000000000000000000000000000000000000000..238b0bbddccd4c70ef26da41b48ba0976189db1f
--- /dev/null
+++ b/scripts/doc/get-undocumented-project-params.py
@@ -0,0 +1,43 @@
+#!/usr/bin/python
+
+# expect input from get-project-params.sh
+
+import sys
+import subprocess
+
+# scanner state: whether the next non-comment line lacks a doc comment, the
+# file seen on the previous input line, and the line numbers collected for it
+print_next, old_fn = True, None
+undoc_lnos = []
+
+def add_doc_stubs(fn, lnos):
+    """Run sed -i on fn with one insert command per entry of lnos; clear lnos."""
+    if not lnos: return
+
+    print(fn, lnos)
+    stub = r""" i \
+//! \\ogs_file_param{todo_document_parameter} \\todo project_file_docu
+//! \\ogs_file_param{todo_document_parameter} \\todo project_file_docu
+"""
+    sed_args = []
+    for lno in lnos:
+        sed_args += ["-e", str(lno) + stub]
+    subprocess.run(["sed", "-i"] + sed_args + [fn])
+    lnos.clear()
+
+
+# scan the input; a line is undocumented unless a //! comment precedes it
+for line in sys.stdin:
+    fn, l, content = line.split(maxsplit=2)
+    if fn != old_fn:
+        # a new file starts: write the stubs collected for the previous one
+        add_doc_stubs(old_fn, undoc_lnos)
+        old_fn = fn
+
+    is_doc_comment = content.startswith("//!")
+    if print_next and not is_doc_comment:
+        undoc_lnos.append(l)
+    # only the line directly after a doc comment is exempt from stub insertion
+    print_next = not is_doc_comment
+
+add_doc_stubs(old_fn, undoc_lnos)
diff --git a/scripts/doc/normalize-param-cache.py b/scripts/doc/normalize-param-cache.py
new file mode 100755
index 0000000000000000000000000000000000000000..acd524bdabd60ff9ce3c07f8fdf46aec44f7c341
--- /dev/null
+++ b/scripts/doc/normalize-param-cache.py
@@ -0,0 +1,130 @@
+#!/usr/bin/python
+
+import sys
+import re
+import os.path
+
+def debug(msg):  # diagnostics go to stderr, stdout carries the records
+    sys.stderr.write("".join((msg, "\n")))
+
+def write_out(*args):  # prints one record: the fields joined by "@@@"
+    print(*args, sep="@@@")
+
+# doc comment of a parameter or attribute; capture #1 is the kind (param or
+# attr), capture #2 the tag path, whose components are separated by "__"
+comment = re.compile(r"^//! \\ogs_file_(param|attr)\{([A-Za-z_0-9]+)\}( \\todo .*)?$")
+comment_special = re.compile(r"^//! \\ogs_file(_param|_attr)?_special(\{[A-Za-z_0-9]+\})?( \\todo .*)?$")
+
+# start of a getter call: captures #1-#3 make up the method name, capture #4
+# is the template argument including the angle brackets
+getter_start = (r'^(get|check|ignore|peek)Conf(Param|Attribute|Subtree)(List|Optional|All)?'
+                r'(<.*>)?')
+
+getter_special = re.compile(getter_start + r'\(')
+getter = re.compile(getter_start + r'\("([a-zA-Z_0-9:]+)"[,)]')  # capture #5: parameter name
+
+state = "getter"  # "comment" while a doc comment waits for its getter line
+path = ""; lineno = 0; line = ""
+tag_path_comment = ""
+# read for every getter that follows a comment, even a special one: predefine
+tag_name_comment = ""
+param_or_attr_comment = ""
+
+for inline in sys.stdin:  # one record is written per input line, except for regular doc comments
+    oldpath = path; oldlineno = lineno; oldline = line  # keep the previous input line
+
+    path, lineno, line = inline.split(":", 2)  # input format: PATH:LINENO:TEXT
+
+    if path != oldpath: debug(path)
+
+    line = line.strip()
+    lineno = int(lineno)
+
+    m = comment.fullmatch(line)  # case 1: regular doc comment
+    if m:
+        if state != "getter":  # the previous line was a comment not followed by a getter
+            write_out("UNNEEDED", oldpath, oldlineno, oldline)
+        state = "comment"
+
+        param_or_attr_comment = m.group(1)
+        tag_path_comment = m.group(2).replace("__", ".")
+        debug(" {:>5}  //! {}".format(lineno, tag_path_comment))
+        tag_name_comment = tag_path_comment.split(".")[-1]
+
+        continue
+
+    m = comment_special.fullmatch(line)  # case 2: special doc comment
+    if m:
+        if state != "getter":
+            write_out("UNNEEDED", oldpath, oldlineno, oldline)
+        state = "comment"
+        param_or_attr_comment = "special"
+
+        if m.group(1): # param|attr matched
+            # second group must not be empty!
+            tag_path_comment = m.group(2).strip("{}").replace("__", ".")
+            param = tag_path_comment.split(".")[-1]
+            paramtype = ""
+            method = ""
+            write_out("OK", path, lineno, tag_path_comment, param, paramtype, method)
+            state = "getter" # reset state s.t. next time a comment is accepted
+
+        continue
+
+    m = getter.match(line)  # case 3: getter call with a literal parameter name
+    if m:
+        param = m.group(5)
+        paramtype = m.group(4)[1:-1] if m.group(4) else ""  # drop the angle brackets
+        method = m.group(1) + "Conf" + m.group(2) + (m.group(3) or "")
+
+        if state != "comment" or oldpath != path:  # no comment on the previous line of this file
+            write_out("NODOC", path, lineno, "NONE", param, paramtype, method)
+        else:
+            debug(" {:>5}  {} {} ".format(lineno, param, paramtype))
+
+            if param != tag_name_comment:  # NOTE(review): tag_name_comment is only set by regular comments
+                debug("error: parameter name from comment and code do not match: "
+                        + tag_name_comment + " vs. " + param)
+                write_out("NODOC", path, lineno, tag_path_comment, param, paramtype, method)
+            elif lineno != oldlineno+1:
+                debug("error: the associated comment is not on the line preceding this one."
+                        + " line numbers {} vs. {}".format(oldlineno, lineno))
+                write_out("NODOC", path, lineno, tag_path_comment, param, paramtype, method)
+            elif param_or_attr_comment == "param" and m.group(2) != "Param" and m.group(2) != "Subtree":
+                debug("error: comment says param but code says different.")
+                write_out("NODOC", path, lineno, tag_path_comment, param, paramtype, method)
+            elif param_or_attr_comment == "attr" and m.group(2) != "Attribute":
+                debug("error: comment says attr but code says different.")
+                write_out("NODOC", path, lineno, tag_path_comment, param, paramtype, method)
+            elif param_or_attr_comment == "special":
+                debug("error: comment comments a special line.")
+                write_out("NODOC", path, lineno, "UNKNOWN", "UNKNOWN", paramtype, method)
+            else:
+                write_out("OK", path, lineno, tag_path_comment, param, paramtype, method)
+
+        state = "getter"
+        continue
+
+    m = getter_special.match(line)  # case 4: getter call without a literal parameter name
+    if m:
+        paramtype = m.group(4)[1:-1] if m.group(4) else ""
+        method = m.group(1) + "Conf" + m.group(2) + (m.group(3) or "")
+
+        if state != "comment" or oldpath != path:
+            write_out("NODOC", path, lineno, "NONE", "UNKNOWN", paramtype, method)
+        else:
+            if lineno != oldlineno+1:
+                debug("error: the associated comment is not on the line preceding this one."
+                        + " line numbers {} vs. {}".format(oldlineno, lineno))
+                write_out("NODOC", path, lineno, "UNKNOWN", "UNKNOWN", paramtype, method)
+            elif param_or_attr_comment != "special":
+                debug("error: comment does not comment a special line.")
+                write_out("NODOC", path, lineno, "UNKNOWN", "UNKNOWN", paramtype, method)
+            else:
+                write_out("SPECIAL", path, lineno, paramtype, method)
+
+        state = "getter"
+        continue
+
+    write_out("WRONGIN", path, lineno, line.strip())  # case 5: none of the patterns matched
+    state = "getter" # reset state in order to avoid warnings
diff --git a/scripts/doc/print-xml-tags.py b/scripts/doc/print-xml-tags.py
new file mode 100755
index 0000000000000000000000000000000000000000..a4831ea0a37a0de2375a6bb09ad58f68f18ef854
--- /dev/null
+++ b/scripts/doc/print-xml-tags.py
@@ -0,0 +1,67 @@
+#!/usr/bin/python
+
+# prevent broken pipe error: let SIGPIPE end the script silently
+from signal import signal, SIGPIPE, SIG_DFL
+signal(SIGPIPE,SIG_DFL)
+
+import os
+# the cElementTree alias was removed in Python 3.9
+import xml.etree.ElementTree as ET
+
+import argparse
+
+parser = argparse.ArgumentParser(description="Print XML tags")
+
+parser.add_argument("ext",  help="Extension of files to consider")
+parser.add_argument("path", help="Top level directory of traversal")
+
+args = parser.parse_args()
+rootdir = os.path.abspath(args.path)
+extension = '.' + args.ext  # file name suffix to look for
+
+# maps tags to the set of xml files they appear in
+dict_tag_files = dict()
+
+def dict_of_set_append(dict_, key, value):
+    """Add value to the set stored at dict_[key].
+
+    The set is created first if key is not in dict_ yet."""
+    dict_.setdefault(key, set()).add(value)
+
+
+def print_tags(node, path, level, filepath):
+    """Record in dict_tag_files that node's tag and attributes occur in filepath.
+
+    path is the dotted tag path of node's parent, level the depth of node
+    below the XML root (level 0). All descendants of node are visited, too.
+    """
+    # the root tag never becomes part of a tag path
+    tagpath = node.tag if level <= 1 else path + "." + node.tag
+
+    if level > 0:  # the root node itself is not recorded
+        dict_of_set_append(dict_tag_files, "T | " + tagpath, filepath)
+        for attr_name in node.attrib:
+            dict_of_set_append(dict_tag_files, "A | " + tagpath + "." + attr_name, filepath)
+
+    for subnode in node:
+        print_tags(subnode, tagpath, level + 1, filepath)
+
+
+# record the tag usage of every matching file below rootdir
+for (curdir, _, names) in os.walk(rootdir):
+    for name in names:
+        if name.endswith(extension):
+            filepath = os.path.join(curdir, name)
+            # files are listed by their path relative to rootdir
+            print_tags(ET.parse(filepath).getroot(), "", 0, filepath[len(rootdir)+1:])
+
+# print each key followed by the indented list of the files using it;
+# a blank line separates consecutive keys
+entries = sorted(dict_tag_files.items())
+for (idx, (tag, files)) in enumerate(entries):
+    if idx > 0:
+        print()
+
+    print(tag)
+    for f in sorted(files):
+        print("   ", f)