diff --git a/Makefile b/Makefile index 024cd745..5832e279 100644 --- a/Makefile +++ b/Makefile @@ -12,9 +12,10 @@ MAKEFLAGS += --warn-undefined-variables build: mkdir build -.PHONY: doc time test sqlite_test pg_test -.PHONY: api_test sqlite_api_test pg_qpi_test -.PHONY: random_test_data random_test sqlite_random_test pg_random_test +.PHONY: doc readme valve_debug valve_release test sqlite_test pg_test api_test sqlite_api_test \ + pg_qpi_test random_test_data random_test sqlite_random_test pg_random_test guess_test_data \ + perf_test_data sqlite_perf_test pg_perf_test perf_test + doc: cargo doc --document-private-items @@ -23,13 +24,19 @@ readme: cargo readme --no-title > README.md valve: src/*.rs src/*.lalrpop + @$(MAKE) valve_debug + +valve_release: rm -f valve cargo build --release ln -s target/release/ontodev_valve valve - # cargo build - # ln -s target/debug/ontodev_valve valve -build/valve.db: test/src/table.tsv valve clean | build +valve_debug: + rm -f valve + cargo build + ln -s target/debug/ontodev_valve valve + +build/valve.db: test/src/table.tsv clean valve | build ./valve $< $@ test/output: @@ -37,7 +44,8 @@ test/output: test: sqlite_test pg_test api_test random_test -tables_to_test = column datatype rule table table1 table2 table3 table4 table5 table6 table7 table8 table9 table10 table11 +tables_to_test = column datatype rule table table1 table2 table3 table4 table5 table6 table7 table8 \ + table9 table10 table11 sqlite_test: build/valve.db test/src/table.tsv | test/output @echo "Testing valve on sqlite ..." @@ -93,10 +101,10 @@ random_test_dir = test/random_test_data random_test: sqlite_random_test pg_random_test $(random_test_dir)/ontology: - mkdir -p $(random_test_dir)/ontology + mkdir -p $@ -random_test_data: test/generate_random_test_data.py | $(random_test_dir)/ontology - ./$< $$(date +"%s") 100 5 $| +random_test_data: test/generate_random_test_data.py valve valve test/random_test_data/table.tsv | $(random_test_dir)/ontology + ./$< $$(date +"%s") 100 5 $(word 3,$^) $| sqlite_random_test: valve clean random_test_data | build test/output @echo "Testing with random data on sqlite ..." @@ -110,40 +118,66 @@ pg_random_test: valve clean random_test_data | build test/output test/round_trip.sh postgresql:///valve_postgres $(random_test_dir)/table.tsv @echo "Test succeeded!" -test/perf_test_data/ontology: test/generate_random_test_data.py - mkdir $@ - ./$< 1 10000 5 $@ +guess_test_dir = test/guess_test_data +guess_test_db = build/valve_guess.db + +$(guess_test_dir)/table1.tsv: test/generate_random_test_data.py valve $(guess_test_dir)/*.tsv + ./$< 0 30000 5 $(guess_test_dir)/table.tsv $(guess_test_dir) + +$(guess_test_dir)/ontology: + mkdir -p $@ + +guess_test_data: test/generate_random_test_data.py $(guess_test_dir)/table1.tsv valve confirm_overwrite.sh $(guess_test_dir)/*.tsv | $(guess_test_dir)/ontology + ./confirm_overwrite.sh $(guess_test_dir)/ontology + rm -f $(guess_test_dir)/table1.tsv + ./$< 0 30000 5 $(guess_test_dir)/table.tsv $(guess_test_dir) + rm -f $(guess_test_dir)/ontology/*.tsv + ./$< 0 30000 5 $(guess_test_dir)/table_expected.tsv $| + rm -f $(guess_test_dir)/ontology/table1.tsv -build/valve_perf.db: valve | test/perf_test_data/ontology build - @if [ -f $@ ]; \ - then \ - echo "'$@' exists but is out of date. 
To rebuild '$@', run \`make cleanperfdb\`" \ - "before running \`make $@\`" ; \ - false; \ - fi - time -p ./$< --verbose test/perf_test_data/table.tsv $@ +$(guess_test_db): valve guess_test_data $(guess_test_dir)/*.tsv | build $(guess_test_dir)/ontology + rm -f $@ + ./$< $(guess_test_dir)/table.tsv $@ + +perf_test_dir = test/perf_test_data +perf_test_db = build/valve_perf.db + +$(perf_test_dir)/ontology: + mkdir -p $@ + +perf_test_data: test/generate_random_test_data.py valve confirm_overwrite.sh $(perf_test_dir)/*.tsv | $(perf_test_dir)/ontology + ./confirm_overwrite.sh $(perf_test_dir)/ontology + rm -f $(perf_test_dir)/ontology/*.tsv + ./$< $$(date +"%s") 10000 5 $(perf_test_dir)/table.tsv $| + +$(perf_test_db): valve perf_test_data $(perf_test_dir)/*.tsv | build $(perf_test_dir)/ontology + rm -f $@ + time -p ./$< --verbose $(perf_test_dir)/table.tsv $@ -.PHONY: sqlite_perf_test sqlite_perf_test: build/valve_perf.db | test/output time -p scripts/export.py messages $< $| $(tables_to_test) -.PHONY: pg_perf_test -pg_perf_test: valve test/perf_test_data/ontology | test/output - time -p ./$< --verbose test/perf_test_data/table.tsv postgresql:///valve_postgres +pg_perf_test: valve $(perf_test_dir)/ontology | test/output + time -p ./$< --verbose $(perf_test_dir)/table.tsv postgresql:///valve_postgres time -p scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test) -.PHONY: perf_test perf_test: sqlite_perf_test pg_perf_test clean: - rm -Rf build/valve.db build/valve_random.db test/output $(random_test_dir)/ontology + rm -Rf build/valve.db* build/valve_random.db* test/output $(random_test_dir)/ontology valve -cleanperfdb: +clean_guess_db: + rm -Rf build/valve_guess.db + +clean_guess_data: + rm -Rf $(guess_test_dir)/table1.tsv $(guess_test_dir)/ontology + +clean_perf_db: rm -Rf build/valve_perf.db -cleanperfdata: - rm -Rf test/perf_test_data/ontology +clean_perf_data: + rm -Rf $(perf_test_dir)/ontology -cleanall: clean cleanperfdb cleanperfdata +cleanall: clean clean_perf_db clean_perf_data clean_guess_db clean_guess_data cargo clean - rm -Rf valve + rm -f valve diff --git a/confirm_overwrite.sh b/confirm_overwrite.sh new file mode 100755 index 00000000..aa58cd50 --- /dev/null +++ b/confirm_overwrite.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env sh + +if [ -d $1 -a ! -z "$(ls -A $1)" ] +then + printf "$1 already exists and contains the following files: $(ls -A -m -w 0 $1)\nAre you sure (y/n)? " + read enter + if [ $enter = 'y' ] + then + exit 0 + else + echo "Understood. Exiting with error code." 
+ exit 1 + fi +fi diff --git a/scripts/guess.py b/scripts/guess.py new file mode 100755 index 00000000..0f9ab864 --- /dev/null +++ b/scripts/guess.py @@ -0,0 +1,524 @@ +#!/usr/bin/env python3 + +import csv +import json +import random +import re +import sqlite3 +import subprocess +import sys +import time + +from copy import deepcopy +from guess_grammar import grammar, TreeToDict + +from argparse import ArgumentParser +from lark import Lark +from numbers import Number +from pathlib import Path +from pprint import pformat +from textwrap import dedent + + +SPECIAL_TABLES = ["table", "column", "datatype", "rule", "history", "message"] +VERBOSE = False + + +def log(message, force=False, suppress_time=False): + global VERBOSE + + if force or VERBOSE: + if not suppress_time: + print(f"{time.asctime()} {message}", file=sys.stderr) + else: + print(f"{message}", file=sys.stderr) + + +def has_ncolumn(sample, ncolumn): + return bool([label for label in sample if sample[label]["normalized"] == ncolumn]) + + +def get_random_sample(table, sample_size): + # Get the number of rows in the file (we substract 1 for the header row): + with open(table, "rb") as f: + total_rows = sum(1 for _ in f) - 1 + + if total_rows <= sample_size: + sample_size = total_rows + sample_row_numbers = range(0, total_rows) + else: + sample_row_numbers = random.sample(range(0, total_rows), sample_size) + with open(table) as f: + rows = [r for r in csv.DictReader(f, delimiter="\t", quoting=csv.QUOTE_NONE)] + sample = {} + pattern = re.compile(r"[^0-9a-zA-Z_]+") + for i in sample_row_numbers: + for label, value in rows[i].items(): + if label not in sample: + ncolumn = re.sub(pattern, "_", label).casefold().strip("_") + if has_ncolumn(sample, ncolumn): + print( + "The data has more than one column with the normalized name " + f"{ncolumn}" + ) + sys.exit(1) + sample[label] = { + "normalized": ncolumn, + "values": [], + } + sample[label]["values"].append(value) + return sample + + +def get_valve_config(valve_table): + result = subprocess.run(["./valve", "--dump_config", valve_table], capture_output=True) + if result.returncode != 0: + error = result.stderr.decode() + output = result.stdout.decode() + if output: + error = f"{error}\n{output}" + print(f"{error}", file=sys.stderr) + sys.exit(result.returncode) + return json.loads(result.stdout.decode()) + + +def get_hierarchy_for_dt(config, primary_dt_name): + def get_parents(dt_name): + datatypes = [] + if dt_name is not None: + datatype = config["datatype"][dt_name] + if datatype["datatype"] != primary_dt_name: + datatypes.append(datatype) + datatypes += get_parents(datatype.get("parent")) + return datatypes + + return [config["datatype"][primary_dt_name]] + get_parents(primary_dt_name) + + +def get_dt_hierarchies(config): + """ + Given a VALVE configuration, return a datatype hierarchy that looks like this: + {0: {'dt_name_1': [{'datatype': 'dt_name_1', + 'description': 'a description', + ...}, + {'datatype': 'parent datatype', + 'description': 'a description', + ...}, + {'datatype': 'grandparent datatype', + 'description': 'a description', + ...}, + ...], + 'dt_name_2': etc.}, + 1: ... 
etc.} + """ + + def get_higher_datatypes(datatype_hierarchies, universals, depth): + current_datatypes = [dt_name for dt_name in datatype_hierarchies.get(depth, [])] + higher_datatypes = {} + if current_datatypes: + universals = [dt_name for dt_name in universals] + lower_datatypes = [] + for i in range(0, depth): + lower_datatypes += [dt_name for dt_name in datatype_hierarchies.get(i, [])] + for dt_name in dt_hierarchies[depth]: + dt_hierarchy = dt_hierarchies[depth][dt_name] + if len(dt_hierarchy) > 1: + parent_hierarchy = dt_hierarchy[1:] + parent = parent_hierarchy[0]["datatype"] + if parent not in current_datatypes + lower_datatypes + universals: + higher_datatypes[parent] = parent_hierarchy + return higher_datatypes + + dt_config = config["datatype"] + dt_names = [dt_name for dt_name in dt_config] + dt_hierarchies = {0: {}} + universals = {} + for dt_name in dt_names: + # Add all the leaf datatypes to dt_hierarchies at 0 depth: + children = [child for child in dt_names if dt_config[child].get("parent") == dt_name] + if not children: + dt_hierarchies[0][dt_name] = get_hierarchy_for_dt(config, dt_name) + # Ungrounded and unconditioned datatypes go into the universals category, which are added + # to the top of dt_hierarchies later: + elif not dt_config[dt_name].get("parent") or not dt_config[dt_name].get("condition"): + universals[dt_name] = get_hierarchy_for_dt(config, dt_name) + + depth = 0 + higher_dts = get_higher_datatypes(dt_hierarchies, universals, depth) + while higher_dts: + depth += 1 + dt_hierarchies[depth] = deepcopy(higher_dts) + higher_dts = get_higher_datatypes(dt_hierarchies, universals, depth) + dt_hierarchies[depth + 1] = universals + return dt_hierarchies + + +def get_sql_type(config, datatype): + """Given the config map and the name of a datatype, climb the datatype tree (as required), + and return the first 'SQLite type' found.""" + if "datatype" not in config: + print("Missing datatypes in config") + sys.exit(1) + if datatype not in config["datatype"]: + return None + if config["datatype"][datatype].get("SQLite type"): + return config["datatype"][datatype]["SQLite type"] + return get_sql_type(config, config["datatype"][datatype].get("parent")) + + +def get_potential_foreign_columns(config, datatype): + global SPECIAL_TABLES + + def get_coarser_sql_type(datatype): + sql_type = get_sql_type(config, datatype) + if sql_type not in ["integer", "numeric", "real"]: + return "text" + else: + return sql_type.casefold() + + potential_foreign_columns = [] + this_sql_type = get_coarser_sql_type(datatype) + for table, table_config in config["table"].items(): + if table not in SPECIAL_TABLES: + for column, column_config in table_config["column"].items(): + if column_config.get("structure") in ["primary", "unique"]: + foreign_sql_type = get_coarser_sql_type(column_config["datatype"]) + if foreign_sql_type == this_sql_type: + potential_foreign_columns.append( + { + "table": table, + "column": column, + "sql_type": foreign_sql_type, + } + ) + return potential_foreign_columns + + +SAVED_CONDITIONS = {} + + +def get_compiled_condition(condition, parser): + global SAVED_CONDITIONS + + if condition in SAVED_CONDITIONS: + return SAVED_CONDITIONS[condition] + + parsed_condition = parser.parse(condition) + if len(parsed_condition) != 1: + print( + f"'{condition}' is invalid. 
Only one condition per column is allowed.", file=sys.stderr + ) + sys.exit(1) + parsed_condition = parsed_condition[0] + if parsed_condition["type"] == "function" and parsed_condition["name"] == "equals": + expected = re.sub(r"^['\"](.*)['\"]$", r"\1", parsed_condition["args"][0]["value"]) + compiled_condition = lambda x: x == expected + elif parsed_condition["type"] == "function" and parsed_condition["name"] in ( + "exclude", + "match", + "search", + ): + pattern = re.sub(r"^['\"](.*)['\"]$", r"\1", parsed_condition["args"][0]["pattern"]) + flags = parsed_condition["args"][0]["flags"] + flags = "(?" + "".join(flags) + ")" if flags else "" + pattern = re.compile(flags + pattern) + if parsed_condition["name"] == "exclude": + compiled_condition = lambda x: not bool(pattern.search(x)) + elif parsed_condition["name"] == "match": + compiled_condition = lambda x: bool(pattern.fullmatch(x)) + else: + compiled_condition = lambda x: bool(pattern.search(x)) + elif parsed_condition["type"] == "function" and parsed_condition["name"] == "in": + alternatives = [ + re.sub(r"^['\"](.*)['\"]$", r"\1", arg["value"]) for arg in parsed_condition["args"] + ] + compiled_condition = lambda x: x in alternatives + else: + print(f"Unrecognized condition: {condition}", file=sys.stderr) + sys.exit(1) + + SAVED_CONDITIONS[condition] = compiled_condition + return compiled_condition + + +def annotate(label, sample, config, error_rate, is_primary_candidate): + def has_nulltype(target): + num_values = len(target["values"]) + num_empties = target["values"].count("") + return num_empties / num_values > error_rate + + def has_duplicates(target, ignore_empties): + if ignore_empties: + values = [v for v in target["values"] if v != ""] + else: + values = target["values"] + distinct_values = set(values) + return (len(values) - len(distinct_values)) > (error_rate * len(values)) + + def get_datatype(target, dt_hierarchies): + def is_match(datatype): + # If the datatype has no associated condition then it matches anything: + if not datatype.get("condition"): + return True + # If the SQLite type is NULL this datatype is ruled out: + sqlite_type = datatype.get("SQLite type") + if sqlite_type and sqlite_type.casefold() == "null": + return False + + condition = get_compiled_condition(datatype["condition"], config["parser"]) + num_values = len(target["values"]) + num_passed = [condition(v) for v in target["values"]].count(True) + success_rate = num_passed / num_values + if (1 - success_rate) <= error_rate: + return success_rate + + def tiebreak(datatypes): + in_types = [] + other_types = [] + parents = set([dt["datatype"].get("parent") for dt in datatypes]) + parents.discard(None) + for dt in datatypes: + if dt["datatype"]["datatype"] not in parents: + if dt["datatype"].get("condition", "").lstrip().startswith("in("): + in_types.append(dt) + else: + other_types.append(dt) + + if len(in_types) == 1: + return in_types[0]["datatype"] + elif len(in_types) > 1: + in_types = sorted(in_types, key=lambda k: k["success_rate"], reverse=True) + return in_types[0]["datatype"] + elif len(other_types) == 1: + return other_types[0]["datatype"] + elif len(other_types) > 1: + other_types = sorted(other_types, key=lambda k: k["success_rate"], reverse=True) + return other_types[0]["datatype"] + else: + print(f"Error tiebreaking datatypes: {pformat(datatypes)}") + sys.exit(1) + + for depth in range(0, len(dt_hierarchies)): + datatypes_to_check = [dt_hierarchies[depth][dt][0] for dt in dt_hierarchies[depth]] + matching_datatypes = [] + for datatype in 
datatypes_to_check: + success_rate = is_match(datatype) + if success_rate: + matching_datatypes.append({"datatype": datatype, "success_rate": success_rate}) + + if len(matching_datatypes) == 1: + return matching_datatypes[0]["datatype"] + elif len(matching_datatypes) > 1: + return tiebreak(matching_datatypes) + + def get_from(target, potential_foreign_columns): + candidate_froms = [] + for foreign in potential_foreign_columns: + table = foreign["table"] + column = foreign["column"] + sql_type = foreign["sql_type"] + num_matches = 0 + num_values = len(target["values"]) + for value in target["values"]: + if target.get("nulltype") == "empty" and value == "": + # If this value is legitimately empty then it should not be taken into account + # when counting the number of values in the target that are found in the + # candidate foreign column: + num_values -= 1 + continue + if sql_type != "text" and not isinstance(value, Number): + # If this value is of the wrong type then there is no need to explicitly check + # if it exists in the foreign column: + continue + if sql_type == "text": + value = f"'{value}'" + sql = f'SELECT 1 FROM "{table}" WHERE "{column}" = {value} LIMIT 1' + num_matches += len(config["db"].execute(sql).fetchall()) + if ((num_values - num_matches) / num_values) < error_rate: + candidate_froms.append(f"from({foreign['table']}.{foreign['column']})") + return candidate_froms + + target = sample[label] + if has_nulltype(target): + target["nulltype"] = "empty" + + # Use the valve config to retrieve the valve datatype hierarchies: + dt_hierarchies = get_dt_hierarchies(config) + target["datatype"] = get_datatype(target, dt_hierarchies)["datatype"] + + # Use the valve config to get a list of columns already loaded to the database, then compare + # the contents of each column with the contents of the target column and possibly annotate the + # target with a from() structure, if there is one and only one candidate from(). + potential_foreign_columns = get_potential_foreign_columns(config, target["datatype"]) + froms = get_from(target, potential_foreign_columns) + if len(froms) == 1: + target["structure"] = froms[0] + elif len(froms) > 1: + print(f"Column '{label}' has multiple from() candidates: {', '.join(froms)}") + + # Check if the column is a unique/primary column: + if not target.get("structure"): + if target.get("nulltype") is None and not has_duplicates(target, True): + if is_primary_candidate: + target["structure"] = "primary" + else: + target["structure"] = "unique" + + +if __name__ == "__main__": + parser = ArgumentParser(description="VALVE guesser (prototype)") + parser.add_argument("--verbose", action="store_true", help="Print logging output to STDERR.") + parser.add_argument( + "--sample_size", + type=int, + default=10000, + help="Sample size to use when guessing (default: 10,000)", + ) + parser.add_argument( + "--error_rate", + type=float, + default=0.1, + help="""A number between 0 and 1 (inclusive) representing the proportion of errors expected + (default: 0.1)""", + ) + parser.add_argument( + "--enum_size", + type=int, + default=10, + help="The maximum number of values to use for in(...) 
datatype conditions", + ) + parser.add_argument( + "--seed", type=int, help="Seed to use for random sampling (default: current epoch time)" + ) + parser.add_argument( + "--yes", + action="store_true", + help="Do not ask for confirmation before writing suggested modifications to the database", + ) + parser.add_argument( + "VALVE_TABLE", help="The VALVE table table from which to read the VALVE configuration" + ) + parser.add_argument( + "DATABASE", + help="""Can be one of (A) A URL of the form `postgresql://...` or + `sqlite://...` (B) The filename (including path) of a sqlite database.""", + ) + parser.add_argument( + "TABLE", help="A .TSV file containing the data for which we will be guessing" + ) + args = parser.parse_args() + + VERBOSE = args.verbose + + # Use the seed argument, or the epoch time if no seed is given, to set up the random generator: + if args.seed is not None: + seed = args.seed + else: + seed = time.time_ns() + random.seed(seed) + + # Get the valve configuration and database info: + config = get_valve_config(args.VALVE_TABLE) + table_tsv = args.TABLE + table = Path(args.TABLE).stem + if table in config["table"]: + print(f"{table} is already configured.", file=sys.stderr) + sys.exit(0) + with sqlite3.connect(args.DATABASE) as conn: + config["db"] = conn + + # Attach the condition parser to the config as well: + config["parser"] = Lark(grammar, parser="lalr", transformer=TreeToDict()) + + log(f"Getting random sample of {args.sample_size} rows from {table_tsv} ...") + sample = get_random_sample(table_tsv, args.sample_size) + for i, label in enumerate(sample): + log(f"Annotating label '{label}' ...") + annotate(label, sample, config, args.error_rate, i == 0) + log("Done!") + + table_table_headers = ["table", "path", "type", "description"] + column_table_headers = [ + "table", + "column", + "label", + "nulltype", + "datatype", + "structure", + "description", + ] + if not args.yes: + print() + + print('The following row will be inserted to "table":') + data = [table_table_headers, [f"{table}", f"{table_tsv}", "", ""]] + # We add +2 for padding + col_width = max(len(word) for row in data for word in row) + 2 + for row in data: + print("".join(word.ljust(col_width) for word in row)) + + print() + + print('The following row will be inserted to "column":') + data = [column_table_headers] + for label in sample: + row = [ + f"{table}", + f"{sample[label]['normalized']}", + f"{label if label != sample[label]['normalized'] else ''}", + f"{sample[label].get('nulltype', '')}", + f"{sample[label]['datatype']}", + f"{sample[label].get('structure', '')}", + f"{sample[label].get('description', '')}", + ] + data.append(row) + # We add +2 for padding + col_width = max(len(word) for row in data for word in row) + 2 + for row in data: + print("".join(word.ljust(col_width) for word in row)) + + print() + + answer = input("Do you want to write this updated configuration to the database? 
(y/n) ") + if answer.casefold() != "y": + print("Not writing updated configuration to the database.") + sys.exit(0) + + log("Updating table configuration in database ...") + row_number = conn.execute('SELECT MAX(row_number) FROM "table"').fetchall()[0][0] + 1 + sql = dedent( + f""" + INSERT INTO "table" ("row_number", {', '.join([f'"{k}"' for k in table_table_headers])}) + VALUES ({row_number}, '{table}', '{table_tsv}', NULL, NULL)""" + ) + log(sql, suppress_time=True) + log("", suppress_time=True) + conn.execute(sql) + conn.commit() + + log("Updating column configuration in database ...") + row_number = conn.execute('SELECT MAX(row_number) FROM "column"').fetchall()[0][0] + 1 + for label in sample: + values = ", ".join( + [ + f"{row_number}", + f"'{table}'", + f"'{sample[label]['normalized']}'", + f"'{label}'" if label != sample[label]["normalized"] else "NULL", + f"'{sample[label]['nulltype']}'" if sample[label].get("nulltype") else "NULL", + f"'{sample[label]['datatype']}'", + f"'{sample[label]['structure']}'" if sample[label].get("structure") else "NULL", + f"'{sample[label]['description']}'" if sample[label].get("description") else "NULL", + ] + ) + sql = dedent( + f""" + INSERT INTO "column" ("row_number", {', '.join([f'"{k}"' for k in column_table_headers])}) + VALUES ({values})""" + ) + log(sql, suppress_time=True) + conn.execute(sql) + conn.commit() + row_number += 1 + log("", suppress_time=True) + log("Done!") diff --git a/scripts/guess_grammar.py b/scripts/guess_grammar.py new file mode 100644 index 00000000..5e611cfb --- /dev/null +++ b/scripts/guess_grammar.py @@ -0,0 +1,120 @@ +from lark import Transformer + +# Grammar used to parse the the contents of `condition` and `structure` columns. +# See: https://lark-parser.readthedocs.io/en/latest/index.html# +grammar = r""" +%import common.WS +%ignore WS + +start: expression+ +?expression: string | function + +?string: label +label: ALPHANUM | DQSTRING | SQSTRING + +function: function_name "(" arguments ")" +function_name: ALPHANUM +arguments: argument ("," argument)* +?argument: string | field | function | named_arg | regex +field: label "." 
label +named_arg: label "=" label + +?regex: regex_sub | regex_match +regex_match: "/" regex_pattern "/" regex_flags +regex_sub: SUB_BEGIN "/" regex_pattern "/" regex_pattern "/" regex_flags +regex_pattern: REGEX_WITH_FORWARD_SLASH | REGEX_WITHOUT_FORWARD_SLASH +regex_flags: LOWER_ALPHA* + +SUB_BEGIN: "s" +ALPHANUM: /[a-zA-Z0-9-_]/+ +DQSTRING: "\"" /[^"](\\\")?/* "\"" +SQSTRING: "'" /[^'](\\\')?/* "'" +LOWER_ALPHA: /[a-z]/ +NO_SLASH: /[^\/]/ +REGEX_WITH_FORWARD_SLASH: NO_SLASH* "\\/" NO_SLASH* +REGEX_WITHOUT_FORWARD_SLASH: NO_SLASH+ +""" + + +class TreeToDict(Transformer): + """Transformer to convert a Tree, generated by the grammar used by CMI-PB to parse the contents + of `condition` and `structure` columns, into a list of expressions represented as dicts.""" + + def _sanity_check(self, token_list, expected_len): + if len(token_list) != expected_len: + raise Exception(f"Wrong number of tokens in: {token_list} (expecting {expected_len})") + + def label(self, label): + self._sanity_check(label, 1) + label = label[0] + return {"type": "label", "value": label.value} + + def field(self, field): + self._sanity_check(field, 2) + return {"type": "field", "table": field[0]["value"], "column": field[1]["value"]} + + def named_arg(self, named_arg): + self._sanity_check(named_arg, 2) + return {"type": "named_arg", "key": named_arg[0]["value"], "value": named_arg[1]["value"]} + + def regex_match(self, regex_match): + self._sanity_check(regex_match, 2) + return {"type": "regex", "pattern": regex_match[0], "flags": regex_match[1]} + + def regex_sub(self, regex_sub): + self._sanity_check(regex_sub, 4) + return { + "type": "regex", + "pattern": regex_sub[1], + "replace": regex_sub[2], + "flags": regex_sub[3], + } + + def regex_pattern(self, regex_pattern): + self._sanity_check(regex_pattern, 1) + return regex_pattern[0].value + + def regex_flags(self, flags): + return [flag.value for flag in flags] + + def arguments(self, arguments): + return arguments + + def function_name(self, function_name): + self._sanity_check(function_name, 1) + return function_name[0].value + + def function(self, function): + self._sanity_check(function, 2) + return {"type": "function", "name": function[0], "args": function[1]} + + def start(self, start): + return start + + +def reverse_parse(config, parsed_cond): + """Given a config map and a parsed condition, return the text version of the condition.""" + cond_type = parsed_cond["type"] + text_cond = None + if cond_type == "label": + if config["datatype"].get(parsed_cond["value"]): + text_cond = config["datatype"][parsed_cond["value"]]["datatype"] + else: + text_cond = "'{}'".format(parsed_cond["value"]) + elif cond_type == "field": + return "{}.{}".format(parsed_cond["table"], parsed_cond["column"]) + elif cond_type == "named_arg": + text_cond = "{}={}".format(parsed_cond["key"], parsed_cond["value"]) + elif cond_type == "regex": + pattern = parsed_cond["pattern"] + flags = "".join(parsed_cond["flags"]) + replace = parsed_cond.get("replace") + text_cond = f"/{pattern}/{flags}" if not replace else f"s/{pattern}/{replace}/{flags}" + elif cond_type == "function": + text_cond = map(lambda arg: reverse_parse(config, arg), parsed_cond["args"]) + text_cond = ", ".join(text_cond) + text_cond = "{}({})".format(parsed_cond["name"], text_cond) + else: + raise Exception(f"Unknown parsed_cond type: {cond_type} for {parsed_cond}") + + return text_cond diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 00000000..9547a85f --- /dev/null +++ 
b/scripts/requirements.txt @@ -0,0 +1 @@ +lark==1.1.8 diff --git a/src/lib.rs b/src/lib.rs index 16de0d94..296fd7cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -969,31 +969,32 @@ pub async fn configure_db( // use that information to create the associated database tables, while saving constraint // information to constrains_config. let mut setup_statements = HashMap::new(); - let table_names: Vec = tables_config.keys().cloned().collect(); - for table_name in table_names { + for table_name in tables_config.keys().cloned().collect::>() { let optional_path = tables_config .get(&table_name) .and_then(|r| r.get("path")) .and_then(|p| p.as_str()); - let path; + let mut path = None; match optional_path { - // If an entry of the tables_config has no path then it is an internal table which need - // not be configured explicitly. Currently the only example is the message table. - None => continue, + None => { + // If an entry of the tables_config has no path then it is an internal table which + // need not be configured explicitly. Currently the only examples are the message + // and history tables. + if table_name != "message" && table_name != "history" { + panic!("No path defined for table {}", table_name); + } + continue; + } Some(p) if !Path::new(p).is_file() => { eprintln!("WARN: File does not exist {}", p); - continue; } Some(p) if Path::new(p).canonicalize().is_err() => { eprintln!("WARN: File path could not be made canonical {}", p); - continue; } - - Some(p) => path = p.to_string(), + Some(p) => path = Some(p.to_string()), }; - // Get the columns that have been previously configured: let defined_columns: Vec = tables_config .get(&table_name) .and_then(|r| r.get("column")) @@ -1003,65 +1004,58 @@ pub async fn configure_db( .and_then(|k| Some(k.collect())) .unwrap(); - // Get the actual columns from the data itself. Note that we set has_headers to false - // (even though the files have header rows) in order to explicitly read the header row. - let mut rdr = csv::ReaderBuilder::new() - .has_headers(false) - .delimiter(b'\t') - .from_reader(File::open(path.clone()).unwrap_or_else(|err| { - panic!("Unable to open '{}': {}", path.clone(), err); - })); - let mut iter = rdr.records(); - let actual_columns; - if let Some(result) = iter.next() { - actual_columns = result.unwrap(); - } else { - panic!("'{}' is empty", path); - } - // We use column_order to explicitly indicate the order in which the columns should appear - // in the table, for later reference. + // in the table, for later reference. The default is to preserve the order from the actual + // table file. If that does not exist, we use the ordering in defined_columns. let mut column_order = vec![]; - let mut all_columns: SerdeMap = SerdeMap::new(); - for column_name in &actual_columns { - let column; - if !defined_columns.contains(&column_name.to_string()) { - let mut cmap = SerdeMap::new(); - cmap.insert( - String::from("table"), - SerdeValue::String(table_name.to_string()), - ); - cmap.insert( - String::from("column"), - SerdeValue::String(column_name.to_string()), - ); - cmap.insert( - String::from("nulltype"), - SerdeValue::String(String::from("empty")), - ); - cmap.insert( - String::from("datatype"), - SerdeValue::String(String::from("text")), - ); - column = SerdeValue::Object(cmap); - } else { - column = tables_config - .get(&table_name) - .and_then(|r| r.get("column")) - .and_then(|v| v.as_object()) - .and_then(|o| o.get(column_name)) + if let Some(path) = path { + // Get the actual columns from the data itself. 
Note that we set has_headers to + // false(even though the files have header rows) in order to explicitly read the + // header row. + let mut rdr = csv::ReaderBuilder::new() + .has_headers(false) + .delimiter(b'\t') + .from_reader(File::open(path.clone()).unwrap_or_else(|err| { + panic!("Unable to open '{}': {}", path.clone(), err); + })); + let mut iter = rdr.records(); + if let Some(result) = iter.next() { + let actual_columns = result .unwrap() - .clone(); + .iter() + .map(|c| c.to_string()) + .collect::>(); + // Make sure that the actual columns found in the table file, and the columns + // defined in the column config, exactly match in terms of their content: + for column_name in &actual_columns { + column_order.push(json!(column_name)); + if !defined_columns.contains(&column_name.to_string()) { + panic!( + "Column '{}.{}' not in column config", + table_name, column_name + ); + } + } + for column_name in &defined_columns { + if !actual_columns.contains(&column_name.to_string()) { + panic!( + "Defined column '{}.{}' not found in table", + table_name, column_name + ); + } + } + } else { + panic!("'{}' is empty", path); } - column_order.push(SerdeValue::String(column_name.to_string())); - all_columns.insert(column_name.to_string(), column); } + if column_order.is_empty() { + column_order = defined_columns.iter().map(|c| json!(c)).collect::>(); + } tables_config .get_mut(&table_name) .and_then(|t| t.as_object_mut()) .and_then(|o| { - o.insert(String::from("column"), SerdeValue::Object(all_columns)); o.insert( String::from("column_order"), SerdeValue::Array(column_order), @@ -1097,9 +1091,11 @@ pub async fn configure_db( } // Sort the tables according to their foreign key dependencies so that tables are always loaded - // after the tables they depend on: - let unsorted_tables: Vec = setup_statements.keys().cloned().collect(); - let sorted_tables = verify_table_deps_and_sort(&unsorted_tables, &constraints_config); + // after the tables they depend on. 
Ignore the internal message and history tables: + let sorted_tables = verify_table_deps_and_sort( + &setup_statements.keys().cloned().collect(), + &constraints_config, + ); if *command != ValveCommand::Config || verbose { // Generate DDL for the history table: diff --git a/test/expected/table3.tsv b/test/expected/table3.tsv index 04c78efc..c0f31eda 100644 --- a/test/expected/table3.tsv +++ b/test/expected/table3.tsv @@ -1,12 +1,12 @@ -source id label type parent -MOB MOB:0000013 mobecular entity owl:Class material entity -ZOB ZOB:0000013 bar owl:Class car -JOB JOB:0000013 car owl:Class foo -SOB SOB:0000013 foo owl:Class bar -YOB YOB:0000013 mar owl:Class jafar -COB BFO:0000040 material entity owl:Class owl:Thing -CO B COB:0000013 molecular dentity owl:Class material entity -COB COB:0000013 molecular entity owl:Class material entity -COB VO:0000001 vaccine owl:Class material entity -BOB VO:0000001 vaccine owl:Class material entity -BFOBBER BFO:0000027 bazaar owl:Class barrie +source id label type parent related +MOB MOB:0000013 mobecular entity owl:Class material entity +ZOB ZOB:0000013 bar owl:Class car +JOB JOB:0000013 car owl:Class foo +SOB SOB:0000013 foo owl:Class bar +YOB YOB:0000013 mar owl:Class jafar +COB BFO:0000040 material entity owl:Class owl:Thing +CO B COB:0000013 molecular dentity owl:Class material entity +COB COB:0000013 molecular entity owl:Class material entity +COB VO:0000001 vaccine owl:Class material entity +BOB VO:0000001 vaccine owl:Class material entity +BFOBBER BFO:0000027 bazaar owl:Class barrie diff --git a/test/generate_random_test_data.py b/test/generate_random_test_data.py index 3f8d988d..87008651 100755 --- a/test/generate_random_test_data.py +++ b/test/generate_random_test_data.py @@ -1,209 +1,65 @@ #!/usr/bin/env python3 +import json import math import random import string +import subprocess +import sys from argparse import ArgumentParser TOKEN_LENGTH = 9 +WINDOW_SIZE = 50 -CONFIG = { - "table1": { - "prefix": { - "allow_empty": False, - "datatype": "prefix", - "structure": { - "type": "primary", - }, - }, - "base": { - "allow_empty": False, - "datatype": "IRI", - "structure": { - "type": "unique", - }, - }, - "ontology IRI": { - "allow_empty": True, - "datatype": "IRI", - }, - "version IRI": { - "allow_empty": True, - "datatype": "IRI", - }, - }, - "table2": { - "child": { - "allow_empty": False, - "datatype": "trimmed_line", - "structure": { - "type": "foreign", - "ftable": "table4", - "fcolumn": "other_foreign_column", - }, - }, - "parent": { - "allow_empty": True, - "datatype": "trimmed_line", - "structure": { - "type": "tree", - "tcolumn": "child", - }, - }, - "xyzzy": { - "allow_empty": True, - "datatype": "trimmed_line", - "structure": { - "type": "under", - "ttable": "table2", - "tcolumn": "child", - "uval": "d", - }, - }, - "foo": { - "allow_empty": True, - "datatype": "integer", - "structure": { - "type": "foreign", - "ftable": "table4", - "fcolumn": "numeric_foreign_column", - }, - }, - "bar": { - "allow_empty": True, - "datatype": "text", - }, - }, - "table3": { - "source": { - "allow_empty": False, - "datatype": "prefix", - "structure": { - "type": "foreign", - "ftable": "table1", - "fcolumn": "prefix", - }, - }, - "id": { - "allow_empty": False, - "datatype": "curie", - "structure": { - "type": "unique", - }, - }, - "label": { - "allow_empty": False, - "datatype": "label", - "structure": { - "type": "primary", - }, - }, - "parent": { - "allow_empty": True, - "datatype": "label", - "structure": { - "type": "tree", - "tcolumn": "label", - }, - 
}, - "related": { - "allow_empty": True, - "datatype": "trimmed_line", - }, - }, - "table4": { - "foreign_column": { - "allow_empty": False, - "datatype": "text", - "structure": { - "type": "unique", - }, - }, - "other_foreign_column": { - "allow_empty": False, - "datatype": "text", - "structure": { - "type": "unique", - }, - }, - "numeric_foreign_column": { - "allow_empty": False, - "datatype": "integer", - "structure": { - "type": "primary", - }, - }, - }, - "table5": { - "foo": { - "allow_empty": False, - "datatype": "word", - "structure": { - "type": "primary", - }, - }, - "bar": { - "allow_empty": False, - "datatype": "integer", - }, - }, - "table6": { - "child": { - "allow_empty": False, - "datatype": "integer", - "structure": { - "type": "foreign", - "ftable": "table4", - "fcolumn": "numeric_foreign_column", - }, - }, - "parent": { - "allow_empty": True, - "datatype": "integer", - "structure": { - "type": "tree", - "tcolumn": "child", - }, - }, - "xyzzy": { - "allow_empty": True, - "datatype": "integer", - "structure": { - "type": "under", - "ttable": "table6", - "tcolumn": "child", - "uval": "4", - }, - }, - "foo": { - "allow_empty": True, - "datatype": "text", - }, - "bar": { - "allow_empty": True, - "datatype": "integer", - }, - }, -} - - -def get_value_from_prev_insert(prev_inserts, from_table, from_column, to_table, to_column): - global CONFIG +def get_special_tables(config): + return [k for k, v in config["special"].items() if v is not None] + + +def get_table_columns(config, table): + return [column for column in config["table"][table]["column_order"]] + + +def has_nulltype(config, table, column): + return bool(config["table"][table]["column"][column].get("nulltype")) + + +def get_column_structure(config, table, column): + return config["table"][table]["column"][column].get("structure") + + +def get_column_datatype(config, table, column): + return config["table"][table]["column"][column]["datatype"] + + +def get_foreign_key(config, table, column): + return [f for f in config["constraints"]["foreign"][table] if f["column"] == column][0] + + +def get_tree(config, table, column): + return [f for f in config["constraints"]["tree"][table] if f["parent"] == column][0] + + +def get_under(config, table, column): + return [f for f in config["constraints"]["under"][table] if f["column"] == column][0] + + +def get_value_from_prev_insert(config, prev_inserts, from_table, from_column, to_table, to_column): + global WINDOW_SIZE # Note: because we are loading the tables and columns in the correct order (i.e. such that # all dependencies are loaded before the tables and columns they depend on), the list of # previous inserts for the from_table/from_column will never be empty. 
if len(prev_inserts[from_table][from_column]) == 1: - if CONFIG[to_table][to_column]["allow_empty"]: + if has_nulltype(config, to_table, to_column): return "" else: return prev_inserts[from_table][from_column][0] else: - # Select at random from the last 100 inserted values: - prev_inserts[from_table][from_column] = prev_inserts[from_table][from_column][-100:] + # Select at random from the last N inserted values, with N given by WINDOW_SIZE: + prev_inserts[from_table][from_column] = prev_inserts[from_table][from_column][-WINDOW_SIZE:] from_values = prev_inserts[from_table][from_column] # We'd ideally like to exclude the last inserted value from consideration, but we save it # here in case we cannot: @@ -219,58 +75,58 @@ def get_value_from_prev_insert(prev_inserts, from_table, from_column, to_table, return values_to_choose_from[random.randrange(len(values_to_choose_from))] -def get_constrained_cell_value(table, column, row_num, prev_inserts): +def get_constrained_cell_value(config, table, column, row_num, prev_inserts): global TOKEN_LENGTH - global CONFIG - - structure = CONFIG[table][column].get("structure") - if structure and structure["type"] == "foreign": - ftable = structure["ftable"] - fcolumn = structure["fcolumn"] - cell = get_value_from_prev_insert(prev_inserts, ftable, fcolumn, table, column) - elif structure and structure["type"] == "tree": - tcolumn = structure["tcolumn"] - cell = get_value_from_prev_insert(prev_inserts, table, tcolumn, table, column) - elif structure and structure["type"] == "under": + + structure = get_column_structure(config, table, column) + datatype = get_column_datatype(config, table, column).casefold() + if structure.startswith("from("): + fkey = get_foreign_key(config, table, column) + ftable = fkey["ftable"] + fcolumn = fkey["fcolumn"] + cell = get_value_from_prev_insert(config, prev_inserts, ftable, fcolumn, table, column) + elif structure.startswith("tree("): + tkey = get_tree(config, table, column) + tcolumn = tkey["child"] + cell = get_value_from_prev_insert(config, prev_inserts, table, tcolumn, table, column) + elif structure.startswith("under("): # Note that properly satisfying the under constraint requires, not only that # the cell is in the specified tree column, but also (a) that the tree # actually exists, and (b) that the value is "under" the under value. To do # this properly, though, would require a decent amount of memory. So perhaps # it's not worth it to check for (a) and (b) and allow any offending cells # to generate errors which we can then verify are handled properly by valve. 
- ttable = structure["ttable"] - tcolumn = structure["tcolumn"] - cell = get_value_from_prev_insert(prev_inserts, ttable, tcolumn, table, column) - elif CONFIG[table][column]["datatype"] in [ + ukey = get_under(config, table, column) + ttable = ukey["ttable"] + tcolumn = ukey["tcolumn"] + cell = get_value_from_prev_insert(config, prev_inserts, ttable, tcolumn, table, column) + elif datatype in [ "prefix", - "IRI", + "iri", "trimmed_line", "label", "word", ]: cell = "".join(random.choices(string.ascii_lowercase, k=TOKEN_LENGTH)) - elif CONFIG[table][column]["datatype"] == "curie": + elif datatype == "curie": cell = ( "".join(random.choices(string.ascii_lowercase, k=3)).upper() + ":" + "".join(random.choices(string.ascii_lowercase, k=TOKEN_LENGTH)) ) - elif CONFIG[table][column]["datatype"] == "text": + elif datatype == "text": cell = ( "".join(random.choices(string.ascii_lowercase, k=TOKEN_LENGTH)) + " " + "".join(random.choices(string.ascii_lowercase, k=TOKEN_LENGTH)) ) - elif CONFIG[table][column]["datatype"] == "integer": + elif datatype == "integer": # No leading 0s: cell = "".join(random.choices("123456789", k=1)) + "".join( random.choices(string.digits, k=TOKEN_LENGTH - 1) ) else: - print( - f"Warning: Unknown datatype: {CONFIG[table][column]['datatype']}. " - "Generating a random string." - ) + print(f"Warning: Unknown datatype: {datatype}. Generating a random string.") cell = "".join(random.choices(string.ascii_lowercase, k=TOKEN_LENGTH)) return cell @@ -278,20 +134,20 @@ def get_constrained_cell_value(table, column, row_num, prev_inserts): def main(): global TOKEN_LENGTH - global CONFIG parser = ArgumentParser( description=""" Deterministically generate a specified amount of data, a specified percentage of which are - errors, using a hard-coded VALVE configuration, given the specified seed, to a specified - output directory. - """ + errors, using the given VALVE table configuration and seed, to the output directory.""" ) parser.add_argument("seed", help="The seed to use to generate the random data") parser.add_argument("num_rows", help="The number of rows per table to generate") parser.add_argument( "pct_errors", help="The percentage of rows in each table that should have errors" ) + parser.add_argument( + "input_table", help="The .TSV file representing the VALVE table configuration" + ) parser.add_argument( "output_dir", help="The output directory to write the new table configuration to" ) @@ -299,48 +155,65 @@ def main(): seed = int(args.seed) num_rows = int(args.num_rows) pct_errors = int(args.pct_errors) + input_table = args.input_table outdir = args.output_dir + # Use the seed argument to seed the random data that will be generated: random.seed(seed) + # Get the VALVE configuration: + result = subprocess.run(["./valve", "--dump_config", input_table], capture_output=True) + if result.returncode != 0: + error = result.stderr.decode() + output = result.stdout.decode() + if output: + error = f"{error}\n{output}" + print(f"{error}", file=sys.stderr) + sys.exit(result.returncode) + config = json.loads(result.stdout.decode()) + # This is a record of the last inserted values for each table and column. When one column # takes its values from another column, then we look here and fetch the last inserted value of # the second column. 
prev_inserts = {} + + # The data tables to generate: + data_tables = [t for t in config["sorted_table_list"] if t not in get_special_tables(config)] + + # The TSV files corresponding to each data table: tsv_files = {} - tables_in_order = ["table4", "table1", "table2", "table3", "table5", "table6"] - for table in tables_in_order: + for table in data_tables: tsv_files[table] = open(f"{outdir}/{table}.tsv", "w") - columns = [column for column in CONFIG[table]] + columns = get_table_columns(config, table) print("\t".join(columns), file=tsv_files[table]) num_error_rows = math.ceil((pct_errors / 100) * num_rows) error_proportion = None if not num_error_rows else math.floor(num_rows / num_error_rows) for row_num in range(1, num_rows + 1): - for table in tables_in_order: + for table in data_tables: is_error_row = error_proportion and row_num % error_proportion == 1 - columns = [column for column in CONFIG[table]] + columns = get_table_columns(config, table) error_column = random.randrange(len(columns)) row = {} for column_num, column in enumerate(columns): is_error_column = is_error_row and column_num == error_column if ( not is_error_column - and CONFIG[table][column]["allow_empty"] + and has_nulltype(config, table, column) and row_num % random.randrange(2, num_rows) == 1 ): # If the column allows empty values, assign an empty value "sometimes": cell = "" elif not is_error_column: - cell = get_constrained_cell_value(table, column, row_num, prev_inserts) + cell = get_constrained_cell_value(config, table, column, row_num, prev_inserts) else: - if CONFIG[table][column].get("structure") and CONFIG[table][column][ - "structure" - ]["type"] in ["unique", "primary"]: + structure = get_column_structure(config, table, column) + datatype = get_column_datatype(config, table, column) + if structure in ["unique", "primary"]: cell = "" - elif CONFIG[table][column]["datatype"] in [ + elif datatype in [ "prefix", - "IRI", + "iri", "word", "curie", ]: @@ -350,7 +223,7 @@ def main(): + "".join(random.choices(string.ascii_lowercase, k=TOKEN_LENGTH)) ) else: - if CONFIG[table][column]["datatype"] == "integer": + if datatype == "integer": cell = "".join(random.choices(string.ascii_lowercase, k=TOKEN_LENGTH)) else: # No leading 0s: diff --git a/test/guess_test_data/column.tsv b/test/guess_test_data/column.tsv new file mode 100644 index 00000000..2659b524 --- /dev/null +++ b/test/guess_test_data/column.tsv @@ -0,0 +1,33 @@ +table column label nulltype datatype structure description +table table table_name table_name primary name of this table +table path path path to the TSV file for this table, relative to the table.tsv file +table type empty table_type type of this table, used for tables with special meanings +table description empty description a description of this table +column table table_name from(table.table) the table that this column belongs to +column column column_name the name of this column +column label empty label the human-readable label for this column +column nulltype empty datatype_name from(datatype.datatype) the datatype for NULL values in this column +column datatype datatype_name from(datatype.datatype) the datatype for this column +column structure empty trimmed_line schema information for this column +column description empty description a description of this column +datatype datatype datatype_name primary the name of this datatype +datatype parent empty datatype_name tree(datatype) the parent datatype +datatype transform empty word +datatype condition empty datatype_condition +datatype 
structure empty trimmed_line +datatype description empty trimmed_text +datatype SQLite type empty datatype_name +datatype PostgreSQL type empty datatype_name +datatype RDF type empty datatype_name +datatype HTML type empty datatype_name +rule table table_name +rule when column column_name +rule when condition datatype_condition +rule then column column_name +rule then condition datatype_condition +rule level word +rule description empty description +table1 prefix prefix primary +table1 base IRI unique +table1 ontology IRI empty IRI +table1 version IRI empty IRI diff --git a/test/guess_test_data/column_expected.tsv b/test/guess_test_data/column_expected.tsv new file mode 100644 index 00000000..f7e6a20e --- /dev/null +++ b/test/guess_test_data/column_expected.tsv @@ -0,0 +1,53 @@ +table column label nulltype datatype structure description +table table table_name table_name primary name of this table +table path path path to the TSV file for this table, relative to the table.tsv file +table type empty table_type type of this table, used for tables with special meanings +table description empty description a description of this table +column table table_name from(table.table) the table that this column belongs to +column column column_name the name of this column +column label empty label the human-readable label for this column +column nulltype empty datatype_name from(datatype.datatype) the datatype for NULL values in this column +column datatype datatype_name from(datatype.datatype) the datatype for this column +column structure empty trimmed_line schema information for this column +column description empty description a description of this column +datatype datatype datatype_name primary the name of this datatype +datatype parent empty datatype_name tree(datatype) the parent datatype +datatype transform empty word +datatype condition empty datatype_condition +datatype structure empty trimmed_line +datatype description empty trimmed_text +datatype SQLite type empty datatype_name +datatype PostgreSQL type empty datatype_name +datatype RDF type empty datatype_name +datatype HTML type empty datatype_name +rule table table_name +rule when column column_name +rule when condition datatype_condition +rule then column column_name +rule then condition datatype_condition +rule level word +rule description empty description +table1 prefix prefix primary +table1 base IRI unique +table1 ontology IRI empty IRI +table1 version IRI empty IRI +table2 child trimmed_line from(table4.other_foreign_column) +table2 parent empty trimmed_line tree(child) +table2 xyzzy empty trimmed_line under(table2.child, d) +table2 foo empty integer from(table4.numeric_foreign_column) +table2 bar empty text +table3 source prefix from(table1.prefix) +table3 id CURIE unique +table3 label label primary +table3 parent empty label tree(label) +table3 related empty trimmed_line +table4 foreign_column text unique +table4 other_foreign_column text unique +table4 numeric_foreign_column integer primary +table5 foo word primary +table5 bar integer +table6 child integer from(table4.numeric_foreign_column) +table6 parent empty integer tree(child) +table6 xyzzy empty integer under(table6.child, 4) +table6 foo empty text +table6 bar empty integer diff --git a/test/guess_test_data/datatype.tsv b/test/guess_test_data/datatype.tsv new file mode 100644 index 00000000..c118588d --- /dev/null +++ b/test/guess_test_data/datatype.tsv @@ -0,0 +1,22 @@ +datatype parent transform condition structure description SQLite type PostgreSQL type RDF type HTML 
type +CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE +IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI +column_name trimmed_line match(/\S([^\n]*\S)*/) a column name +datatype_condition line exclude(/\n/) a datatype condition specification +datatype_name word exclude(/\W/) a datatype name +description trimmed_text match(/\S(.*\S)*/) a brief description +empty text equals('') the empty string NULL NULL null +integer nonspace match(/-?\d+/) a positive or negative integer INTEGER INTEGER +label trimmed_line match(/\S([^\n]*\S)*/) +line text exclude(/\n/) a line of text input +natural_number integer match(/\d+/) a natural number, including zero INTEGER INTEGER +nonspace trimmed_line exclude(/\s/) text without whitespace +path line exclude(/\n/) a path to a file +prefix word exclude(/\W/) a prefix for a CURIE +suffix word exclude(/\W/) a suffix for a CURIE +table_name word exclude(/\W/) a table name +table_type word lowercase in('table', 'column', 'datatype') a table type +text any text TEXT TEXT xsd:string textarea +trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace +trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace +word nonspace exclude(/\W/) a single word: letters, numbers, underscore diff --git a/test/guess_test_data/rule.tsv b/test/guess_test_data/rule.tsv new file mode 100644 index 00000000..a46b8d52 --- /dev/null +++ b/test/guess_test_data/rule.tsv @@ -0,0 +1,2 @@ +table when column when condition then column then condition level description +table1 ontology IRI null version IRI null error 'version IRI' must be null whenever 'ontology IRI' is null diff --git a/test/guess_test_data/table.tsv b/test/guess_test_data/table.tsv new file mode 100644 index 00000000..ac5800f1 --- /dev/null +++ b/test/guess_test_data/table.tsv @@ -0,0 +1,6 @@ +table path description type +column test/guess_test_data/column.tsv Columns for all of the tables. column +datatype test/guess_test_data/datatype.tsv Datatypes for all of the columns datatype +rule test/guess_test_data/rule.tsv More complex "when" rules rule +table test/guess_test_data/table.tsv All of the user-editable tables in this project. table +table1 test/guess_test_data/table1.tsv The first data table diff --git a/test/guess_test_data/table_expected.tsv b/test/guess_test_data/table_expected.tsv new file mode 100644 index 00000000..dfb683c4 --- /dev/null +++ b/test/guess_test_data/table_expected.tsv @@ -0,0 +1,11 @@ +table path description type +column test/guess_test_data/column_expected.tsv Columns for all of the tables. column +datatype test/guess_test_data/datatype.tsv Datatypes for all of the columns datatype +rule test/guess_test_data/rule.tsv More complex "when" rules rule +table test/guess_test_data/table_expected.tsv All of the user-editable tables in this project. 
table
+table1 test/guess_test_data/table1.tsv The first data table
+table2 test/guess_test_data/ontology/table2.tsv The second data table
+table3 test/guess_test_data/ontology/table3.tsv The third data table
+table4 test/guess_test_data/ontology/table4.tsv The fourth data table
+table5 test/guess_test_data/ontology/table5.tsv The fifth data table
+table6 test/guess_test_data/ontology/table6.tsv The sixth data table (like table2 but all numeric)
diff --git a/test/perf_test_data/column.tsv b/test/perf_test_data/column.tsv
index 80268a30..f7e6a20e 100644
--- a/test/perf_test_data/column.tsv
+++ b/test/perf_test_data/column.tsv
@@ -12,7 +12,14 @@ column structure empty trimmed_line schema information for this column
 column description empty description a description of this column
 datatype datatype datatype_name primary the name of this datatype
 datatype parent empty datatype_name tree(datatype) the parent datatype
+datatype transform empty word
 datatype condition empty datatype_condition
+datatype structure empty trimmed_line
+datatype description empty trimmed_text
+datatype SQLite type empty datatype_name
+datatype PostgreSQL type empty datatype_name
+datatype RDF type empty datatype_name
+datatype HTML type empty datatype_name
 rule table table_name
 rule when column column_name
 rule when condition datatype_condition
diff --git a/test/random_test_data/column.tsv b/test/random_test_data/column.tsv
index 80268a30..f7e6a20e 100644
--- a/test/random_test_data/column.tsv
+++ b/test/random_test_data/column.tsv
@@ -12,7 +12,14 @@ column structure empty trimmed_line schema information for this column
 column description empty description a description of this column
 datatype datatype datatype_name primary the name of this datatype
 datatype parent empty datatype_name tree(datatype) the parent datatype
+datatype transform empty word
 datatype condition empty datatype_condition
+datatype structure empty trimmed_line
+datatype description empty trimmed_text
+datatype SQLite type empty datatype_name
+datatype PostgreSQL type empty datatype_name
+datatype RDF type empty datatype_name
+datatype HTML type empty datatype_name
 rule table table_name
 rule when column column_name
 rule when condition datatype_condition
diff --git a/test/src/column.tsv b/test/src/column.tsv
index 07f38290..9c6c8256 100644
--- a/test/src/column.tsv
+++ b/test/src/column.tsv
@@ -12,7 +12,14 @@ column structure empty trimmed_line schema information for this column
 column description empty description a description of this column
 datatype datatype datatype_name primary the name of this datatype
 datatype parent empty datatype_name tree(datatype) the parent datatype
+datatype transform empty word
 datatype condition empty datatype_condition
+datatype structure empty trimmed_line
+datatype description empty trimmed_text
+datatype SQLite type empty trimmed_line
+datatype PostgreSQL type empty trimmed_line
+datatype RDF type empty trimmed_line
+datatype HTML type empty datatype_name
 rule table table_name
 rule when column column_name
 rule when condition datatype_condition
@@ -32,6 +39,7 @@ table2 bar empty text
 table3 source prefix from(table1.prefix)
 table3 id CURIE unique
 table3 label label primary
+table3 type empty CURIE
 table3 parent empty label tree(label)
 table3 related empty trimmed_line
 table4 foreign_column text unique
diff --git a/test/src/ontology/table3.tsv b/test/src/ontology/table3.tsv
index 710e1e16..e8d75e99 100644
--- a/test/src/ontology/table3.tsv
+++ b/test/src/ontology/table3.tsv
@@ -1,11 +1,11 @@
-source id label type parent
-MOB MOB:0000013 mobecular entity owl:Class material entity
-ZOB ZOB:0000013 bar owl:Class car
-JOB JOB:0000013 car owl:Class foo
-SOB SOB:0000013 foo owl:Class bar
-YOB YOB:0000013 mar owl:Class jafar
-COB BFO:0000040 material entity owl:Class owl:Thing
-CO B COB:0000013 molecular dentity owl:Class material entity
-COB COB:0000013 molecular entity owl:Class material entity
-COB VO:0000001 vaccine owl:Class material entity
-BOB VO:0000001 vaccine owl:Class material entity
+source id label type parent related
+MOB MOB:0000013 mobecular entity owl:Class material entity
+ZOB ZOB:0000013 bar owl:Class car
+JOB JOB:0000013 car owl:Class foo
+SOB SOB:0000013 foo owl:Class bar
+YOB YOB:0000013 mar owl:Class jafar
+COB BFO:0000040 material entity owl:Class owl:Thing
+CO B COB:0000013 molecular dentity owl:Class material entity
+COB COB:0000013 molecular entity owl:Class material entity
+COB VO:0000001 vaccine owl:Class material entity
+BOB VO:0000001 vaccine owl:Class material entity
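A note on the new confirm_overwrite.sh: `$1` is unquoted in the `[ -d $1 -a ! -z "$(ls -A $1)" ]` test, so directory names containing spaces will break it, and POSIX marks the `-a` connective as obsolescent. Pressing Enter at the prompt also leaves `$enter` empty, turning `[ $enter = 'y' ]` into a malformed test (the script still exits nonzero, but with a spurious error message). For comparison, a quoting-safe Python sketch of the same confirm-before-overwrite logic; this helper is hypothetical and not part of the patch:

```python
#!/usr/bin/env python3
"""Hypothetical equivalent of confirm_overwrite.sh: if the given directory
exists and is non-empty, ask before proceeding; exit 0 on 'y', 1 otherwise."""
import os
import sys


def confirm_overwrite(directory):
    # Only prompt when the directory exists and actually contains files:
    if os.path.isdir(directory) and os.listdir(directory):
        files = ", ".join(sorted(os.listdir(directory)))
        answer = input(
            f"{directory} already exists and contains the following files: "
            f"{files}\nAre you sure (y/n)? "
        )
        if answer.strip().casefold() != "y":
            print("Understood. Exiting with error code.")
            return 1
    return 0


if __name__ == "__main__":
    sys.exit(confirm_overwrite(sys.argv[1]))
```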
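get_random_sample in scripts/guess.py loads every row of the TSV into memory (`rows = [r for r in csv.DictReader(f, ...)]`) before picking the sample. Since the sampled row numbers are drawn up front, the file can instead be streamed, which matters for the 30,000-row guess tables. A sketch under the same newline-counting assumption as the original:

```python
import csv
import random


def stream_sample_rows(table, sample_size):
    """Stream the TSV once, keeping only rows whose 0-based index was drawn
    in advance, instead of loading the whole file into memory."""
    with open(table, "rb") as f:
        total_rows = sum(1 for _ in f) - 1  # subtract 1 for the header row
    wanted = set(random.sample(range(total_rows), min(sample_size, total_rows)))
    with open(table) as f:
        reader = csv.DictReader(f, delimiter="\t", quoting=csv.QUOTE_NONE)
        return [row for i, row in enumerate(reader) if i in wanted]
```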
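The sampling step also normalizes column labels before checking for collisions, which guess.py treats as fatal. The transform is easy to state on its own, and note that distinct labels can collide after normalization, as the second assertion shows:

```python
import re

# Same pattern as get_random_sample: runs of characters outside
# [0-9a-zA-Z_] are collapsed to a single underscore.
PATTERN = re.compile(r"[^0-9a-zA-Z_]+")


def normalize(label):
    """Label normalization used when sampling: substitute, casefold, then
    strip leading/trailing underscores."""
    return PATTERN.sub("_", label).casefold().strip("_")


assert normalize("ontology IRI") == "ontology_iri"
assert normalize("Ontology-IRI") == "ontology_iri"  # collides with the above
```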
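get_valve_config in scripts/guess.py and the new config-loading code in test/generate_random_test_data.py shell out to `./valve --dump_config` with identical error handling. If a third consumer appears, the snippet is worth factoring into a shared helper; a sketch, where the module placement and the `valve_bin` parameter are assumptions:

```python
import json
import subprocess
import sys


def dump_valve_config(table_tsv, valve_bin="./valve"):
    """Run `valve --dump_config` on the given VALVE table table and return
    the parsed JSON configuration; on failure, exit with valve's own code."""
    result = subprocess.run([valve_bin, "--dump_config", table_tsv], capture_output=True)
    if result.returncode != 0:
        error = result.stderr.decode()
        output = result.stdout.decode()
        if output:
            error = f"{error}\n{output}"
        print(error, file=sys.stderr)
        sys.exit(result.returncode)
    return json.loads(result.stdout.decode())
```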
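get_sql_type climbs the datatype tree until it finds an ancestor with an "SQLite type". With the datatype table in this patch, a column typed `word` resolves to TEXT via word, nonspace, trimmed_line, line, text. A self-contained version of the climb:

```python
def get_sql_type(config, datatype):
    """Climb the datatype tree until an 'SQLite type' is found, as in
    scripts/guess.py; returns None if the datatype is unknown or untyped."""
    dt = config["datatype"].get(datatype)
    if dt is None:
        return None
    return dt.get("SQLite type") or get_sql_type(config, dt.get("parent"))


# Toy slice of datatype.tsv: only 'text' declares an SQLite type, so the
# lookup for 'word' climbs word -> nonspace -> trimmed_line -> line -> text.
config = {"datatype": {
    "text": {"SQLite type": "TEXT"},
    "line": {"parent": "text"},
    "trimmed_line": {"parent": "line"},
    "nonspace": {"parent": "trimmed_line"},
    "word": {"parent": "nonspace"},
}}
assert get_sql_type(config, "word") == "TEXT"
```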
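get_compiled_condition caches each datatype condition as a plain Python predicate: match(...) compiles to re.fullmatch, search(...) to re.search, exclude(...) to a negated search, equals(...) to string equality, and in(...) to membership. Hand-built equivalents of three conditions from datatype.tsv illustrate the semantics (illustration only; the real predicates are built from the parsed condition tree):

```python
import re

integer = re.compile(r"-?\d+")
whitespace = re.compile(r"\s")

match_integer = lambda x: bool(integer.fullmatch(x))            # match(/-?\d+/)
exclude_space = lambda x: not bool(whitespace.search(x))        # exclude(/\s/)
in_table_type = lambda x: x in ("table", "column", "datatype")  # in('table', ...)

assert match_integer("-42") and not match_integer("4.2")
assert exclude_space("no_spaces") and not exclude_space("has space")
assert in_table_type("datatype")
```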
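In get_from, sampled values are spliced into SQL via `value = f"'{value}'"`, which misquotes any value containing a single quote. Identifiers cannot be bound as parameters, but values can; a sketch of the membership probe using sqlite3 placeholders (the table and column names still come from the trusted VALVE config):

```python
def count_foreign_matches(conn, table, column, values):
    """For each sampled value, probe whether it exists in table.column,
    binding the value as a parameter rather than interpolating it."""
    sql = f'SELECT 1 FROM "{table}" WHERE "{column}" = ? LIMIT 1'
    return sum(1 for v in values if conn.execute(sql, (v,)).fetchone() is not None)
```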
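scripts/guess_grammar.py pairs the Lark grammar with reverse_parse, so a condition can be round-tripped between its text form and the dict representation. A usage sketch, assuming it is run from scripts/ so that guess_grammar is importable; note that parser.parse returns a list because the start rule accepts multiple expressions:

```python
from lark import Lark

from guess_grammar import grammar, TreeToDict, reverse_parse

parser = Lark(grammar, parser="lalr", transformer=TreeToDict())
config = {"datatype": {}}

# 'match(/-?\d+/)' is the condition on the 'integer' datatype above:
parsed = parser.parse("match(/-?\\d+/)")
assert len(parsed) == 1
assert parsed[0]["type"] == "function" and parsed[0]["name"] == "match"
assert reverse_parse(config, parsed[0]) == "match(/-?\\d+/)"
```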
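The configure_db change in src/lib.rs turns column drift into a hard error: every column in a data file's header row must appear in the column config, and every configured column must be present in the file. The same invariant, mirrored in Python for test tooling; a sketch, not part of the patch:

```python
import csv


def check_columns(path, defined_columns):
    """Enforce the configure_db invariant: the TSV header row and the column
    config must contain exactly the same column names."""
    with open(path) as f:
        actual = next(csv.reader(f, delimiter="\t"))
    for name in actual:
        if name not in defined_columns:
            raise ValueError(f"Column '{name}' in {path} not in column config")
    for name in defined_columns:
        if name not in actual:
            raise ValueError(f"Defined column '{name}' not found in {path}")
    return actual  # preserves the file's ordering, like column_order
```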
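Finally, in test/generate_random_test_data.py, foreign-key cells are now drawn from a sliding window (WINDOW_SIZE = 50, replacing the hard-coded 100) over the values most recently inserted into the referenced column. The core selection, isolated below; the real code also handles nulltype columns and prefers to skip the very last inserted value so that tree and under references stay plausible:

```python
import random

WINDOW_SIZE = 50


def choose_from_window(prev_inserts, from_table, from_column):
    """Pick a value from the last WINDOW_SIZE values inserted into the
    referenced column, preferring not to reuse the most recent one."""
    history = prev_inserts[from_table][from_column][-WINDOW_SIZE:]
    prev_inserts[from_table][from_column] = history  # keep memory bounded
    candidates = history[:-1] if len(history) > 1 else history
    return candidates[random.randrange(len(candidates))]
```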