From 431851f4fbbb46f639ea9b1f6aad1745c4e9951b Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 10 Sep 2023 10:37:45 -0400 Subject: [PATCH] add numeric and real data to performance tests --- test/generate_random_test_data.py | 54 +++++++++++++++++++++++-------- test/perf_test_data/datatype.tsv | 4 ++- 2 files changed, 43 insertions(+), 15 deletions(-) diff --git a/test/generate_random_test_data.py b/test/generate_random_test_data.py index 3f8d988d..06e0320e 100755 --- a/test/generate_random_test_data.py +++ b/test/generate_random_test_data.py @@ -65,7 +65,7 @@ }, "foo": { "allow_empty": True, - "datatype": "integer", + "datatype": "real", "structure": { "type": "foreign", "ftable": "table4", @@ -131,7 +131,7 @@ }, "numeric_foreign_column": { "allow_empty": False, - "datatype": "integer", + "datatype": "real", "structure": { "type": "primary", }, @@ -153,7 +153,7 @@ "table6": { "child": { "allow_empty": False, - "datatype": "integer", + "datatype": "real", "structure": { "type": "foreign", "ftable": "table4", @@ -162,7 +162,7 @@ }, "parent": { "allow_empty": True, - "datatype": "integer", + "datatype": "real", "structure": { "type": "tree", "tcolumn": "child", @@ -170,7 +170,7 @@ }, "xyzzy": { "allow_empty": True, - "datatype": "integer", + "datatype": "real", "structure": { "type": "under", "ttable": "table6", @@ -184,7 +184,7 @@ }, "bar": { "allow_empty": True, - "datatype": "integer", + "datatype": "numeric", }, }, } @@ -261,11 +261,18 @@ def get_constrained_cell_value(table, column, row_num, prev_inserts): + " " + "".join(random.choices(string.ascii_lowercase, k=TOKEN_LENGTH)) ) - elif CONFIG[table][column]["datatype"] == "integer": + elif CONFIG[table][column]["datatype"] in ["integer", "real", "numeric"]: # No leading 0s: cell = "".join(random.choices("123456789", k=1)) + "".join( random.choices(string.digits, k=TOKEN_LENGTH - 1) ) + if CONFIG[table][column]["datatype"] != "integer": + cell = ( + cell + + "." + + "".join(random.choices("0123456789", k=1)) + + "".join(random.choices(string.digits, k=3)) + ) else: print( f"Warning: Unknown datatype: {CONFIG[table][column]['datatype']}. " @@ -349,14 +356,33 @@ def main(): + " " + "".join(random.choices(string.ascii_lowercase, k=TOKEN_LENGTH)) ) - else: - if CONFIG[table][column]["datatype"] == "integer": - cell = "".join(random.choices(string.ascii_lowercase, k=TOKEN_LENGTH)) - else: - # No leading 0s: - cell = "".join(random.choices("123456789", k=1)) + "".join( - random.choices(string.digits, k=TOKEN_LENGTH - 1) + elif CONFIG[table][column]["datatype"] == "text": + # No leading 0s: + cell = "".join(random.choices("123456789", k=1)) + "".join( + random.choices(string.digits, k=TOKEN_LENGTH - 1) + ) + # Randomly add some decimal places: + if random.choice([True, False]): + cell = ( + cell + + "." + + "".join(random.choices("0123456789", k=1)) + + "".join(random.choices(string.digits, k=3)) ) + elif CONFIG[table][column]["datatype"] != "integer": + cell = "".join(random.choices(string.ascii_lowercase, k=TOKEN_LENGTH)) + elif random.choice([True, False]): + cell = "".join(random.choices(string.ascii_lowercase, k=TOKEN_LENGTH)) + else: + cell = "".join(random.choices("123456789", k=1)) + "".join( + random.choices(string.digits, k=TOKEN_LENGTH - 1) + ) + cell = ( + cell + + "." + + "".join(random.choices("0123456789", k=1)) + + "".join(random.choices(string.digits, k=3)) + ) row[column] = cell if not prev_inserts.get(table): diff --git a/test/perf_test_data/datatype.tsv b/test/perf_test_data/datatype.tsv index c118588d..c162ca56 100644 --- a/test/perf_test_data/datatype.tsv +++ b/test/perf_test_data/datatype.tsv @@ -11,12 +11,14 @@ label trimmed_line match(/\S([^\n]*\S)*/) line text exclude(/\n/) a line of text input natural_number integer match(/\d+/) a natural number, including zero INTEGER INTEGER nonspace trimmed_line exclude(/\s/) text without whitespace +numeric nonspace match(/-?\d+(\.\d+)?/) a positive or negative number NUMERIC NUMERIC path line exclude(/\n/) a path to a file prefix word exclude(/\W/) a prefix for a CURIE +real nonspace match(/-?\d+(\.\d+)?/) a positive or negative real number REAL REAL suffix word exclude(/\W/) a suffix for a CURIE table_name word exclude(/\W/) a table name table_type word lowercase in('table', 'column', 'datatype') a table type -text any text TEXT TEXT xsd:string textarea +text any text TEXT VARCHAR(100) xsd:string textarea trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace word nonspace exclude(/\W/) a single word: letters, numbers, underscore