From f4495be667609d30bee4039a07e9f9f201f7cb56 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 27 Nov 2023 11:49:34 -0500 Subject: [PATCH] Do not store label if it is the same as the normalized column name, and replace consecutive illegal characters with a single underscore (and remove leading and trailing underscores). --- scripts/guess.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/guess.py b/scripts/guess.py index 61e4ea14..0f9ab864 100755 --- a/scripts/guess.py +++ b/scripts/guess.py @@ -55,7 +55,7 @@ def get_random_sample(table, sample_size): for i in sample_row_numbers: for label, value in rows[i].items(): if label not in sample: - ncolumn = re.sub(pattern, "", label).casefold() + ncolumn = re.sub(pattern, "_", label).casefold().strip("_") if has_ncolumn(sample, ncolumn): print( "The data has more than one column with the normalized name " @@ -465,7 +465,7 @@ def get_from(target, potential_foreign_columns): row = [ f"{table}", f"{sample[label]['normalized']}", - f"{label}", + f"{label if label != sample[label]['normalized'] else ''}", f"{sample[label].get('nulltype', '')}", f"{sample[label]['datatype']}", f"{sample[label].get('structure', '')}", @@ -504,7 +504,7 @@ def get_from(target, potential_foreign_columns): f"{row_number}", f"'{table}'", f"'{sample[label]['normalized']}'", - f"'{label}'", + f"'{label}'" if label != sample[label]["normalized"] else "NULL", f"'{sample[label]['nulltype']}'" if sample[label].get("nulltype") else "NULL", f"'{sample[label]['datatype']}'", f"'{sample[label]['structure']}'" if sample[label].get("structure") else "NULL",