From f4495be667609d30bee4039a07e9f9f201f7cb56 Mon Sep 17 00:00:00 2001
From: Michael Cuffaro <consulting@michaelcuffaro.com>
Date: Mon, 27 Nov 2023 11:49:34 -0500
Subject: [PATCH] do not store label if it is the same as the normalized column
 name, and replace consecutive illegal characters with a single underscore
 (and remove leading and trailing underscores).

---
 scripts/guess.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/guess.py b/scripts/guess.py
index 61e4ea14..0f9ab864 100755
--- a/scripts/guess.py
+++ b/scripts/guess.py
@@ -55,7 +55,7 @@ def get_random_sample(table, sample_size):
         for i in sample_row_numbers:
             for label, value in rows[i].items():
                 if label not in sample:
-                    ncolumn = re.sub(pattern, "", label).casefold()
+                    ncolumn = re.sub(pattern, "_", label).casefold().strip("_")
                     if has_ncolumn(sample, ncolumn):
                         print(
                             "The data has more than one column with the normalized name "
@@ -465,7 +465,7 @@ def get_from(target, potential_foreign_columns):
             row = [
                 f"{table}",
                 f"{sample[label]['normalized']}",
-                f"{label}",
+                f"{label if label != sample[label]['normalized'] else ''}",
                 f"{sample[label].get('nulltype', '')}",
                 f"{sample[label]['datatype']}",
                 f"{sample[label].get('structure', '')}",
@@ -504,7 +504,7 @@ def get_from(target, potential_foreign_columns):
                 f"{row_number}",
                 f"'{table}'",
                 f"'{sample[label]['normalized']}'",
-                f"'{label}'",
+                f"'{label}'" if label != sample[label]["normalized"] else "NULL",
                 f"'{sample[label]['nulltype']}'" if sample[label].get("nulltype") else "NULL",
                 f"'{sample[label]['datatype']}'",
                 f"'{sample[label]['structure']}'" if sample[label].get("structure") else "NULL",