Skip to content

Commit

Permalink
do not store label if it is the same as the normalized column name, and replace consecutive illegal characters with a single underscore (and remove trailing underscores).
Browse files Browse the repository at this point in the history
  • Loading branch information
lmcmicu committed Nov 27, 2023
1 parent 4a4ea32 commit f4495be
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions scripts/guess.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def get_random_sample(table, sample_size):
for i in sample_row_numbers:
for label, value in rows[i].items():
if label not in sample:
ncolumn = re.sub(pattern, "", label).casefold()
ncolumn = re.sub(pattern, "_", label).casefold().strip("_")
if has_ncolumn(sample, ncolumn):
print(
"The data has more than one column with the normalized name "
Expand Down Expand Up @@ -465,7 +465,7 @@ def get_from(target, potential_foreign_columns):
row = [
f"{table}",
f"{sample[label]['normalized']}",
f"{label}",
f"{label if label != sample[label]['normalized'] else ''}",
f"{sample[label].get('nulltype', '')}",
f"{sample[label]['datatype']}",
f"{sample[label].get('structure', '')}",
Expand Down Expand Up @@ -504,7 +504,7 @@ def get_from(target, potential_foreign_columns):
f"{row_number}",
f"'{table}'",
f"'{sample[label]['normalized']}'",
f"'{label}'",
f"'{label}'" if label != sample[label]["normalized"] else "NULL",
f"'{sample[label]['nulltype']}'" if sample[label].get("nulltype") else "NULL",
f"'{sample[label]['datatype']}'",
f"'{sample[label]['structure']}'" if sample[label].get("structure") else "NULL",
Expand Down

0 comments on commit f4495be

Please sign in to comment.