Skip to content

Commit

Permalink
add new column splitter and tests (passing, but incomplete)
Browse files Browse the repository at this point in the history
  • Loading branch information
icyveins7 committed Mar 13, 2024
1 parent b2826df commit f79e311
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 4 deletions.
74 changes: 70 additions & 4 deletions sew/formatSpec.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def generate(self):


@staticmethod
def _parseColumnDesc(desc: str) -> list[str, str]:
def _parseColumnDesc(desc: str) -> list[str, str] | None:
'''
Helper method to parse the section of the CREATE TABLE statement
that describes a single column.
Expand All @@ -104,17 +104,83 @@ def _parseColumnDesc(desc: str) -> list[str, str]:
Returns
-------
list[str, str]
sdesc : list[str, str] or None
A list of two strings, the column name and the type.
Returns None if an empty string is passed in.
'''
sdesc = desc.strip().split(" ")
sdesc = desc.strip()
if len(sdesc) == 0: # Empty string
return None

sdesc = sdesc.split(" ")
if len(sdesc) == 1:
return [sdesc[0], ""] # Blank for type

else:
return [sdesc[0], " ".join(sdesc[1:])] # This accounts for INTEGER PRIMARY KEY for e.g.


@staticmethod
def _splitColumnsSql(fmtstr: str) -> tuple[list[list[str]], list[str], list[list[str]]]:
    '''
    Helper method to split the extracted SQL from the CREATE TABLE statement into
    3 constituent parts:
    1) Description of the columns
    2) Description of the conditions on the columns e.g. UNIQUE
    3) FOREIGN KEY constraints

    Parameters
    ----------
    fmtstr : str
        The extracted SQL from the CREATE TABLE statement.
        This should be everything in the outermost brackets following the table name.

    Returns
    -------
    cols : list[list[str]]
        List of list of strings, with each inner list being returned from ._parseColumnDesc().
    conds : list[str]
        List of strings, with each one specifying an extra condition,
        exactly as written in the input.
        Example: UNIQUE(col1, col2).
    foreign_keys : list[list[str]]
        List of list of strings, with each inner list representing the
        child, parent relationship.
        Example: ["col_child", "parent_table(col_parent)"]
    '''
    # Pull out the UNIQUE(...) constraints first. [^()]* keeps the match
    # inside a single pair of brackets and also spans newlines; \s* allows
    # the common "UNIQUE (a, b)" spelling.
    uniquePattern = re.compile(r"\bUNIQUE\s*\([^()]*\)", flags=re.IGNORECASE)
    conds = uniquePattern.findall(fmtstr)
    fmtstr = uniquePattern.sub("", fmtstr)  # Drop the substrings

    # Then the FOREIGN KEY constraints. The brackets are matched literally,
    # so the child column(s) and the parent table(column) are read directly
    # from the capture groups.
    foreignPattern = re.compile(
        r"\bFOREIGN\s+KEY\s*\(([^()]+)\)\s*REFERENCES\s+([^(),]+\([^()]*\))",
        flags=re.IGNORECASE
    )
    foreign_keys = [
        [match.group(1).strip(), match.group(2)]
        for match in foreignPattern.finditer(fmtstr)
    ]
    fmtstr = foreignPattern.sub("", fmtstr)  # Drop the substrings

    # Split what is left on commas, but only those outside brackets, so a
    # type such as DECIMAL(10, 2) stays in one piece.
    pieces = []
    depth = 0
    start = 0
    for idx, char in enumerate(fmtstr):
        if char == "(":
            depth += 1
        elif char == ")":
            depth = max(depth - 1, 0)  # Tolerate a stray closing bracket
        elif char == "," and depth == 0:
            pieces.append(fmtstr[start:idx])
            start = idx + 1
    pieces.append(fmtstr[start:])

    # Removing the constraints leaves blank pieces behind; skip those and
    # parse each remaining column description.
    cols = [
        sdesc
        for piece in pieces
        if piece.strip()
        and (sdesc := FormatSpecifier._parseColumnDesc(piece)) is not None
    ]

    return cols, conds, foreign_keys


@classmethod
def fromSql(cls, stmt: str):
'''
Expand Down Expand Up @@ -221,4 +287,4 @@ def getParents(fmt: dict):
# print(id(genFmtspec.fmt))
# print(fmtspec.generate())
# print(id(fmtspec.fmt))
assert(genFmtspec.fmt == fmtspec.fmt)
assert(genFmtspec.fmt == fmtspec.fmt)
40 changes: 40 additions & 0 deletions tests/statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,47 @@ def test_col_desc_parse(self):
)


# Test the create table splitter
def test_table_sql_splitter(self):
    # Plain columns only, with some surrounding blanks and one column
    # that has no declared type
    desc = ' col1 INTEGER, col2, col3 real '
    cols, conds, fks = sew.FormatSpecifier._splitColumnsSql(desc)
    self.assertEqual(
        cols,
        [
            ["col1", "INTEGER"],
            ["col2", ""],
            ["col3", "real"]
        ]
    )
    self.assertEqual(conds, [])
    self.assertEqual(fks, [])

    # A UNIQUE constraint must be moved out of the columns into conds
    desc = ' col1 INTEGER, col2, col3 real, UNIQUE(col1, col2) '
    cols, conds, fks = sew.FormatSpecifier._splitColumnsSql(desc)
    self.assertEqual(
        cols,
        [
            ["col1", "INTEGER"],
            ["col2", ""],
            ["col3", "real"]
        ]
    )
    self.assertEqual(conds, ["UNIQUE(col1, col2)"])
    self.assertEqual(fks, [])

    # A FOREIGN KEY constraint must be moved out of the columns and
    # reported as a [child, parent] pair, alongside any UNIQUE constraint
    desc = (
        ' col1 INTEGER, col2, UNIQUE(col1, col2), '
        'FOREIGN KEY(col2) REFERENCES parent_table(col_parent) '
    )
    cols, conds, fks = sew.FormatSpecifier._splitColumnsSql(desc)
    self.assertEqual(
        cols,
        [
            ["col1", "INTEGER"],
            ["col2", ""]
        ]
    )
    self.assertEqual(conds, ["UNIQUE(col1, col2)"])
    self.assertEqual(fks, [["col2", "parent_table(col_parent)"]])



Expand Down

0 comments on commit f79e311

Please sign in to comment.