Skip to content

Commit

Permalink
fixing quotes for sqlite
Browse files (browse the repository at this point in the history)
  • Loading branch information
zshandy committed Apr 28, 2024
1 parent 154a326 commit 10f23d3
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 15 deletions.
2 changes: 1 addition & 1 deletion lineagex/ColumnLineageNoConn.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def __init__(
self.unnest_dict = {}
self.input_table_dict = input_table_dict
# self.sql_ast = parse_one(sql, read=dialect)
self.sql_ast = parse_one_sql(sql=sql)
self.sql_ast = parse_one_sql(sql='''''' + sql + '''''')
self.all_used_col = []
self.table_list = []
self.all_subquery_table = []
Expand Down
2 changes: 1 addition & 1 deletion lineagex/LineageXNoConn.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def _find_lineage_no_conn(self):
for name, sql in self.sql_files_dict.items():
try:
# sql_ast = parse_one(sql, read=self.dialect)
sql_ast = parse_one_sql(sql=sql)
sql_ast = parse_one_sql(sql='''''' + sql + '''''')
all_tables = self._resolve_table(part_ast=sql_ast)
for t in all_tables:
if t in self.sql_files_dict.keys() and t not in self.finished_list:
Expand Down
2 changes: 1 addition & 1 deletion lineagex/SqlToDict.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def _preprocess_sql(
ret_sql = remove_comments(str1=new_sql)
if self.dialect == "sqlite":
ret_sql = ret_sql.replace('"', "'")
ret_sql = ret_sql.replace("`", '"')
ret_sql = ret_sql.replace(" REL)", " REAL)").replace("IS NOT ''", "IS NOT NULL").replace("`", '"')
elif self.dialect == "postgres":
ret_sql = ret_sql.replace("`", '')
# remove any database names in the query
Expand Down
41 changes: 30 additions & 11 deletions lineagex/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,36 @@ def remove_comments(str1: Optional[str] = "") -> str:
# remove trailing -- and # comments
# pattern = r"(?:--|#)(?!.*(['""])[^'""]*\1)[^'\n\r]*"
# q = " ".join([re.sub(pattern, "", line) for line in lines])
q = " ".join(
[
re.split("--|#", line)[0]
if line.find("'#") == -1
and line.find('"#') == -1
and line.find("'--") == -1
and line.find('"--') == -1
else line
for line in lines
]
)
q = ""
comment_symbol = ["--", "#"]
for line in lines:
new_line = line
for c in comment_symbol:
quoted = False
# if there is a comment symbol
if line.find(c) != -1:
c_idx = line.find(c)
# if there is a ' on the left
if line.rfind("'", c_idx) != -1:
q_idx = line.rfind("'", c_idx)
# find the corresponding ' on the right
if line.find("'", q_idx) != -1:
quoted = True
if not quoted:
new_line = re.split("--|#", line)[0]
q += " " + new_line

# q = " ".join(
# [
# re.split("--|#", line)[0]
# if line.find("'#") == -1
# and line.find('"#') == -1
# and line.find("'--") == -1
# and line.find('"--') == -1
# else line
# for line in lines
# ]
# )
# replace all spaces around commas
q = re.sub(r"\s*,\s*", ",", q)
# replace all multiple spaces to one space
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "lineagex"
version = "0.0.25"
version = "0.0.26"
description = "A column lineage tool"
authors = ["zshandy <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 10f23d3

Please sign in to comment.