Skip to content

Commit

Permalink
adding SQL
Browse files Browse the repository at this point in the history
  • Loading branch information
zshandy committed Feb 12, 2024
1 parent ecf379d commit f6fc49e
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 158 deletions.
6 changes: 5 additions & 1 deletion lineagex/LineageXNoConn.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ def __init__(
self.target_schema = target_schema
search_path_schema = [x.strip() for x in search_path_schema.split(",")]
search_path_schema.append(target_schema)
self.sql_files_dict = SqlToDict(sql, search_path_schema).sql_files_dict
s2d = SqlToDict(sql, search_path_schema)
self.sql_files_dict = s2d.sql_files_dict
self.org_sql_files_dict = s2d.org_sql_files_dict
self.dialect = dialect
self.input_table_dict = {}
self.finished_list = []
Expand Down Expand Up @@ -91,6 +93,8 @@ def _run_lineage_no_conn(self, name: Optional[str] = "", sql: Optional[str] = ""
# "table_name": self.target_schema + "." + name,
# }
# else:
# if name in self.org_sql_files_dict.keys():
# sql = self.org_sql_files_dict[name]
self.output_dict[name] = {
"tables": col_lineage.table_list,
"columns": col_lineage.column_dict,
Expand Down
23 changes: 15 additions & 8 deletions lineagex/SqlToDict.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def __init__(
self.schema_list = schema_list
self.sql_files = []
self.sql_files_dict = {}
self.org_sql_files_dict = {}
self.deletion_dict = {}
self.insertion_dict = {}
self.curr_name = ""
Expand All @@ -28,13 +29,13 @@ def _sql_to_dict(self) -> None:
"""
if isinstance(self.path, list):
for idx, val in enumerate(self.path):
self._preprocess_sql(org_sql=val, file=str(idx))
self._preprocess_sql(new_sql=val, file=str(idx), org_sql=val)
else:
self.sql_files = get_files(path=self.path)
for f in self.sql_files:
org_sql = open(f, mode="r", encoding="utf-8-sig").read()
org_sql = remove_comments(str1=org_sql)
org_sql_split = list(filter(None, org_sql.split(";")))
new_sql = remove_comments(str1=org_sql)
org_sql_split = list(filter(None, new_sql.split(";")))
# pop DROP IF EXISTS
if len(org_sql_split) > 0:
for s in org_sql_split:
Expand All @@ -47,24 +48,24 @@ def _sql_to_dict(self) -> None:
if f.endswith(".sql") or f.endswith(".SQL"):
f = os.path.basename(f)[:-4]
if len(org_sql_split) <= 1:
self._preprocess_sql(org_sql=org_sql_split[0], file=f)
self._preprocess_sql(new_sql=org_sql_split[0], file=f, org_sql=org_sql)
else:
for idx, val in enumerate(org_sql_split):
self._preprocess_sql(org_sql=val, file=f + "_" + str(idx))
self._preprocess_sql(new_sql=val, file=f + "_" + str(idx), org_sql=org_sql)
for key, value in self.sql_files_dict.copy().items():
if key.startswith("."):
self.sql_files_dict[key[1:]] = value
del self.sql_files_dict[key]

def _preprocess_sql(
self, org_sql: Optional[str] = "", file: Optional[str] = ""
self, new_sql: Optional[str] = "", file: Optional[str] = "", org_sql: Optional[str] = ""
) -> None:
"""
Process the sql, remove database name in the clause/datetime_add/datetime_sub adding quotes
:param org_sql: the original sql, file: file name for the sql
:param new_sql: the sql for parsing, file: file name for the sql, org_sql: the most original sql
:return: None
"""
ret_sql = remove_comments(str1=org_sql)
ret_sql = remove_comments(str1=new_sql)
ret_sql = ret_sql.replace("`", "")
# remove any database names in the query
if self.schema_list:
Expand Down Expand Up @@ -114,6 +115,7 @@ def _preprocess_sql(
if temp[5] in self.sql_files_dict.keys():
print("WARNING: duplicate script detected for {}".format(temp[5]))
self.sql_files_dict[temp[5]] = ret_sql
self.org_sql_files_dict[temp[5]] = org_sql
elif re.search("CREATE VIEW", ret_sql, flags=re.IGNORECASE) or re.search(
"CREATE TABLE", ret_sql, flags=re.IGNORECASE
):
Expand All @@ -123,6 +125,7 @@ def _preprocess_sql(
if temp[2] in self.sql_files_dict.keys():
print("WARNING: duplicate script detected for {}".format(temp[2]))
self.sql_files_dict[temp[2]] = ret_sql
self.org_sql_files_dict[temp[2]] = org_sql
# adjust to INSERT/DELETE/SELECT/
elif ret_sql.find("INSERT INTO") != -1:
# find the current name in the insertion dict and how many times it has been inserted
Expand All @@ -136,6 +139,7 @@ def _preprocess_sql(
insert_counter = self.insertion_dict[self.curr_name]
self.curr_name = self.curr_name + "_INSERTION_{}".format(insert_counter)
self.sql_files_dict[self.curr_name] = find_select(q=ret_sql)
self.org_sql_files_dict[self.curr_name] = org_sql
elif ret_sql.find("DELETE FROM") != -1:
# find the current name in the insertion dict and how many times it has been deleted
self.curr_name = re.sub(rem_regex, "", ret_sql.split(" ")[2])
Expand All @@ -148,6 +152,7 @@ def _preprocess_sql(
delete_counter = self.deletion_dict[self.curr_name]
self.curr_name = self.curr_name + "_DELETION_{}".format(delete_counter)
self.sql_files_dict[self.curr_name] = find_select(q=ret_sql)
self.org_sql_files_dict[self.curr_name] = org_sql
elif re.search("CREATE EXTENSION", ret_sql, flags=re.IGNORECASE):
return
else:
Expand All @@ -156,8 +161,10 @@ def _preprocess_sql(
if name in self.sql_files_dict.keys():
print("WARNING: duplicate script detected for {}".format(name))
self.sql_files_dict[name] = ret_sql
self.org_sql_files_dict[name] = org_sql
else:
self.sql_files_dict[file] = ret_sql
self.org_sql_files_dict[file] = org_sql


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion lineagex/app.js

Large diffs are not rendered by default.

294 changes: 147 additions & 147 deletions lineagex/vendor.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "lineagex"
version = "0.0.16"
version = "0.0.17"
description = "A column lineage tool"
authors = ["zshandy <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit f6fc49e

Please sign in to comment.