Skip to content

Commit

Permalink
sqlite support
Browse files Browse the repository at this point in the history
  • Loading branch information
zshandy committed Feb 10, 2025
1 parent 10f23d3 commit 5303ab4
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 15 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,13 @@ When entering the `conn_string` parameter, only supported databases' connection
## Database Connection Types
- [x] Postgres
- [x] dbt-Postgres
- [x] Sqlite
- [ ] Mysql
- [ ] Sqlite
- [ ] SQL Server
- [ ] Oracle
- [ ] ...


# Documentation
Doc: https://sfu-db.github.io/lineagex/intro.html or just [here](https://sfu-db.github.io/lineagex/intro.html)
Doc: https://sfu-db.github.io/lineagex/intro.html or just [here](https://sfu-db.github.io/lineagex/intro.html)
Javascript Source: Compiled and forked from [here](https://github.com/Bert0324/lineage-dag) by Bert Huang
17 changes: 11 additions & 6 deletions lineagex/ColumnLineageNoConn.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,9 +340,10 @@ def _sub_shared_col_conds_cte(
)
all_cte_sub_cols.extend(temp_sub_cols)
if len(all_cte_sub_table) == 1:
self.table_alias_dict[
sub_ast.find(exp.TableAlias).alias_or_name
] = all_cte_sub_table[0]
if sub_ast.find(exp.TableAlias):
self.table_alias_dict[
sub_ast.find(exp.TableAlias).alias_or_name
] = all_cte_sub_table[0]
potential_cte_sub_table = temp_sub_table
if type(sub_ast.parent) in from_join_exp:
temp_sub_dict = {}
Expand All @@ -359,7 +360,10 @@ def _sub_shared_col_conds_cte(
# target_dict=temp_sub_dict,
# source_table=temp_sub_table,
# )
sub_name = sub_ast.find(exp.TableAlias).alias_or_name
if sub_ast.find(exp.TableAlias) is not None:
sub_name = sub_ast.find(exp.TableAlias).alias_or_name
else:
sub_name = "no_name_subquery"
self.cte_dict[sub_name] = temp_sub_dict
sub_ast.replace(exp.Table(this=sub_name))
sub_ast.pop()
Expand Down Expand Up @@ -745,7 +749,8 @@ def _find_alias_col(
if len(temp) < 2:
for t in temp_table:
if t in self.input_table_dict.keys():
if col_sql in self.input_table_dict[t]:
# resolve any case mismatching
if col_sql.lower() in [x.lower() for x in self.input_table_dict[t]]:
if ref:
return [[], [t + "." + col_sql]]
else:
Expand Down Expand Up @@ -927,7 +932,7 @@ def _resolve_agg_star(
temp_col = temp_col + cols[0] + cols[1]
target_dict[col_name] = [
[""],
list(self.all_used_col.union(set(temp_col))),
list(set(self.all_used_col).union(set(temp_col))),
]
elif t_name in self.cte_dict.keys():
temp_col = []
Expand Down
9 changes: 7 additions & 2 deletions lineagex/LineageXNoConn.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def __init__(
dialect: str = "postgres",
target_schema: Optional[str] = "public",
search_path_schema: Optional[str] = "public",
input_table_dict: Optional[dict] = None
) -> None:
self.output_dict = {}
self.parsed = 0
Expand All @@ -45,7 +46,10 @@ def __init__(
self.sql_files_dict = s2d.sql_files_dict
self.org_sql_files_dict = s2d.org_sql_files_dict
self.dialect = dialect
self.input_table_dict = {}
if input_table_dict is None:
self.input_table_dict = {}
else:
self.input_table_dict = input_table_dict
self.finished_list = []
self._find_lineage_no_conn()

Expand All @@ -58,7 +62,8 @@ def _find_lineage_no_conn(self):
start_time = time.time()
for name, sql in self.sql_files_dict.items():
try:
# sql_ast = parse_one(sql, read=self.dialect)
sql_ast = parse_one(sql, read=self.dialect)
#print(sql)
sql_ast = parse_one_sql(sql='''''' + sql + '''''')
all_tables = self._resolve_table(part_ast=sql_ast)
for t in all_tables:
Expand Down
14 changes: 11 additions & 3 deletions lineagex/SqlToDict.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import re
from typing import List, Optional, Union

from .utils import find_select, get_files, remove_comments
from .utils import find_select, get_files, remove_comments_sqlite, remove_comments_pg

rem_regex = re.compile(r"[^a-zA-Z0-9_.]")

Expand Down Expand Up @@ -35,7 +35,13 @@ def _sql_to_dict(self) -> None:
self.sql_files = get_files(path=self.path)
for f in self.sql_files:
org_sql = open(f, mode="r", encoding="latin-1").read()
new_sql = remove_comments(str1=org_sql)
if self.dialect == "sqlite":
new_sql = remove_comments_sqlite(str1=org_sql)
elif self.dialect == "postgres":
new_sql = remove_comments_pg(str1=org_sql)
else:
new_sql = remove_comments_pg(str1=org_sql)
#new_sql = remove_comments(str1=org_sql)
org_sql_split = list(filter(None, new_sql.split(";")))
# pop DROP IF EXISTS
if len(org_sql_split) > 0:
Expand Down Expand Up @@ -67,11 +73,13 @@ def _preprocess_sql(
:param new_sql: the sql for parsing, file: file name for the sql, org_sql: the most original sql
:return: None
"""
ret_sql = remove_comments(str1=new_sql)
#ret_sql = remove_comments(str1=new_sql)
if self.dialect == "sqlite":
ret_sql = remove_comments_sqlite(str1=new_sql)
ret_sql = ret_sql.replace('"', "'")
ret_sql = ret_sql.replace(" REL)", " REAL)").replace("IS NOT ''", "IS NOT NULL").replace("`", '"')
elif self.dialect == "postgres":
ret_sql = remove_comments_pg(str1=new_sql)
ret_sql = ret_sql.replace("`", '')
# remove any database names in the query
if self.schema_list:
Expand Down
2 changes: 2 additions & 0 deletions lineagex/lineagex.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __init__(
conn_string: Optional[str] = None,
search_path_schema: Optional[str] = "",
dialect: str = "postgres",
input_table_dict: Optional[dict] = None,
) -> None:
validate_sql(sql)
target_schema, search_path_schema = validate_schema(
Expand All @@ -74,6 +75,7 @@ def __init__(
dialect=dialect,
target_schema=target_schema,
search_path_schema=search_path_schema,
input_table_dict=input_table_dict,
)
save_js_file()
self.output_dict = lx.output_dict
Expand Down
22 changes: 21 additions & 1 deletion lineagex/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from psycopg2.extensions import connection


def remove_comments(str1: Optional[str] = "") -> str:
def remove_comments_sqlite(str1: Optional[str] = "") -> str:
"""
Remove comments/excessive spaces/"create table as"/"create view as" from the sql file
:param str1: the original sql
Expand Down Expand Up @@ -57,6 +57,26 @@ def remove_comments(str1: Optional[str] = "") -> str:
return str1


def remove_comments_pg(str1: Optional[str] = "") -> str:
"""
Remove comments/excessive spaces/"create table as"/"create view as" from the sql file
:param str1: the original sql
:return: the parsed sql
"""
# remove the /* */ comments
q = re.sub(r"/\*[^*]*\*+(?:[^*/][^*]*\*+)*/", "", str1)
# remove whole line -- and # comments
lines = [line for line in q.splitlines() if not re.match("^\s*(--|#)", line)]
# remove trailing -- and # comments
q = " ".join([re.split("--|#", line)[0] for line in lines])
# replace all spaces around commas
q = re.sub(r"\s*,\s*", ",", q)
# replace all multiple spaces to one space
str1 = re.sub("\s\s+", " ", q)
str1 = str1.replace("\n", " ").strip()
return str1


def find_column(
table_name: Optional[str] = "",
engine: Any = None,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "lineagex"
version = "0.0.26"
version = "0.0.27"
description = "A column lineage tool"
authors = ["zshandy <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 5303ab4

Please sign in to comment.