Skip to content

Commit e634e9e

Browse files
mayurinehate authored and chakru-r committed
fix(ingest/snowflake): use fast query fingerprint for lineage (datahub-project#12275)
1 parent c8c60af commit e634e9e

File tree

2 files changed

+12
-5
lines changed

2 files changed

+12
-5
lines changed

metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py

+4
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
ColumnRef,
4141
DownstreamColumnRef,
4242
)
43+
from datahub.sql_parsing.sqlglot_utils import get_query_fingerprint
4344
from datahub.utilities.perf_timer import PerfTimer
4445
from datahub.utilities.time import ts_millis_to_datetime
4546

@@ -239,6 +240,9 @@ def get_known_query_lineage(
239240
downstream_table_urn = self.identifiers.gen_dataset_urn(dataset_name)
240241

241242
known_lineage = KnownQueryLineageInfo(
243+
query_id=get_query_fingerprint(
244+
query.query_text, self.identifiers.platform, fast=True
245+
),
242246
query_text=query.query_text,
243247
downstream=downstream_table_urn,
244248
upstreams=self.map_query_result_upstreams(

metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py

+8 -5
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ class KnownQueryLineageInfo:
165165
timestamp: Optional[datetime] = None
166166
session_id: Optional[str] = None
167167
query_type: QueryType = QueryType.UNKNOWN
168+
query_id: Optional[str] = None
168169

169170

170171
@dataclasses.dataclass
@@ -618,11 +619,13 @@ def add_known_query_lineage(
618619
self.report.num_known_query_lineage += 1
619620

620621
# Generate a fingerprint for the query.
621-
with self.report.sql_fingerprinting_timer:
622-
query_fingerprint = get_query_fingerprint(
623-
known_query_lineage.query_text,
624-
platform=self.platform.platform_name,
625-
)
622+
query_fingerprint = known_query_lineage.query_id
623+
if not query_fingerprint:
624+
with self.report.sql_fingerprinting_timer:
625+
query_fingerprint = get_query_fingerprint(
626+
known_query_lineage.query_text,
627+
platform=self.platform.platform_name,
628+
)
626629
formatted_query = self._maybe_format_query(known_query_lineage.query_text)
627630

628631
# Register the query.

0 commit comments

Comments
 (0)