Skip to content

Commit

Permalink
fix!: normalize before qualifying tables
Browse files (browse the repository at this point in the history)
closes #4538
  • Loading branch information
tobymao committed Dec 19, 2024
1 parent e56d6a9 commit ffcffa7
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 23 deletions.
2 changes: 1 addition & 1 deletion sqlglot/optimizer/qualify.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ def qualify(
The qualified expression.
"""
schema = ensure_schema(schema, dialect=dialect)
expression = normalize_identifiers(expression, dialect=dialect)
expression = qualify_tables(
expression,
db=db,
Expand All @@ -78,6 +77,7 @@ def qualify(
dialect=dialect,
infer_csv_schemas=infer_csv_schemas,
)
expression = normalize_identifiers(expression, dialect=dialect)

if isolate_tables:
expression = isolate_table_selects(expression, schema=schema)
Expand Down
44 changes: 22 additions & 22 deletions tests/fixtures/optimizer/optimizer.sql
Original file line number Diff line number Diff line change
Expand Up @@ -630,11 +630,11 @@ PIVOT(SUM(`u_cte`.`f`) AS `sum` FOR `u_cte`.`h` IN ('x', 'y')) AS `_q_0`;
# dialect: snowflake
SELECT * FROM u PIVOT (SUM(f) FOR h IN ('x', 'y'));
SELECT
"_q_0"."G" AS "G",
"_q_0"."'x'" AS "'x'",
"_q_0"."'y'" AS "'y'"
"_Q_0"."G" AS "G",
"_Q_0"."'x'" AS "'x'",
"_Q_0"."'y'" AS "'y'"
FROM "U" AS "U"
PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_q_0";
PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_Q_0";

# title: selecting all columns from a pivoted source and generating spark
# note: spark doesn't allow pivot aliases or qualified columns for the pivot's "field" (`h`)
Expand Down Expand Up @@ -690,14 +690,14 @@ PIVOT(MAX("SOURCE"."VALUE") FOR "SOURCE"."KEY" IN ('a', 'b', 'c')) AS "FINAL"("I
# dialect: snowflake
SELECT * FROM m_sales AS m_sales(empid, dept, jan, feb) UNPIVOT(sales FOR month IN (jan, feb)) ORDER BY empid;
SELECT
"_q_0"."EMPID" AS "EMPID",
"_q_0"."DEPT" AS "DEPT",
"_q_0"."MONTH" AS "MONTH",
"_q_0"."SALES" AS "SALES"
"_Q_0"."EMPID" AS "EMPID",
"_Q_0"."DEPT" AS "DEPT",
"_Q_0"."MONTH" AS "MONTH",
"_Q_0"."SALES" AS "SALES"
FROM "M_SALES" AS "M_SALES"("EMPID", "DEPT", "JAN", "FEB")
UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_q_0"
UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_Q_0"
ORDER BY
"_q_0"."EMPID";
"_Q_0"."EMPID";

# title: unpivoted table source, unpivot has column aliases
# execute: false
Expand All @@ -722,10 +722,10 @@ UNPIVOT("sales" FOR "month" IN ("m_sales"."jan", "m_sales"."feb")) AS "unpiv"("a
# dialect: snowflake
SELECT * FROM (SELECT * FROM m_sales) AS m_sales(empid, dept, jan, feb) UNPIVOT(sales FOR month IN (jan, feb)) ORDER BY empid;
SELECT
"_q_0"."EMPID" AS "EMPID",
"_q_0"."DEPT" AS "DEPT",
"_q_0"."MONTH" AS "MONTH",
"_q_0"."SALES" AS "SALES"
"_Q_0"."EMPID" AS "EMPID",
"_Q_0"."DEPT" AS "DEPT",
"_Q_0"."MONTH" AS "MONTH",
"_Q_0"."SALES" AS "SALES"
FROM (
SELECT
"M_SALES"."EMPID" AS "EMPID",
Expand All @@ -734,9 +734,9 @@ FROM (
"M_SALES"."FEB" AS "FEB"
FROM "M_SALES" AS "M_SALES"
) AS "M_SALES"
UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_q_0"
UNPIVOT("SALES" FOR "MONTH" IN ("JAN", "FEB")) AS "_Q_0"
ORDER BY
"_q_0"."EMPID";
"_Q_0"."EMPID";

# title: unpivoted table source with a single value column, unpivot columns can be qualified
# execute: false
Expand Down Expand Up @@ -832,13 +832,13 @@ WHERE
GROUP BY `dAy`, `top_term`, rank
ORDER BY `DaY` DESC;
SELECT
`TOp_TeRmS`.`refresh_date` AS `day`,
`TOp_TeRmS`.`term` AS `top_term`,
`TOp_TeRmS`.`rank` AS `rank`
FROM `bigquery-public-data.GooGle_tReNDs.TOp_TeRmS` AS `TOp_TeRmS`
`top_terms`.`refresh_date` AS `day`,
`top_terms`.`term` AS `top_term`,
`top_terms`.`rank` AS `rank`
FROM `bigquery-public-data.GooGle_tReNDs.TOp_TeRmS` AS `top_terms`
WHERE
`TOp_TeRmS`.`rank` = 1
AND `TOp_TeRmS`.`refresh_date` >= DATE_SUB(CURRENT_DATE, INTERVAL '2' WEEK)
`top_terms`.`rank` = 1
AND `top_terms`.`refresh_date` >= DATE_SUB(CURRENT_DATE, INTERVAL '2' WEEK)
GROUP BY
`day`,
`top_term`,
Expand Down
29 changes: 29 additions & 0 deletions tests/test_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,35 @@ def test_normalize(self):

@patch("sqlglot.generator.logger")
def test_qualify_columns(self, logger):
self.assertEqual(
optimizer.qualify.qualify(
parse_one(
"""
SELECT Teams.Name, count(*)
FROM raw.TeamMemberships as TeamMemberships
join raw.Teams
on Teams.Id = TeamMemberships.TeamId
GROUP BY 1
""",
read="bigquery",
),
schema={
"raw": {
"TeamMemberships": {
"Id": "INTEGER",
"UserId": "INTEGER",
"TeamId": "INTEGER",
},
"Teams": {
"Id": "INTEGER",
"Name": "STRING",
},
}
},
dialect="bigquery",
).sql(dialect="bigquery"),
"SELECT `teams`.`name` AS `name`, count(*) AS `_col_1` FROM `raw`.`TeamMemberships` AS `teammemberships` JOIN `raw`.`Teams` AS `teams` ON `teams`.`id` = `teammemberships`.`teamid` GROUP BY `teams`.`name`",
)
self.assertEqual(
optimizer.qualify.qualify(
parse_one(
Expand Down

0 comments on commit ffcffa7

Please sign in to comment.