From e95601f0a0bf9af9b386e1de2079ab83a79b41aa Mon Sep 17 00:00:00 2001 From: surister Date: Wed, 3 Jul 2024 22:44:39 +0200 Subject: [PATCH] Python: Add `Table` model, save all table references to `metadata.tables`, remove `table_name` and `schema` from metadata. --- CHANGES.md | 2 +- cratedb_sqlparse_py/README.md | 20 ++++++- .../cratedb_sqlparse/AstBuilder.py | 4 +- .../cratedb_sqlparse/models.py | 28 ++++++++++ .../cratedb_sqlparse/parser.py | 15 +---- cratedb_sqlparse_py/tests/test_enricher.py | 56 ++++++------------- 6 files changed, 67 insertions(+), 58 deletions(-) create mode 100644 cratedb_sqlparse_py/cratedb_sqlparse/models.py diff --git a/CHANGES.md b/CHANGES.md index 0ff9db2..e909a86 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,7 +1,7 @@ # Changelog ## Unreleased -- Build Script: Replace version instead of appending to the index file. +- Add `Table` model and save all table references in `metadata.tables`; remove `metadata.table_name` and `metadata.schema`. ## 2024/05/21 v0.0.2 diff --git a/cratedb_sqlparse_py/README.md b/cratedb_sqlparse_py/README.md index 78dbbd1..fcf74d1 100644 --- a/cratedb_sqlparse_py/README.md +++ b/cratedb_sqlparse_py/README.md @@ -118,7 +118,7 @@ from cratedb_sqlparse import sqlparse stmt = sqlparse("SELECT A, B FROM doc.tbl12") print(stmt.metadata) -# Metadata(schema='doc', table_name='tbl12', parameterized_properties={}, with_properties={}) +# Metadata(tables=[Table(name='tbl12', schema='doc')], parameterized_properties={}, with_properties={}) ``` #### Query properties. 
@@ -137,9 +137,23 @@ stmt = sqlparse(""" """)[0] print(stmt.metadata) -# Metadata(schema='doc', table_name='tbl12', parameterized_properties={}, with_properties={'allocation.max_retries': '5', 'blocks.metadata': 'false'}) +# Metadata(tables=[Table(name='tbl12', schema='doc')], parameterized_properties={}, with_properties={'allocation.max_retries': '5', 'blocks.metadata': 'false'}) ``` +#### Table name +```python +print(stmt.metadata.tables) +# [Table(name='tbl12', schema='doc')] + +table = stmt.metadata.tables[0] +print(table.schema, table.name, table.fqn, sep='\n') +# doc +# tbl12 +# "doc"."tbl12" +``` + + + #### Parameterized properties. Parameterized properties are properties without a real defined value, marked with a dollar string, `metadata.parameterized_properties` @@ -155,7 +169,7 @@ stmt = sqlparse(""" """)[0] print(stmt.metadata) -# Metadata(schema='doc', table_name='tbl12', parameterized_properties={'blocks.metadata': '$1'}, with_properties={'allocation.max_retries': '5', 'blocks.metadata': '$1'}) +# Metadata(tables=[Table(name='tbl12', schema='doc')], parameterized_properties={'blocks.metadata': '$1'}, with_properties={'allocation.max_retries': '5', 'blocks.metadata': '$1'}) ``` In this case, `blocks.metadata` will be in `with_properties` and `parameterized_properties` as well. 
diff --git a/cratedb_sqlparse_py/cratedb_sqlparse/AstBuilder.py b/cratedb_sqlparse_py/cratedb_sqlparse/AstBuilder.py index bfbc197..4946bf7 100644 --- a/cratedb_sqlparse_py/cratedb_sqlparse/AstBuilder.py +++ b/cratedb_sqlparse_py/cratedb_sqlparse/AstBuilder.py @@ -2,6 +2,7 @@ from cratedb_sqlparse.generated_parser.SqlBaseParser import SqlBaseParser from cratedb_sqlparse.generated_parser.SqlBaseParserVisitor import SqlBaseParserVisitor +from cratedb_sqlparse.models import Table class AstBuilder(SqlBaseParserVisitor): @@ -40,8 +41,7 @@ def visitTableName(self, ctx: SqlBaseParser.TableNameContext): else: schema, name = parts - self.stmt.metadata.table_name = name - self.stmt.metadata.schema = schema + self.stmt.metadata.tables.append(Table(schema=schema, name=name)) def visitGenericProperties(self, ctx: SqlBaseParser.GenericPropertiesContext): node_properties = ctx.genericProperty() diff --git a/cratedb_sqlparse_py/cratedb_sqlparse/models.py b/cratedb_sqlparse_py/cratedb_sqlparse/models.py new file mode 100644 index 0000000..56a9903 --- /dev/null +++ b/cratedb_sqlparse_py/cratedb_sqlparse/models.py @@ -0,0 +1,28 @@ +import dataclasses +from typing import List + + +def quote(text: str, quote_with: str = '"') -> str: + return quote_with + text + quote_with + + +@dataclasses.dataclass +class Table: + name: str + schema: str = None + + @property + def fqn(self) -> str: + return (quote(self.schema) + "." if self.schema else "") + quote(self.name) + + +@dataclasses.dataclass +class Metadata: + """ + Represents the metadata of the query, the actual interesting parts of the query such as: + table, schema, columns, options... 
+ """ + + tables: List[Table] = dataclasses.field(default_factory=list) + parameterized_properties: dict = dataclasses.field(default_factory=dict) + with_properties: dict = dataclasses.field(default_factory=dict) diff --git a/cratedb_sqlparse_py/cratedb_sqlparse/parser.py b/cratedb_sqlparse_py/cratedb_sqlparse/parser.py index fcfdfed..3e37d43 100644 --- a/cratedb_sqlparse_py/cratedb_sqlparse/parser.py +++ b/cratedb_sqlparse_py/cratedb_sqlparse/parser.py @@ -1,4 +1,3 @@ -import dataclasses import logging from typing import List @@ -8,6 +7,7 @@ from cratedb_sqlparse.AstBuilder import AstBuilder from cratedb_sqlparse.generated_parser.SqlBaseLexer import SqlBaseLexer from cratedb_sqlparse.generated_parser.SqlBaseParser import SqlBaseParser +from cratedb_sqlparse.models import Metadata def BEGIN_DOLLAR_QUOTED_STRING_action(self, localctx, actionIndex): @@ -125,19 +125,6 @@ def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e): self.errors.append(error) -@dataclasses.dataclass -class Metadata: - """ - Represents the metadata of the query, the actual interesting parts of the query such as: - table, schema, columns, options... - """ - - schema: str = None - table_name: str = None - parameterized_properties: dict = dataclasses.field(default_factory=dict) - with_properties: dict = dataclasses.field(default_factory=dict) - - class Statement: """ Represents a CrateDB SQL statement. 
diff --git a/cratedb_sqlparse_py/tests/test_enricher.py b/cratedb_sqlparse_py/tests/test_enricher.py index 8fd5176..a6c7c8b 100644 --- a/cratedb_sqlparse_py/tests/test_enricher.py +++ b/cratedb_sqlparse_py/tests/test_enricher.py @@ -1,6 +1,6 @@ def test_table_metadata(): from cratedb_sqlparse import sqlparse - from cratedb_sqlparse.parser import Metadata + from cratedb_sqlparse.models import Metadata query = "SELECT 1; SELECT 2;" stmts = sqlparse(query) @@ -9,43 +9,6 @@ def test_table_metadata(): assert isinstance(stmt.metadata, Metadata) -def test_table_name_statement(): - from cratedb_sqlparse import sqlparse - - query = "CREATE TABLE doc.tbl2 (a TEXT)" - - stmts = sqlparse(query) - stmt = stmts[0] - - assert stmt.metadata.schema == "doc" - assert stmt.metadata.table_name == "tbl2" - - -def test_table_name_statements(): - from cratedb_sqlparse import sqlparse - - query = """ - SELECT A,B,C,D,E FROM doc.tbl1; - SELECT A,B FROM "doc"."tbl1"; - SELECT A,B FROM "tbl1"; - SELECT A,B FROM tbl1; - """ - - stmts = sqlparse(query=query) - - assert stmts[0].metadata.schema == "doc" - assert stmts[0].metadata.table_name == "tbl1" - - assert stmts[1].metadata.schema == "doc" - assert stmts[1].metadata.table_name == "tbl1" - - assert stmts[2].metadata.schema is None - assert stmts[2].metadata.table_name == "tbl1" - - assert stmts[3].metadata.schema is None - assert stmts[3].metadata.table_name == "tbl1" - - def test_table_with_properties(): from cratedb_sqlparse import sqlparse @@ -68,3 +31,20 @@ def test_with_with_parameterized_properties(): # Has all the keys. 
assert stmt.metadata.with_properties == expected assert stmt.metadata.parameterized_properties == expected + + +def test_table_names(): + from cratedb_sqlparse import sqlparse + + query = "SELECT _ FROM a.b, d" + + stmt = sqlparse(query)[0] + + assert len(stmt.metadata.tables) == 2 + assert stmt.metadata.tables[0].schema == "a" + assert stmt.metadata.tables[0].name == "b" + assert stmt.metadata.tables[0].fqn == '"a"."b"' + + assert stmt.metadata.tables[1].schema is None + assert stmt.metadata.tables[1].name == "d" + assert stmt.metadata.tables[1].fqn == '"d"'