Skip to content

Commit

Permalink
Python: Add Table model, save all table references to `metadata.tab…
Browse files Browse the repository at this point in the history
…les`, remove `table_name` and `schema` from metadata.
  • Loading branch information
surister committed Jul 4, 2024
1 parent 39430fe commit e95601f
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 58 deletions.
2 changes: 1 addition & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Changelog

## Unreleased
- Build Script: Replace version instead of appending to the index file.
- Add `Table` model and save all table references in `metadata.tables`


## 2024/05/21 v0.0.2
Expand Down
20 changes: 17 additions & 3 deletions cratedb_sqlparse_py/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ from cratedb_sqlparse import sqlparse
stmt = sqlparse("SELECT A, B FROM doc.tbl12")

print(stmt.metadata)
# Metadata(schema='doc', table_name='tbl12', parameterized_properties={}, with_properties={})
# Metadata(tables=[Table(name='tbl12', schema='doc')], parameterized_properties={}, with_properties={})
```

#### Query properties.
Expand All @@ -137,9 +137,23 @@ stmt = sqlparse("""
""")[0]

print(stmt.metadata)
# Metadata(schema='doc', table_name='tbl12', parameterized_properties={}, with_properties={'allocation.max_retries': '5', 'blocks.metadata': 'false'})
# Metadata(tables=[Table(name='tbl12', schema='doc')], parameterized_properties={}, with_properties={'allocation.max_retries': '5', 'blocks.metadata': 'false'})
```

#### Table name
```python
print(stmt.metadata.tables)
# [Table(name='tbl12', schema='doc')]

table = stmt.metadata.tables[0]
print(table.schema, table.name, table.fqn, sep='\n')
# doc
# tbl12
# "doc"."tbl12"
```



#### Parameterized properties.

Parameterized properties are properties whose value is not given literally but as a dollar placeholder (e.g. `$1`); they are collected in `metadata.parameterized_properties`.
Expand All @@ -155,7 +169,7 @@ stmt = sqlparse("""
""")[0]

print(stmt.metadata)
# Metadata(schema='doc', table_name='tbl12', parameterized_properties={'blocks.metadata': '$1'}, with_properties={'allocation.max_retries': '5', 'blocks.metadata': '$1'})
# Metadata(tables=[Table(name='tbl12', schema='doc')], parameterized_properties={'blocks.metadata': '$1'}, with_properties={'allocation.max_retries': '5', 'blocks.metadata': '$1'})
```

In this case, `blocks.metadata` will be in `with_properties` and `parameterized_properties` as well.
Expand Down
4 changes: 2 additions & 2 deletions cratedb_sqlparse_py/cratedb_sqlparse/AstBuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from cratedb_sqlparse.generated_parser.SqlBaseParser import SqlBaseParser
from cratedb_sqlparse.generated_parser.SqlBaseParserVisitor import SqlBaseParserVisitor
from cratedb_sqlparse.models import Table


class AstBuilder(SqlBaseParserVisitor):
Expand Down Expand Up @@ -40,8 +41,7 @@ def visitTableName(self, ctx: SqlBaseParser.TableNameContext):
else:
schema, name = parts

self.stmt.metadata.table_name = name
self.stmt.metadata.schema = schema
self.stmt.metadata.tables.append(Table(schema=schema, name=name))

def visitGenericProperties(self, ctx: SqlBaseParser.GenericPropertiesContext):
node_properties = ctx.genericProperty()
Expand Down
28 changes: 28 additions & 0 deletions cratedb_sqlparse_py/cratedb_sqlparse/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import dataclasses
from typing import List, Optional


def quote(text: str, quote_with: str = '"') -> str:
    """
    Wrap ``text`` in ``quote_with``, escaping embedded quote characters.

    Every occurrence of ``quote_with`` inside ``text`` is doubled, which is how
    SQL escapes a delimiter inside a quoted identifier or string. Without this,
    a name such as ``a"b`` would produce the malformed identifier ``"a"b"``.

    :param text: The text to quote.
    :param quote_with: The quote character placed on both sides of ``text``.
    :return: The quoted (and, if needed, escaped) text.
    """
    escaped = text.replace(quote_with, quote_with * 2)
    return f"{quote_with}{escaped}{quote_with}"


@dataclasses.dataclass
class Table:
    """
    A table referenced by a statement.
    """

    # Name of the table.
    name: str
    # Schema the table was qualified with; ``None`` when the reference
    # carried no schema.
    schema: Optional[str] = None

    @property
    def fqn(self) -> str:
        """
        Return the fully qualified name: each part quoted, joined with a dot.

        The schema part is left out when ``schema`` is ``None`` or empty, so
        an unqualified table yields only its quoted name.
        """
        parts = [self.schema, self.name] if self.schema else [self.name]
        return ".".join(quote(part) for part in parts)


@dataclasses.dataclass
class Metadata:
    """
    Holds the interesting parts extracted from a query: the tables it
    references and the properties it sets.
    """

    # Table references found in the statement.
    tables: List[Table] = dataclasses.field(default_factory=list)

    # Properties whose value is a dollar placeholder (e.g. ``$1``).
    parameterized_properties: dict = dataclasses.field(default_factory=dict)

    # Properties of the statement, keyed by property name.
    with_properties: dict = dataclasses.field(default_factory=dict)
15 changes: 1 addition & 14 deletions cratedb_sqlparse_py/cratedb_sqlparse/parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import dataclasses
import logging
from typing import List

Expand All @@ -8,6 +7,7 @@
from cratedb_sqlparse.AstBuilder import AstBuilder
from cratedb_sqlparse.generated_parser.SqlBaseLexer import SqlBaseLexer
from cratedb_sqlparse.generated_parser.SqlBaseParser import SqlBaseParser
from cratedb_sqlparse.models import Metadata


def BEGIN_DOLLAR_QUOTED_STRING_action(self, localctx, actionIndex):
Expand Down Expand Up @@ -125,19 +125,6 @@ def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
self.errors.append(error)


@dataclasses.dataclass
class Metadata:
"""
Represents the metadata of the query, the actual interesting parts of the query such as:
table, schema, columns, options...
"""

schema: str = None
table_name: str = None
parameterized_properties: dict = dataclasses.field(default_factory=dict)
with_properties: dict = dataclasses.field(default_factory=dict)


class Statement:
"""
Represents a CrateDB SQL statement.
Expand Down
56 changes: 18 additions & 38 deletions cratedb_sqlparse_py/tests/test_enricher.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
def test_table_metadata():
from cratedb_sqlparse import sqlparse
from cratedb_sqlparse.parser import Metadata
from cratedb_sqlparse.models import Metadata

query = "SELECT 1; SELECT 2;"
stmts = sqlparse(query)
Expand All @@ -9,43 +9,6 @@ def test_table_metadata():
assert isinstance(stmt.metadata, Metadata)


def test_table_name_statement():
from cratedb_sqlparse import sqlparse

query = "CREATE TABLE doc.tbl2 (a TEXT)"

stmts = sqlparse(query)
stmt = stmts[0]

assert stmt.metadata.schema == "doc"
assert stmt.metadata.table_name == "tbl2"


def test_table_name_statements():
from cratedb_sqlparse import sqlparse

query = """
SELECT A,B,C,D,E FROM doc.tbl1;
SELECT A,B FROM "doc"."tbl1";
SELECT A,B FROM "tbl1";
SELECT A,B FROM tbl1;
"""

stmts = sqlparse(query=query)

assert stmts[0].metadata.schema == "doc"
assert stmts[0].metadata.table_name == "tbl1"

assert stmts[1].metadata.schema == "doc"
assert stmts[1].metadata.table_name == "tbl1"

assert stmts[2].metadata.schema is None
assert stmts[2].metadata.table_name == "tbl1"

assert stmts[3].metadata.schema is None
assert stmts[3].metadata.table_name == "tbl1"


def test_table_with_properties():
from cratedb_sqlparse import sqlparse

Expand All @@ -68,3 +31,20 @@ def test_with_with_parameterized_properties():
# Has all the keys.
assert stmt.metadata.with_properties == expected
assert stmt.metadata.parameterized_properties == expected


def test_table_names():
    """
    Every table referenced in a statement is recorded, in order, with its
    schema (when given), name and fully qualified name.
    """
    from cratedb_sqlparse import sqlparse

    tables = sqlparse("SELECT _ FROM a.b, d")[0].metadata.tables

    assert len(tables) == 2
    qualified, unqualified = tables

    # Schema-qualified reference: `a.b`
    assert qualified.schema == "a"
    assert qualified.name == "b"
    assert qualified.fqn == '"a"."b"'

    # Bare reference: `d`
    assert unqualified.schema is None
    assert unqualified.name == "d"
    assert unqualified.fqn == '"d"'

0 comments on commit e95601f

Please sign in to comment.