Skip to content

Commit

Permalink
WIP: put dataset field name shrinking in SqlBuilder.
Browse files Browse the repository at this point in the history
This means result handlers that return datasets will need to re-expand the shrunk field names.
But we don't have any such handlers yet.
  • Loading branch information
TallJimbo committed Mar 4, 2024
1 parent ddc2246 commit 18ba8ac
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
@dataclasses.dataclass
class AnalyzedDatasetSearch:
name: str
shrunk: str
dimensions: DimensionGroup
collection_records: list[CollectionRecord] = dataclasses.field(default_factory=list)
messages: list[str] = dataclasses.field(default_factory=list)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@
from ..dimensions import DimensionRecordSet

Check warning on line 37 in python/lsst/daf/butler/direct_query_driver/_convert_results.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_convert_results.py#L37

Added line #L37 was not covered by tests

if TYPE_CHECKING:
from ..name_shrinker import NameShrinker
from ..queries.driver import DimensionRecordResultPage, PageKey
from ..queries.result_specs import DimensionRecordResultSpec
from ..registry.nameShrinker import NameShrinker


def convert_dimension_record_results(

Check warning on line 45 in python/lsst/daf/butler/direct_query_driver/_convert_results.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_convert_results.py#L45

Added line #L45 was not covered by tests
Expand Down
12 changes: 2 additions & 10 deletions python/lsst/daf/butler/direct_query_driver/_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

from .. import ddl
from ..dimensions import DataIdValue, DimensionGroup, DimensionUniverse
from ..name_shrinker import NameShrinker
from ..queries import tree as qt
from ..queries.driver import (

Check warning on line 45 in python/lsst/daf/butler/direct_query_driver/_driver.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_driver.py#L41-L45

Added lines #L41 - L45 were not covered by tests
DataCoordinateResultPage,
Expand All @@ -60,7 +61,6 @@
from ..registry import CollectionSummary, CollectionType, NoDefaultCollectionError, RegistryDefaults
from ..registry.interfaces import ChainedCollectionRecord, CollectionRecord
from ..registry.managers import RegistryManagerInstances
from ..registry.nameShrinker import NameShrinker
from ._analyzed_query import AnalyzedDatasetSearch, AnalyzedQuery, DataIdExtractionVisitor
from ._convert_results import convert_dimension_record_results
from ._sql_column_visitor import SqlColumnVisitor

Check warning on line 66 in python/lsst/daf/butler/direct_query_driver/_driver.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_driver.py#L61-L66

Added lines #L61 - L66 were not covered by tests
Expand Down Expand Up @@ -432,11 +432,8 @@ def analyze_query(
query.data_coordinate_uploads.update(tree.data_coordinate_uploads)

Check warning on line 432 in python/lsst/daf/butler/direct_query_driver/_driver.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_driver.py#L432

Added line #L432 was not covered by tests
# Add dataset_searches and filter out collections that don't have the
# right dataset type or governor dimensions.
name_shrinker = make_dataset_name_shrinker(self.db.dialect)
for dataset_type_name, dataset_search in tree.datasets.items():
dataset = AnalyzedDatasetSearch(
dataset_type_name, name_shrinker.shrink(dataset_type_name), dataset_search.dimensions
)
dataset = AnalyzedDatasetSearch(dataset_type_name, dataset_search.dimensions)

Check warning on line 436 in python/lsst/daf/butler/direct_query_driver/_driver.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_driver.py#L436

Added line #L436 was not covered by tests
for collection_record, collection_summary in self.resolve_collection_path(
dataset_search.collections
):
Expand Down Expand Up @@ -784,8 +781,3 @@ def _process_page(
)
case _:

Check warning on line 782 in python/lsst/daf/butler/direct_query_driver/_driver.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_driver.py#L782

Added line #L782 was not covered by tests
raise NotImplementedError("TODO")


def make_dataset_name_shrinker(dialect: sqlalchemy.Dialect) -> NameShrinker:
    """Build a `NameShrinker` with a length budget for dataset type names.

    The budget is the dialect's maximum identifier length, minus the length
    of the longest entry in ``qt.DATASET_FIELD_NAMES``, minus one more
    character.

    Parameters
    ----------
    dialect : `sqlalchemy.Dialect`
        Dialect whose ``max_identifier_length`` bounds the budget.

    Returns
    -------
    shrinker : `NameShrinker`
        New shrinker constructed from the computed budget and ``6``.
    """
    longest_field = max(map(len, qt.DATASET_FIELD_NAMES))
    # NOTE(review): the extra 1 presumably reserves room for a separator
    # between the dataset type name and the field name -- confirm.
    budget = dialect.max_identifier_length - longest_field - 1
    # NOTE(review): 6 is presumably the hash-suffix size expected by
    # NameShrinker -- verify against its constructor.
    return NameShrinker(budget, 6)
67 changes: 49 additions & 18 deletions python/lsst/daf/butler/direct_query_driver/_sql_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import sqlalchemy

from .. import ddl
from ..name_shrinker import NameShrinker
from ..nonempty_mapping import NonemptyMapping
from ..queries import tree as qt
from ._postprocessing import Postprocessing
Expand Down Expand Up @@ -65,6 +66,8 @@ class SqlBuilder:

special: dict[str, sqlalchemy.ColumnElement[Any]] = dataclasses.field(default_factory=dict)

name_shrinker: NameShrinker | None = None

EMPTY_COLUMNS_NAME: ClassVar[str] = "IGNORED"
"""Name of the column added to a SQL ``SELECT`` query in order to represent
relations that have no real columns.
Expand Down Expand Up @@ -111,23 +114,27 @@ def select(
distinct: bool | Sequence[sqlalchemy.ColumnElement[Any]] = False,
group_by: Sequence[sqlalchemy.ColumnElement] = (),
) -> sqlalchemy.Select:
if self.name_shrinker is None:
self.name_shrinker = self._make_name_shrinker()
sql_columns: list[sqlalchemy.ColumnElement[Any]] = []

Check warning on line 119 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L118-L119

Added lines #L118 - L119 were not covered by tests
for logical_table, field in columns:
name = columns.get_qualified_name(logical_table, field)

Check warning on line 121 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L121

Added line #L121 was not covered by tests
if field is None:
sql_columns.append(self.dimension_keys[logical_table][0].label(name))

Check warning on line 123 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L123

Added line #L123 was not covered by tests
elif columns.is_timespan(logical_table, field):
sql_columns.extend(self.timespans[logical_table].flatten(name))
else:
sql_columns.append(self.fields[logical_table][field].label(name))
name = self.name_shrinker.shrink(name)

Check warning on line 125 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L125

Added line #L125 was not covered by tests
if columns.is_timespan(logical_table, field):
sql_columns.extend(self.timespans[logical_table].flatten(name))

Check warning on line 127 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L127

Added line #L127 was not covered by tests
else:
sql_columns.append(self.fields[logical_table][field].label(name))

Check warning on line 129 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L129

Added line #L129 was not covered by tests
if postprocessing is not None:
for element in postprocessing.iter_missing(columns):
assert (

Check warning on line 132 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L132

Added line #L132 was not covered by tests
element.name in columns.dimensions.elements
), "Region aggregates not handled by this method."
sql_columns.append(

Check warning on line 135 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L135

Added line #L135 was not covered by tests
self.fields[element.name]["region"].label(
columns.get_qualified_name(element.name, "region")
self.name_shrinker.shrink(columns.get_qualified_name(element.name, "region"))
)
)
for label, sql_column in self.special.items():
Expand All @@ -148,18 +155,23 @@ def select(
return result

Check warning on line 155 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L154-L155

Added lines #L154 - L155 were not covered by tests

def make_table_spec(
self,
columns: qt.ColumnSet,
postprocessing: Postprocessing | None = None,
self, columns: qt.ColumnSet, postprocessing: Postprocessing | None = None
) -> ddl.TableSpec:
assert not self.special, "special columns not supported in make_table_spec"

Check warning on line 160 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L160

Added line #L160 was not covered by tests
if self.name_shrinker is None:
self.name_shrinker = self._make_name_shrinker()

Check warning on line 162 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L162

Added line #L162 was not covered by tests
results = ddl.TableSpec(
[columns.get_column_spec(logical_table, field).to_sql_spec() for logical_table, field in columns]
[
columns.get_column_spec(logical_table, field).to_sql_spec(name_shrinker=self.name_shrinker)
for logical_table, field in columns
]
)
if postprocessing:
for element in postprocessing.iter_missing(columns):
results.fields.add(

Check warning on line 171 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L171

Added line #L171 was not covered by tests
ddl.FieldSpec.for_region(columns.get_qualified_name(element.name, "region"))
ddl.FieldSpec.for_region(
self.name_shrinker.shrink(columns.get_qualified_name(element.name, "region"))
)
)
return results

Check warning on line 176 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L176

Added line #L176 was not covered by tests

Expand All @@ -175,19 +187,25 @@ def extract_columns(
self, columns: qt.ColumnSet, postprocessing: Postprocessing | None = None
) -> SqlBuilder:
assert self.sql_from_clause is not None, "Cannot extract columns with no FROM clause."

Check warning on line 189 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L189

Added line #L189 was not covered by tests
if self.name_shrinker is None:
self.name_shrinker = self._make_name_shrinker()

Check warning on line 191 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L191

Added line #L191 was not covered by tests
for logical_table, field in columns:
name = columns.get_qualified_name(logical_table, field)

Check warning on line 193 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L193

Added line #L193 was not covered by tests
if field is None:
self.dimension_keys[logical_table].append(self.sql_from_clause.columns[name])

Check warning on line 195 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L195

Added line #L195 was not covered by tests
elif columns.is_timespan(logical_table, field):
self.timespans[logical_table] = self.db.getTimespanRepresentation().from_columns(
self.sql_from_clause.columns, name
)
else:
self.fields[logical_table][field] = self.sql_from_clause.columns[name]
name = self.name_shrinker.shrink(name)

Check warning on line 197 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L197

Added line #L197 was not covered by tests
if columns.is_timespan(logical_table, field):
self.timespans[logical_table] = self.db.getTimespanRepresentation().from_columns(

Check warning on line 199 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L199

Added line #L199 was not covered by tests
self.sql_from_clause.columns, name
)
else:
self.fields[logical_table][field] = self.sql_from_clause.columns[name]

Check warning on line 203 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L203

Added line #L203 was not covered by tests
if postprocessing is not None:
for element in postprocessing.iter_missing(columns):
self.fields[element.name]["region"] = self.sql_from_clause.columns[name]
self.fields[element.name]["region"] = self.sql_from_clause.columns[

Check warning on line 206 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L206

Added line #L206 was not covered by tests
self.name_shrinker.shrink(columns.get_qualified_name(element.name, "region"))
]
if postprocessing.check_validity_match_count:
self.special[postprocessing.VALIDITY_MATCH_COUNT] = self.sql_from_clause.columns[

Check warning on line 210 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L210

Added line #L210 was not covered by tests
postprocessing.VALIDITY_MATCH_COUNT
Expand All @@ -211,6 +229,11 @@ def join(self, other: SqlBuilder) -> SqlBuilder:
self.sql_where_terms += other.sql_where_terms
self.needs_distinct = self.needs_distinct or other.needs_distinct
self.special.update(other.special)

Check warning on line 231 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L229-L231

Added lines #L229 - L231 were not covered by tests
if other.name_shrinker:
if self.name_shrinker is not None:
self.name_shrinker.update(other.name_shrinker)

Check warning on line 234 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L234

Added line #L234 was not covered by tests
else:
self.name_shrinker = other.name_shrinker
return self

Check warning on line 237 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L236-L237

Added lines #L236 - L237 were not covered by tests

def where_sql(self, *arg: sqlalchemy.ColumnElement[bool]) -> SqlBuilder:
Expand All @@ -227,7 +250,8 @@ def cte(
) -> SqlBuilder:
return SqlBuilder(

Check warning on line 251 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L251

Added line #L251 was not covered by tests
self.db,
self.select(columns, postprocessing, distinct=distinct, group_by=group_by).cte(),
sql_from_clause=self.select(columns, postprocessing, distinct=distinct, group_by=group_by).cte(),
name_shrinker=self.name_shrinker,
).extract_columns(columns, postprocessing)

def subquery(
Expand All @@ -240,7 +264,10 @@ def subquery(
) -> SqlBuilder:
return SqlBuilder(

Check warning on line 265 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L265

Added line #L265 was not covered by tests
self.db,
self.select(columns, postprocessing, distinct=distinct, group_by=group_by).subquery(),
sql_from_clause=self.select(
columns, postprocessing, distinct=distinct, group_by=group_by
).subquery(),
name_shrinker=self.name_shrinker,
).extract_columns(columns, postprocessing)

def union_subquery(
Expand All @@ -253,5 +280,9 @@ def union_subquery(
other_selects = [other.select(columns, postprocessing) for other in others]
return SqlBuilder(

Check warning on line 281 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L281

Added line #L281 was not covered by tests
self.db,
select0.union(*other_selects).subquery(),
sql_from_clause=select0.union(*other_selects).subquery(),
name_shrinker=self.name_shrinker,
).extract_columns(columns, postprocessing)

def _make_name_shrinker(self) -> NameShrinker:
    """Create a `NameShrinker` bounded by the database dialect.

    Returns
    -------
    shrinker : `NameShrinker`
        New shrinker constructed from
        ``self.db.dialect.max_identifier_length`` and ``6``.
    """
    max_length = self.db.dialect.max_identifier_length
    # NOTE(review): 6 is presumably the hash-suffix size expected by
    # NameShrinker -- verify against its constructor.
    return NameShrinker(max_length, 6)

Check warning on line 288 in python/lsst/daf/butler/direct_query_driver/_sql_builder.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_query_driver/_sql_builder.py#L288

Added line #L288 was not covered by tests

0 comments on commit 18ba8ac

Please sign in to comment.