Skip to content

Commit

Permalink
Improve error messages for dataset import conflicts.
Browse files Browse the repository at this point in the history
Just had to debug some of these, and found the integer primary keys
embedded in the old messages hard to use. Unfortunately I couldn't get
rid of all of them: this code has no access to the general mapping
from dataset type ID to dataset type name. But I think that's the
field least likely to be causing the conflict here, so it's not a
huge loss.
  • Loading branch information
TallJimbo committed Aug 21, 2023
1 parent 75408b4 commit 83620cf
Showing 1 changed file with 36 additions and 9 deletions.
45 changes: 36 additions & 9 deletions python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,9 +728,9 @@ def _validateImport(self, tmp_tags: sqlalchemy.schema.Table, run: RunRecord) ->
sqlalchemy.sql.select(
dataset.columns.id.label("dataset_id"),
dataset.columns.dataset_type_id.label("dataset_type_id"),
tmp_tags.columns.dataset_type_id.label("new dataset_type_id"),
tmp_tags.columns.dataset_type_id.label("new_dataset_type_id"),
dataset.columns[self._runKeyColumn].label("run"),
tmp_tags.columns[collFkName].label("new run"),
tmp_tags.columns[collFkName].label("new_run"),
)
.select_from(dataset.join(tmp_tags, dataset.columns.id == tmp_tags.columns.dataset_id))
.where(
Expand All @@ -742,8 +742,28 @@ def _validateImport(self, tmp_tags: sqlalchemy.schema.Table, run: RunRecord) ->
.limit(1)
)
with self._db.query(query) as result:
# Only include the first one in the exception message
if (row := result.first()) is not None:
# Only include the first one in the exception message
existing_run = self._collections[row.run].name
new_run = self._collections[row.new_run].name
if row.dataset_type_id == self._dataset_type_id:
if row.new_dataset_type_id == self._dataset_type_id:
raise ConflictingDefinitionError(
f"Current run {existing_run!r} and new run {new_run!r} do not agree for "
f"dataset {row.dataset_id}."
)
else:
raise ConflictingDefinitionError(
f"Dataset {row.dataset_id} was provided with type {self.datasetType.name!r} "
f"in run {new_run!r}, but was already defined with type ID {row.dataset_type_id} "
f"in run {run!r}."
)
else:
raise ConflictingDefinitionError(
f"Dataset {row.dataset_id} was provided with type ID {row.new_dataset_type_id} "
f"in run {new_run!r}, but was already defined with type {self.datasetType.name!r} "
f"in run {run!r}."
)
raise ConflictingDefinitionError(
f"Existing dataset type or run do not match new dataset: {row._asdict()}"
)
Expand All @@ -753,10 +773,10 @@ def _validateImport(self, tmp_tags: sqlalchemy.schema.Table, run: RunRecord) ->
sqlalchemy.sql.select(
tags.columns.dataset_id,
tags.columns.dataset_type_id.label("type_id"),
tmp_tags.columns.dataset_type_id.label("new type_id"),
tmp_tags.columns.dataset_type_id.label("new_type_id"),
*[tags.columns[dim] for dim in self.datasetType.dimensions.required.names],
*[
tmp_tags.columns[dim].label(f"new {dim}")
tmp_tags.columns[dim].label(f"new_{dim}")
for dim in self.datasetType.dimensions.required.names
],
)
Expand All @@ -783,12 +803,11 @@ def _validateImport(self, tmp_tags: sqlalchemy.schema.Table, run: RunRecord) ->
# Check that matching run+dataId have the same dataset ID.
query = (
sqlalchemy.sql.select(
tags.columns.dataset_type_id.label("dataset_type_id"),
*[tags.columns[dim] for dim in self.datasetType.dimensions.required.names],
tags.columns.dataset_id,
tmp_tags.columns.dataset_id.label("new dataset_id"),
tmp_tags.columns.dataset_id.label("new_dataset_id"),
tags.columns[collFkName],
tmp_tags.columns[collFkName].label(f"new {collFkName}"),
tmp_tags.columns[collFkName].label(f"new_{collFkName}"),
)
.select_from(
tags.join(
Expand All @@ -807,8 +826,16 @@ def _validateImport(self, tmp_tags: sqlalchemy.schema.Table, run: RunRecord) ->
.limit(1)
)
with self._db.query(query) as result:
# only include the first one in the exception message
if (row := result.first()) is not None:
# only include the first one in the exception message
data_id = {dim: getattr(row, dim) for dim in self.datasetType.dimensions.required.names}
existing_collection = self._collections[getattr(row, collFkName)].name
new_collection = self._collections[getattr(row, f"new_{collFkName}")].name
raise ConflictingDefinitionError(
f"Dataset with type {self.datasetType.name!r} and data ID {data_id} "
f"has ID {row.dataset_id} in existing collection {existing_collection!r} "
f"but ID {row.new_dataset_id} in new collection {new_collection!r}."
)
raise ConflictingDefinitionError(
f"Existing dataset type and dataId does not match new dataset: {row._asdict()}"
)

0 comments on commit 83620cf

Please sign in to comment.