Skip to content

Commit

Permalink
Orchestrator: write registry entries resulting from overrides to thei…
Browse files Browse the repository at this point in the history
…r own versioned path (airbytehq#30699)
  • Loading branch information
bnchrch authored and ariesgun committed Oct 23, 2023
1 parent a8de021 commit a6cdccc
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -214,13 +214,19 @@ def get_connector_type_from_registry_entry(registry_entry: dict) -> TaggedRegist
raise Exception("Could not determine connector type from registry entry")


def get_registry_entry_write_path(metadata_entry: LatestMetadataEntry, registry_name: str):
def get_registry_entry_write_path(registry_entry: PolymorphicRegistryEntry, metadata_entry: LatestMetadataEntry, registry_name: str) -> str:
metadata_path = metadata_entry.file_path
if metadata_path is None:
raise Exception(f"Metadata entry {metadata_entry} does not have a file path")

metadata_folder = os.path.dirname(metadata_path)
return os.path.join(metadata_folder, registry_name)
if metadata_entry.is_latest_version_path:
# if the metadata entry is the latest version, write the registry entry to the same path as the metadata entry
metadata_folder = os.path.dirname(metadata_path)
return os.path.join(metadata_folder, registry_name)
else:
# if the metadata entry is not the latest version, write the registry entry to its own version specific path
# this is handle the case when a dockerImageTag is overridden
return HACKS.construct_registry_entry_write_path(registry_entry, registry_name)


@sentry_sdk.trace
Expand All @@ -241,10 +247,9 @@ def persist_registry_entry_to_json(
Returns:
GCSFileHandle: The registry_entry directory manager.
"""
registry_entry_write_path = get_registry_entry_write_path(metadata_entry, registry_name)
registry_entry_write_path = get_registry_entry_write_path(registry_entry, metadata_entry, registry_name)
registry_entry_json = registry_entry.json(exclude_none=True)
file_handle = registry_directory_manager.write_data(registry_entry_json.encode("utf-8"), ext="json", key=registry_entry_write_path)
HACKS.write_registry_to_overrode_file_paths(registry_entry, registry_name, metadata_entry, registry_directory_manager)
return file_handle


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,14 @@

from typing import Union

from dagster import get_dagster_logger
from dagster_gcp.gcs.file_manager import GCSFileHandle, GCSFileManager
from metadata_service.constants import METADATA_FILE_NAME
from metadata_service.gcs_upload import get_metadata_remote_file_path
from metadata_service.models.generated.ConnectorRegistryDestinationDefinition import ConnectorRegistryDestinationDefinition
from metadata_service.models.generated.ConnectorRegistrySourceDefinition import ConnectorRegistrySourceDefinition
from orchestrator.models.metadata import LatestMetadataEntry

PolymorphicRegistryEntry = Union[ConnectorRegistrySourceDefinition, ConnectorRegistryDestinationDefinition]


def _is_docker_repository_overridden(
metadata_entry: LatestMetadataEntry,
registry_entry: PolymorphicRegistryEntry,
) -> bool:
"""Check if the docker repository is overridden in the registry entry."""
registry_entry_docker_repository = registry_entry.dockerRepository
metadata_docker_repository = metadata_entry.metadata_definition.data.dockerRepository
return registry_entry_docker_repository != metadata_docker_repository


def _get_version_specific_registry_entry_file_path(registry_entry, registry_name):
"""Get the file path for the version specific registry entry file."""
docker_reposiory = registry_entry.dockerRepository
Expand All @@ -44,48 +31,56 @@ def _check_for_invalid_write_path(write_path: str):
)


def write_registry_to_overrode_file_paths(
def construct_registry_entry_write_path(
registry_entry: PolymorphicRegistryEntry,
registry_name: str,
metadata_entry: LatestMetadataEntry,
registry_directory_manager: GCSFileManager,
) -> GCSFileHandle:
) -> str:
"""
Write the registry entry to the docker repository and version specific file paths
in the event that the docker repository is overridden.
Construct a registry entry write path from its parts.
Underlying issue:
The registry entry files (oss.json and cloud.json) are traditionally written to
the same path as the metadata.yaml file that created them. This is fine for the
most cases, but when the docker repository is overridden, the registry entry
files need to be written to a different path.
This is barely a hack.
But it is related to a few imperfect design decisions that we have to work around.
1. Metadata files and the registry entries are saved to the same top level folder.
2. That save path is determined by the docker repository and version of the image
3. A metadata file can include overrides for the docker repository and version of the image depending on the registry
4. The platform looks up registry entries by docker repository and version of the image.
5. The registry generation depends on what ever registry entry is written to a path ending in latest/{registry_name}.json
This means that when a metadata file overrides the docker repository and version of the image,
the registry entry needs to be written to a different path than the metadata file.
*But only in the case that its a versioned path and NOT a latest path.*
Example:
If metadata file for source-posgres is at version 2.0.0 but there is a override for the cloud registry
that changes the docker repository to source-postgres-strict-encrypt and the version to 1.0.0
Then we will have a metadata file written to:
gs://my-bucket/metadata/source-postgres/2.0.0/metadata.yaml
and registry entries written to:
gs://my-bucket/metadata/source-postgres/2.0.0/oss.json
gs://my-bucket/metadata/source-postgres-strict-encrypt/1.0.0/cloud.json
For example if source-postgres:dev.123 is overridden to source-postgres-strict-encrypt:dev.123
then the oss.json file needs to be written to the path that would be assumed
by the platform when looking for a specific registry entry. In this case, for cloud, it would be
gs://my-bucket/metadata/source-postgres-strict-encrypt/dev.123/cloud.json
But if the metadata file is written to a latest path, then the registry entry will be written to the same path:
gs://my-bucket/metadata/source-postgres/latest/oss.json
gs://my-bucket/metadata/source-postgres/latest/cloud.json
Ideally we would not have to do this, but the combination of prereleases and common overrides
make this nessesary.
Future Solution:
To resolve this properly we need to
1. Separate the save paths for metadata files and registry entries
2. Have the paths determined by definitionId and a metadata version
3. Allow for references to other metadata files in the metadata file instead of overrides
Args:
registry_entry (PolymorphicRegistryEntry): The registry entry to write
registry_name (str): The name of the registry entry (oss or cloud)
metadata_entry (LatestMetadataEntry): The metadata entry that created the registry entry
registry_directory_manager (GCSFileManager): The file manager to use to write the registry entry
Returns:
GCSFileHandle: The file handle of the written registry entry
str: The registry entry write path corresponding to the registry entry
"""
if not _is_docker_repository_overridden(metadata_entry, registry_entry):
return None
logger = get_dagster_logger()
registry_entry_json = registry_entry.json(exclude_none=True)
overrode_registry_entry_version_write_path = _get_version_specific_registry_entry_file_path(registry_entry, registry_name)
_check_for_invalid_write_path(overrode_registry_entry_version_write_path)
logger.info(f"Writing registry entry to {overrode_registry_entry_version_write_path}")
file_handle = registry_directory_manager.write_data(
registry_entry_json.encode("utf-8"), ext="json", key=overrode_registry_entry_version_write_path
)
logger.info(f"Successfully wrote registry entry to {file_handle.public_url}")
return file_handle
return overrode_registry_entry_version_write_path
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from metadata_service.models.generated.ConnectorRegistryV0 import ConnectorRegistryV0
from orchestrator.assets.registry_entry import (
get_connector_type_from_registry_entry,
get_registry_entry_write_path,
get_registry_status_lists,
metadata_to_registry_entry,
safe_parse_metadata_definition,
Expand Down Expand Up @@ -277,11 +278,15 @@ def test_overrides_application(registry_type, expected_docker_image_tag, expecte

mock_metadata_entry = mock.Mock()
mock_metadata_entry.metadata_definition.dict.return_value = metadata
mock_metadata_entry.file_path = f"metadata/{expected_docker_image_tag}/metadata.yaml"
mock_metadata_entry.icon_url = "test-icon-url"

result = metadata_to_registry_entry(mock_metadata_entry, registry_type)
assert result["dockerImageTag"] == expected_docker_image_tag
assert result["additionalField"] == expected_additional_field
registry_entry = metadata_to_registry_entry(mock_metadata_entry, registry_type)
assert registry_entry["dockerImageTag"] == expected_docker_image_tag
assert registry_entry["additionalField"] == expected_additional_field

expected_write_path = f"metadata/{expected_docker_image_tag}/{registry_type}"
assert get_registry_entry_write_path(registry_entry, mock_metadata_entry, registry_type) == expected_write_path


def test_source_type_extraction():
Expand Down

0 comments on commit a6cdccc

Please sign in to comment.