Skip to content

Commit

Permalink
save progress on switching to extracting doc path from documentationUrl
Browse files Browse the repository at this point in the history
  • Loading branch information
lmossman committed Sep 27, 2023
1 parent ac6a184 commit f17901a
Show file tree
Hide file tree
Showing 6 changed files with 253 additions and 221 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ def metadata_service():

@metadata_service.command(help="Validate a given metadata YAML file.")
@click.argument("metadata_file_path", type=click.Path(exists=True, path_type=pathlib.Path), required=True)
@click.argument("doc_path", type=click.Path(exists=True, path_type=pathlib.Path), required=True)
def validate(metadata_file_path: pathlib.Path, doc_path: pathlib.Path):
@click.argument("docs_path", type=click.Path(exists=True, path_type=pathlib.Path), required=True)
def validate(metadata_file_path: pathlib.Path, docs_path: pathlib.Path):
metadata_file_path = metadata_file_path if not metadata_file_path.is_dir() else metadata_file_path / METADATA_FILE_NAME

click.echo(f"Validating {metadata_file_path}...")

metadata, error = validate_and_load(metadata_file_path, PRE_UPLOAD_VALIDATORS, ValidatorOptions(doc_path=doc_path))
metadata, error = validate_and_load(metadata_file_path, PRE_UPLOAD_VALIDATORS, ValidatorOptions(docs_path=str(docs_path)))
if metadata:
click.echo(f"{metadata_file_path} is a valid ConnectorMetadataDefinitionV0 YAML file.")
else:
Expand All @@ -44,12 +44,12 @@ def validate(metadata_file_path: pathlib.Path, doc_path: pathlib.Path):

@metadata_service.command(help="Upload a metadata YAML file to a GCS bucket.")
@click.argument("metadata-file-path", type=click.Path(exists=True, path_type=pathlib.Path), required=True)
@click.argument("doc-path", type=click.Path(exists=True, path_type=pathlib.Path), required=True)
@click.argument("docs-path", type=click.Path(exists=True, path_type=pathlib.Path), required=True)
@click.argument("bucket-name", type=click.STRING, required=True)
@click.option("--prerelease", type=click.STRING, required=False, default=None, help="The prerelease tag of the connector.")
def upload(metadata_file_path: pathlib.Path, doc_path: pathlib.Path, bucket_name: str, prerelease: str):
def upload(metadata_file_path: pathlib.Path, docs_path: pathlib.Path, bucket_name: str, prerelease: str):
metadata_file_path = metadata_file_path if not metadata_file_path.is_dir() else metadata_file_path / METADATA_FILE_NAME
validator_opts = ValidatorOptions(doc_path=str(doc_path), prerelease_tag=prerelease)
validator_opts = ValidatorOptions(docs_path=str(docs_path), prerelease_tag=prerelease)
try:
upload_info = upload_metadata_to_gcs(bucket_name, metadata_file_path, validator_opts)
log_metadata_upload_info(upload_info)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import hashlib
import json
import os
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Tuple, List
Expand Down Expand Up @@ -69,6 +70,14 @@ def get_doc_remote_file_path(dockerRepository: str, version: str, inapp: bool) -
"""
return f"{METADATA_FOLDER}/{dockerRepository}/{version}/{DOC_INAPP_FILE_NAME if inapp else DOC_FILE_NAME}"

def get_doc_local_file_path(metadata: "ConnectorMetadataDefinitionV0", docs_path: Path, inapp: bool) -> Optional[Path]:
    """Resolve the local docs file for a connector from its documentationUrl.

    Extracts the path component of the metadata's documentationUrl (which must be
    hosted under https://docs.airbyte.com/) and maps it onto the local docs tree.

    Args:
        metadata: Parsed connector metadata; only ``data.documentationUrl`` is read.
        docs_path: Root directory of the local documentation tree.
        inapp: When True, resolve the in-app variant (``.inapp.md``) instead of ``.md``.

    Returns:
        The local doc file path, or None when the documentationUrl is not an
        Airbyte docs URL (callers treat None as "skip doc upload").
    """
    pattern = re.compile(r"^https://docs\.airbyte\.com/(.+)$")
    match = pattern.search(metadata.data.documentationUrl)
    if not match:
        return None
    extension = ".inapp.md" if inapp else ".md"
    # NOTE(review): with_suffix replaces any existing suffix, so a URL path whose
    # last segment contains a dot (e.g. ".../v1.2") would be rewritten — assumed
    # not to occur for Airbyte doc URLs; confirm against the docs tree.
    return (docs_path / match.group(1)).with_suffix(extension)

def compute_gcs_md5(file_name: str) -> str:
hash_md5 = hashlib.md5()
with open(file_name, "rb") as f:
Expand Down Expand Up @@ -132,15 +141,18 @@ def _icon_upload(metadata: ConnectorMetadataDefinitionV0, bucket: storage.bucket
return False, f"No Icon found at {local_icon_path}"
return upload_file_if_changed(local_icon_path, bucket, latest_icon_path)

def _doc_upload(metadata: ConnectorMetadataDefinitionV0, bucket: storage.bucket.Bucket, doc_path: Path, latest: bool, inapp: bool) -> Tuple[bool, str]:
local_doc_path = doc_path if not inapp else Path(str(doc_path).replace('.md', '.inapp.md'))
def _doc_upload(metadata: ConnectorMetadataDefinitionV0, bucket: storage.bucket.Bucket, docs_path: Path, latest: bool, inapp: bool) -> Tuple[bool, str]:
local_doc_path = get_doc_local_file_path(metadata, docs_path, inapp)
if not local_doc_path:
return False, f"Metadata does not contain a valid Airbyte documentation url, skipping doc upload."

remote_doc_path = get_doc_remote_file_path(metadata.data.dockerRepository, "latest" if latest else metadata.data.dockerImageTag, inapp)

if local_doc_path.exists():
doc_uploaded, doc_blob_id = upload_file_if_changed(local_doc_path, bucket, remote_doc_path)
else:
if inapp:
doc_uploaded, doc_blob_id = False, f"No doc found at {local_doc_path}"
doc_uploaded, doc_blob_id = False, f"No inapp doc found at {local_doc_path}, skipping inapp doc upload."
else:
raise FileNotFoundError(f"Expected to find connector doc file at {local_doc_path}, but none was found.")

Expand Down Expand Up @@ -197,19 +209,19 @@ def upload_metadata_to_gcs(
credentials = service_account.Credentials.from_service_account_info(service_account_info)
storage_client = storage.Client(credentials=credentials)
bucket = storage_client.bucket(bucket_name)
doc_path = Path(validator_opts.doc_path)
docs_path = Path(validator_opts.docs_path)

icon_uploaded, icon_blob_id = _icon_upload(metadata, bucket, metadata_file_path)

version_uploaded, version_blob_id = _version_upload(metadata, bucket, metadata_file_path)

doc_version_uploaded, doc_version_blob_id = _doc_upload(metadata, bucket, doc_path, False, False)
doc_inapp_version_uploaded, doc_inapp_version_blob_id = _doc_upload(metadata, bucket, doc_path, False, True)
doc_version_uploaded, doc_version_blob_id = _doc_upload(metadata, bucket, docs_path, False, False)
doc_inapp_version_uploaded, doc_inapp_version_blob_id = _doc_upload(metadata, bucket, docs_path, False, True)

if not validator_opts.prerelease_tag:
latest_uploaded, latest_blob_id = _latest_upload(metadata, bucket, metadata_file_path)
doc_latest_uploaded, doc_latest_blob_id = _doc_upload(metadata, bucket, doc_path, True, False)
doc_inapp_latest_uploaded, doc_inapp_latest_blob_id = _doc_upload(metadata, bucket, doc_path, True, True)
doc_latest_uploaded, doc_latest_blob_id = _doc_upload(metadata, bucket, docs_path, True, False)
doc_inapp_latest_uploaded, doc_inapp_latest_blob_id = _doc_upload(metadata, bucket, docs_path, True, True)
else:
latest_uploaded, latest_blob_id = False, None
doc_latest_uploaded, doc_latest_blob_id = doc_inapp_latest_uploaded, doc_inapp_latest_blob_id = False, None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

@dataclass(frozen=True)
class ValidatorOptions:
    """Options threaded through metadata validators and the GCS upload flow.

    Attributes:
        docs_path: Root directory of the local connector documentation tree.
        prerelease_tag: Prerelease tag of the connector, or None for a normal release.
    """

    # The diff scrape retained both the removed `doc_path` field and its
    # replacement; only the renamed `docs_path` field belongs here.
    docs_path: str
    prerelease_tag: Optional[str] = None


Expand Down Expand Up @@ -138,12 +138,12 @@ def validate_major_version_bump_has_breaking_change_entry(
return True, None


def validate_docs_path_exists(
    metadata_definition: "ConnectorMetadataDefinitionV0", validator_opts: "ValidatorOptions"
) -> "ValidationResult":
    """Ensure that the docs_path given in the validator options exists on disk.

    Args:
        metadata_definition: Parsed connector metadata (unused; present to match
            the common validator signature).
        validator_opts: Options carrying ``docs_path``.

    Returns:
        (True, None) when the path exists, otherwise (False, <error message>).
    """
    if not pathlib.Path(validator_opts.docs_path).exists():
        return False, f"Could not find {validator_opts.docs_path}."

    return True, None

Expand All @@ -152,7 +152,7 @@ def validate_doc_path_exists(
validate_all_tags_are_keyvalue_pairs,
validate_at_least_one_language_tag,
validate_major_version_bump_has_breaking_change_entry,
validate_doc_path_exists
validate_docs_path_exists
]

POST_UPLOAD_VALIDATORS = PRE_UPLOAD_VALIDATORS + [
Expand All @@ -179,6 +179,7 @@ def validate_and_load(
return None, f"Validation error: {e}"

for validator in validators_to_run:
print(f"!!!!!!!!!!! running validator {validator}")
is_valid, error = validator(metadata_model, validator_opts)
if not is_valid:
return None, f"Validation error: {error}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@ def invalid_metadata_upload_files() -> List[str]:
return list_all_paths_in_fixture_directory("metadata_upload/invalid")


@pytest.fixture(scope="session")
def valid_doc_file() -> str:
    """Session-scoped fixture: path to the sample connector doc file beside this test module."""
    fixture_dir = os.path.dirname(__file__)
    return os.path.join(fixture_dir, DOC_FILE_NAME)


@pytest.fixture(scope="session")
def get_fixture_path() -> Callable[[str], str]:
def _get_fixture_path(fixture_name: str) -> str:
Expand Down
Loading

0 comments on commit f17901a

Please sign in to comment.