
feat(repositories): Separate model and model repository metadata
devsjc committed Oct 23, 2024
1 parent 62ac89c commit 9684a97
Showing 23 changed files with 645 additions and 353 deletions.
168 changes: 168 additions & 0 deletions .github/workflows/branch_ci.yml
@@ -0,0 +1,168 @@
# Workflow that runs on pushes to non-default branches

name: Non-Default Branch CI (Python)

on:
  push:
    branches-ignore: ["main"]

# Specify concurrency such that only one workflow can run at a time
# * Different workflow files are not affected
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Registry for storing Container images
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

# Ensure the GitHub token can remove packages
permissions:
  packages: write


jobs:

  lint-typecheck:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
          cache-dependency-glob: "pyproject.toml"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version-file: "pyproject.toml"

      - name: Install package
        run: uv sync --all-extras

      - name: Lint package
        run: uv run ruff check --output-format=github .

      - name: Typecheck package
        run: uv run mypy .

  test-unit:
    runs-on: ubuntu-latest
    needs: lint-typecheck

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
          cache-dependency-glob: "pyproject.toml"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version-file: "pyproject.toml"

      - name: Install package
        run: uv sync --all-extras

      # Run unittests
      # * Produce JUnit XML report
      - name: Run unit tests
        run: uv run xmlrunner discover -s src/nwp_consumer -p "test_*.py" --output-file ut-report.xml

      # Create test summary to be visualised on the job summary screen on GitHub
      # * Runs even if previous steps fail
      - name: Create test summary
        uses: test-summary/action@v2
        with:
          paths: "*t-report.xml"
          show: "fail, skip"
        if: always()

  # Define a job that builds the documentation
  # * Surfaces the documentation as an artifact
  build-docs:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
          cache-dependency-glob: "pyproject.toml"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version-file: "pyproject.toml"

      - name: Build documentation
        run: uv run pydoctor

      - name: Upload documentation
        uses: actions/upload-artifact@v4
        with:
          name: docs
          path: docs

  # * Builds and pushes an OCI Container image to the registry defined in the environment variables
  # * Only runs if test job passes
  build-container:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    needs: ["lint-typecheck", "test-unit"]

    steps:
      # Do a non-shallow clone of the repo to ensure tags are present
      # * This allows setuptools-git-versioning to automatically set the version
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Buildx
        uses: docker/setup-buildx-action@v2

      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Tag the built image according to the event type
      # The event is a branch commit, so tag with the branch name
      - name: Extract metadata (tags, labels) for Container
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch

      # Build and push the Container image to the registry
      # * Creates a multiplatform-aware image
      # * Pulls build cache from the registry
      - name: Build and push container image
        uses: docker/build-push-action@v4
        with:
          context: .
          file: Containerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:buildcache
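As a side note on the unit-test step above: it drives unittest discovery through the xmlrunner entrypoint from the unittest-xml-reporting package. A rough local equivalent, assuming that package is installed (the source directory and report filename are taken from the workflow; this sketch is not part of the commit):

import unittest

import xmlrunner  # from the unittest-xml-reporting package

# Discover the same tests the CI job runs and write a JUnit-style XML report
suite = unittest.defaultTestLoader.discover("src/nwp_consumer", pattern="test_*.py")
with open("ut-report.xml", "wb") as report:
    xmlrunner.XMLTestRunner(output=report).run(suite)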
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
@@ -110,6 +110,7 @@ jobs:
    runs-on: ubuntu-latest
    container: quay.io/condaforge/miniforge3:latest
    needs: build-venv
+    if: github.event.name == '

    steps:
      - name: Checkout repository
36 changes: 22 additions & 14 deletions src/nwp_consumer/cmd/main.py
@@ -1,51 +1,59 @@
"""Entrypoints to the nwp-consumer service."""

import argparse
import logging
import os
import sys
from typing import NamedTuple

from nwp_consumer.internal import handlers, repositories, services
from nwp_consumer.internal import handlers, ports, repositories, services

log = logging.getLogger("nwp-consumer")

def parse_env() -> argparse.Namespace:
class Adaptors(NamedTuple):
"""Adaptors for the CLI."""
model_repository: type[ports.ModelRepository]
notification_repository: type[ports.NotificationRepository]

def parse_env() -> Adaptors:
"""Parse from the environment."""
config = argparse.Namespace()
model_repository_adaptor: type[ports.ModelRepository]
match os.getenv("MODEL_REPOSITORY"):
case None:
log.error("MODEL_REPOSITORY is not set in environment.")
sys.exit(1)
case "ceda-metoffice-global":
config.model_repository = repositories.CedaMetOfficeGlobalModelRepository()
model_repository_adaptor = repositories.CedaMetOfficeGlobalModelRepository
case _ as model:
log.error(f"Unknown model: {model}")
sys.exit(1)

notification_repository_adaptor: type[ports.NotificationRepository]
match os.getenv("NOTIFICATION_REPOSITORY", "stdout"):
case "stdout":
config.notification_repository = repositories.StdoutNotificationRepository()
notification_repository_adaptor = repositories.StdoutNotificationRepository
case "dagster-pipes":
config.notification_repository = repositories.DagsterPipesNotificationRepository()
notification_repository_adaptor = repositories.DagsterPipesNotificationRepository
case _ as notification:
log.error(f"Unknown notification repository: {notification}")
sys.exit(1)

return config
return Adaptors(
model_repository=model_repository_adaptor,
notification_repository=notification_repository_adaptor,
)


def run_cli() -> None:
"""Entrypoint for the CLI handler."""
args = parse_env()
adaptors = parse_env()
c = handlers.CLIHandler(
consumer_usecase=services.ConsumerService(
model_repository=args.model_repository,
zarr_repository=None,
notification_repository=args.notification_repository,
model_repository=adaptors.model_repository,
notification_repository=adaptors.notification_repository,
),
archiver_usecase=services.ArchiverService(
model_repository=args.model_repository,
notification_repository=args.notification_repository,
model_repository=adaptors.model_repository,
notification_repository=adaptors.notification_repository,
),
)
returncode: int = c.run()
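The refactor above means parse_env now hands back adaptor classes rather than constructed instances, leaving instantiation to the service layer. A minimal sketch of exercising the new behaviour, assuming the import paths shown in this diff (illustrative only, not part of the commit):

import os

from nwp_consumer.cmd.main import parse_env
from nwp_consumer.internal import repositories

# Select adaptors through the environment, exactly as the CLI handler does
os.environ["MODEL_REPOSITORY"] = "ceda-metoffice-global"
os.environ["NOTIFICATION_REPOSITORY"] = "stdout"

adaptors = parse_env()

# The NamedTuple carries classes, not instances; the services construct
# the repositories themselves when they run.
assert adaptors.model_repository is repositories.CedaMetOfficeGlobalModelRepository
assert adaptors.notification_repository is repositories.StdoutNotificationRepository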
12 changes: 0 additions & 12 deletions src/nwp_consumer/internal/config/__init__.py

This file was deleted.

21 changes: 0 additions & 21 deletions src/nwp_consumer/internal/config/config.py

This file was deleted.

4 changes: 2 additions & 2 deletions src/nwp_consumer/internal/entities/__init__.py
@@ -15,7 +15,7 @@
should not contain any logic that is specific to a particular implementation.
"""

-from .repometadata import ModelRepositoryMetadata, ModelFileMetadata
+from .repometadata import ModelRepositoryMetadata, ModelMetadata
from .tensorstore import ParameterScanResult, TensorStore
from .postprocess import PostProcessOptions, CodecOptions
from .notification import PerformanceMetadata, StoreCreatedNotification, StoreAppendedNotification

@@ -25,7 +25,7 @@

__all__ = [
    "ModelRepositoryMetadata",
-    "ModelFileMetadata",
+    "ModelMetadata",
    "ParameterScanResult",
    "TensorStore",
    "PostProcessOptions",
14 changes: 7 additions & 7 deletions src/nwp_consumer/internal/entities/coordinates.py
@@ -42,7 +42,7 @@
import pandas as pd
import pytz
import xarray as xr
-from returns.result import Failure, Result, ResultE, Success
+from returns.result import Failure, ResultE, Success

from .parameters import Parameter

@@ -157,7 +157,7 @@ def from_pandas(
        ))

        # Convert the pandas Index objects to lists of the appropriate types
-        return Result.from_value(
+        return Success(
            cls(
                # NOTE: The timezone information is stripped from the datetime objects
                # as numpy cannot handle timezone-aware datetime objects. As such, it
@@ -289,7 +289,7 @@ def determine_region(
"""
# Ensure the inner and outer maps have the same rank and dimension labels
if inner.dims != self.dims:
return Result.from_failure(
return Failure(
KeyError(
"Cannot find slices in non-matching coordinate mappings: "
"both objects must have identical dimensions (rank and labels)."
@@ -303,7 +303,7 @@
            inner_dim_coords = getattr(inner, inner_dim_label)
            outer_dim_coords = getattr(self, inner_dim_label)
            if len(inner_dim_coords) > len(outer_dim_coords):
-                return Result.from_failure(
+                return Failure(
                    ValueError(
                        f"Coordinate values for dimension '{inner_dim_label}' in the inner map "
                        "exceed the number of coordinate values in the outer map. "
@@ -314,7 +314,7 @@
            if not set(inner_dim_coords).issubset(set(outer_dim_coords)):
                diff_coords = list(set(inner_dim_coords).difference(set(outer_dim_coords)))
                first_diff_index: int = inner_dim_coords.index(diff_coords[0])
-                return Result.from_failure(
+                return Failure(
                    ValueError(
                        f"Coordinate values for dimension '{inner_dim_label}' not all present "
                        "within outer dimension map. The inner map must be entirely contained "
@@ -338,7 +338,7 @@
                # TODO: of which might loop around the edges of the grid. In this case, it would
                # TODO: be useful to determine if the run is non-contiguous only in that it wraps
                # TODO: around that boundary, and in that case, split it and write it in two goes.
-                return Result.from_failure(
+                return Failure(
                    ValueError(
                        f"Coordinate values for dimension '{inner_dim_label}' do not correspond "
                        f"with a contiguous index set in the outer dimension map. "
@@ -349,7 +349,7 @@

            slices[inner_dim_label] = slice(outer_dim_indices[0], outer_dim_indices[-1] + 1)

-        return Result.from_value(slices)
+        return Success(slices)

    def default_chunking(self) -> dict[str, int]:
        """The expected chunk sizes for each dimension.
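The edits above replace the returns library's generic Result.from_value and Result.from_failure constructors with the direct Success and Failure containers. A short sketch of the pattern, independent of this codebase and assuming only the returns package:

from returns.result import Failure, ResultE, Success


def safe_reciprocal(x: float) -> ResultE[float]:
    """Return 1/x wrapped in a Result rather than raising."""
    if x == 0:
        # Direct construction replaces Result.from_failure(...)
        return Failure(ZeroDivisionError("x must be non-zero"))
    # Direct construction replaces Result.from_value(...)
    return Success(1.0 / x)

Both spellings build the same containers; the direct constructors are shorter and make the success or failure branch obvious at the return site.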
