Skip to content

Commit

Permalink
add Verifier unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
troyraen committed Sep 19, 2024
1 parent 2c77cd6 commit 6fd3d27
Showing 1 changed file with 87 additions and 0 deletions.
87 changes: 87 additions & 0 deletions tests/hipscat_import/verification/test_run_verification.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,96 @@
from pathlib import Path

import numpy as np
import pandas as pd
import pytest

import hipscat_import.verification.run_verification as runner
from hipscat_import.verification.arguments import VerificationArguments


def test_verifier_with_valid_catalog(tmp_path, small_sky_object_catalog):
"""Verifier tests should pass with valid catalogs"""
args = VerificationArguments(
input_catalog_path=small_sky_object_catalog,
output_path=tmp_path,
use_schema_file=small_sky_object_catalog / "_metadata",
expected_total_rows=131,
# next arg exists but is currently unused. we just get the distributions of all fields.
# field_distribution_cols=[],
)
_test_verifier(args, assert_passed=True)


def test_verifier_with_invalid_catalog(tmp_path, invalid_catalog):
"""Verifier tests should fail with invalid catalogs"""
args = VerificationArguments(
input_catalog_path=invalid_catalog,
output_path=tmp_path,
use_schema_file=invalid_catalog / "_metadata",
expected_total_rows=131,
# next arg exists but is currently unused. we just get the distributions of all fields.
# field_distribution_cols=[],
)
_test_verifier(args, assert_passed=False)


def _test_verifier(args, assert_passed=True):
verifier = runner.run(args, write_mode="w")
_test_catalog_results(results=verifier.results_df, assert_passed=assert_passed)
_test_schema_results(results=verifier.results_df, assert_passed=assert_passed)
_test_row_count_results(results=verifier.results_df, assert_passed=assert_passed)
_test_verifier_reports_exist(verifier)


def _test_catalog_results(results, assert_passed=True):
"""hipscat is_valid_catalog should have passed."""
catalog_res = results.loc[results.test == "is valid catalog"]
_passed = catalog_res.passed.squeeze()
assert _passed if assert_passed else ~_passed


def _test_schema_results(results, assert_passed=True):
"""Schema checks should have passed."""
schema_res = results.loc[results.test == "schema"]

_passed = schema_res.loc[schema_res.targets == "_common_metadata vs input"].passed.squeeze()
assert _passed if assert_passed else ~_passed

_passed = schema_res.loc[schema_res.targets == "file footers vs input"].passed.squeeze()
assert _passed if assert_passed else ~_passed

_passed = schema_res.loc[schema_res.targets == "_metadata vs input"].passed.squeeze()
assert _passed if assert_passed else ~_passed


def _test_row_count_results(results, assert_passed=True):
"""Row-count checks should have passed."""
nrows_res = results.loc[results.test == "num rows"]

_passed = nrows_res.loc[nrows_res.targets == "_metadata vs file footers"].passed.squeeze()
assert _passed if assert_passed else ~_passed

_passed = nrows_res.loc[nrows_res.targets == "user total vs file footers"].passed.squeeze()
assert _passed if assert_passed else ~_passed


def _test_verifier_reports_exist(verifier):
"""Verifier should have written two reports to file."""
# [FIXME] handle paths better.

# verifier.record_results() writes this file
results = pd.read_csv(Path(verifier.args.output_path) / "verifier_results.csv")
assert results.equals(verifier.results_df)

# verifier.record_distributions() writes this file
distributions = pd.read_csv(
Path(verifier.args.output_path) / "field_distributions.csv", index_col="field"
)
# values are floats, so use np.allclose
assert np.allclose(distributions.minimum, verifier.distributions_df.minimum, equal_nan=True)
assert np.allclose(distributions.maximum, verifier.distributions_df.maximum, equal_nan=True)


def test_bad_args():
"""Runner should fail with empty or mis-typed arguments"""
with pytest.raises(TypeError, match="VerificationArguments"):
Expand Down

0 comments on commit 6fd3d27

Please sign in to comment.