diff --git a/src/standard_names/cmd/sndump.py b/src/standard_names/cmd/sndump.py index c7c79d5..0d79a12 100644 --- a/src/standard_names/cmd/sndump.py +++ b/src/standard_names/cmd/sndump.py @@ -17,28 +17,6 @@ def main(argv: tuple[str] | None = None) -> int: ---------- args : iterable of str, optional Arguments to pass to *parse_args*. If not provided, use ``sys.argv``. - - Examples - -------- - >>> import os - >>> from standard_names.registry import NamesRegistry - >>> from standard_names.registry import _get_latest_names_file - >>> from standard_names.cmd.sndump import main - - >>> (fname, _) = _get_latest_names_file() - >>> registry = NamesRegistry.from_path(fname) - - >>> names = main(['-n', fname]).split(os.linesep) - >>> len(names) == len(registry) - True - - >>> objects = main(['-o', fname]).split(os.linesep) - >>> len(objects) == len(registry.objects) - True - - >>> names = main(['-n', '-o', fname]).split(os.linesep) - >>> len(names) == len(registry) + len(registry.objects) - True """ VALID_FIELDS = { "op": "operators", diff --git a/src/standard_names/cmd/snscrape.py b/src/standard_names/cmd/snscrape.py index c5e6117..7dc2031 100644 --- a/src/standard_names/cmd/snscrape.py +++ b/src/standard_names/cmd/snscrape.py @@ -9,53 +9,59 @@ > data/scraped.yaml ``` """ - +import argparse from collections.abc import Iterable from standard_names.registry import NamesRegistry def main(argv: tuple[str] | None = None) -> int: + parser = argparse.ArgumentParser("Scrape standard names from a file or URL") + parser.add_argument("file", nargs="*", metavar="FILE", help="URL or file to scrape") + + args = parser.parse_args(argv) + + registry = scrape_names(args.file) + print(registry.dumps(format_="text", fields=("names",))) + + return 0 + + +def scrape_names(files: Iterable[str]) -> NamesRegistry: """Scrape standard names from a file or URL. + Parameters + ---------- + files : iterable of str + Files to search for names. + + Returns + ------- + NamesRegistry + A registry of the names found in the files. + Examples -------- - >>> import os >>> import tempfile - >>> import standard_names as csn + >>> from standard_names.cmd.snscrape import scrape_names - >>> contents = \"\"\" + >>> contents = ''' ... A file with text and names (air__temperature) mixed in. Some names ... have double underscores (like, Water__Temperature) by are not ... valid names. Others, like water__temperature, are good. - ... \"\"\" + ... ''' - >>> (fd, fname) = tempfile.mkstemp() - >>> os.close(fd) - - >>> with open(fname, 'w') as fp: + >>> with tempfile.NamedTemporaryFile("w") as fp: ... print(contents, file=fp) - - >>> names = csn.cmd.snscrape.main( - ... [fp.name, '--reader=plain_text', '--no-headers']) - >>> names.split(os.linesep) + ... _ = fp.seek(0) + ... registry = scrape_names([fp.name]) + >>> sorted(registry.names) ['air__temperature', 'water__temperature'] - - >>> os.remove(fname) """ - import argparse - - parser = argparse.ArgumentParser("Scrape standard names from a file or URL") - parser.add_argument("file", nargs="*", metavar="FILE", help="URL or file to scrape") - - args = parser.parse_args(argv) - registry = NamesRegistry([]) - for file in args.file: + for file in files: registry |= NamesRegistry(search_file_for_names(file)) - print(registry.dumps(format_="text", fields=("names",))) - - return 0 + return registry def find_all_names(lines: Iterable[str], engine: str = "peg") -> set[str]: diff --git a/src/standard_names/cmd/snvalidate.py b/src/standard_names/cmd/snvalidate.py index 2564560..4e5c042 100644 --- a/src/standard_names/cmd/snvalidate.py +++ b/src/standard_names/cmd/snvalidate.py @@ -3,39 +3,14 @@ import argparse import os -import sys +from collections.abc import Iterator from standard_names.error import BadRegistryError from standard_names.registry import NamesRegistry def main(argv: tuple[str] | None = None) -> int: - """Validate a list of names. - - Examples - -------- - >>> import os - >>> from standard_names.registry import _get_latest_names_file - >>> from standard_names.cmd.snvalidate import main - - >>> (fname, _) = _get_latest_names_file() - >>> main([fname]) - 0 - - >>> import tempfile - >>> (fd, fname) = tempfile.mkstemp() - >>> os.close(fd) - - >>> with open(fname, 'w') as fp: - ... print('air__temperature', file=fp) - ... print('Water__temperature', file=fp) - ... print('water_temperature', file=fp) - - >>> main([fp.name]) - 2 - - >>> os.remove(fname) - """ + """Validate a list of names.""" parser = argparse.ArgumentParser("Validate a list of standard names") parser.add_argument( @@ -47,14 +22,48 @@ def main(argv: tuple[str] | None = None) -> int: args = parser.parse_args(argv) - error_count = 0 + invalid_names = set() for file in args.file: - try: - NamesRegistry(file) - except BadRegistryError as err: - print(os.linesep.join(err.names), file=sys.stderr) - error_count += len(err.names) - return error_count + invalid_names |= validate_names(file) + + print(os.linesep.join(invalid_names)) + + return len(invalid_names) + + +def validate_names(names: Iterator[str]) -> set[str]: + """Find invalid names. + + Examples + -------- + >>> import os + >>> import tempfile + >>> from standard_names.registry import _get_latest_names_file + >>> from standard_names.cmd.snvalidate import validate_names + + >>> (fname, _) = _get_latest_names_file() + >>> with open(fname) as fp: + ... invalid_names = validate_names(fp) + >>> len(invalid_names) + 0 + + >>> names = [ + ... "air__temperature", + ... "Water__temperature", + ... "water_temperature", + ... ] + >>> invalid_names = validate_names(names) + >>> sorted(invalid_names) + ['Water__temperature', 'water_temperature'] + """ + try: + NamesRegistry(names) + except BadRegistryError as err: + invalid_names = set(err.names) + else: + invalid_names = set() + + return invalid_names if __name__ == "__main__":