Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix CLI and logging #60

Merged
merged 5 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
* Adding an end date to `CMIP6_UofT`'s temporal extent for better rendering in STAC Browser
* Updates to datacube extension helper routines for `CMIP6_UofT`.
* Make pyessv-archive a requirement for *only* the cmip6 implementation instead of for the whole CLI
* Fix bug where logger setup failed
* Simplify CLI argument constructor code (for cleaner and more testable code)
* Add tests for CLI and implementations when invoked through the CLI
* Refactored code dealing with requests and authentication to the `requests.py` file
mishaschwartz marked this conversation as resolved.
Show resolved Hide resolved
* Add `--log_file` command line option to specify a non-default location to write log files to
mishaschwartz marked this conversation as resolved.
Show resolved Hide resolved

## [0.6.0](https://github.com/crim-ca/stac-populator/tree/0.6.0) (2024-02-22)

Expand Down
226 changes: 38 additions & 188 deletions STACpopulator/cli.py
Original file line number Diff line number Diff line change
@@ -1,217 +1,67 @@
import argparse
import glob
import functools
import importlib
import logging
import os
import sys
from types import ModuleType
import warnings
from datetime import datetime
from http import cookiejar
from typing import Callable, Optional
from datetime import datetime, timezone
from typing import Callable

import requests
from requests.auth import AuthBase, HTTPBasicAuth, HTTPDigestAuth, HTTPProxyAuth
from requests.sessions import Session

from STACpopulator import __version__
from STACpopulator import __version__, implementations
from STACpopulator.exceptions import STACPopulatorError
from STACpopulator.logging import setup_logging

POPULATORS = {}


class HTTPBearerTokenAuth(AuthBase):
    """Authentication handler that attaches a bearer token to outgoing requests."""

    def __init__(self, token: str) -> None:
        """Remember the token to be sent in the ``Authorization`` header."""
        self._token = token

    def __call__(self, r: requests.PreparedRequest) -> requests.PreparedRequest:
        """Set the ``Authorization`` header on the prepared request and hand the request back."""
        header_value = f"Bearer {self._token}"
        r.headers["Authorization"] = header_value
        return r


class HTTPCookieAuth(cookiejar.MozillaCookieJar):
    # The docstring is a raw string so that the trailing backslashes of the multi-line
    # ``curl`` example are kept verbatim instead of being treated as line continuations.
    r"""
    Employ a cookie-jar file for authorization.

    Examples of useful commands:

    .. code-block:: shell

        curl --cookie-jar /path/to/cookie-jar.txt [authorization-provider-arguments]

        curl \
            -k \
            -X POST \
            --cookie-jar /tmp/magpie-cookie.txt \
            -d '{"user_name":"...","password":"..."}' \
            -H 'Accept:application/json' \
            -H 'Content-Type:application/json' \
            'https://{hostname}/magpie/signin'

    .. note::
        Due to implementation details with :mod:`requests`, this must be passed directly to the ``cookies``
        attribute rather than ``auth`` as in the case for other authorization handlers.
    """


def _existing_file_path(value: str) -> str:
    """Argparse ``type`` callable: accept ``value`` only if it names an existing file and return it unchanged."""
    import os  # local import keeps this helper self-contained

    if not os.path.isfile(value):
        raise argparse.ArgumentTypeError(f"can't find file '{value}'")
    return value


def add_request_options(parser: argparse.ArgumentParser) -> None:
    """
    Adds arguments to a parser to allow update of a request session definition used across a populator procedure.

    The following options are registered on ``parser``:

    - ``--no-verify`` / ``--no-ssl`` / ``--no-ssl-verify``: stored as ``verify`` (``True`` unless the flag is given).
    - ``--cert``: path to an existing certificate file, stored as a string.
    - ``--auth-handler``: one of ``basic``, ``digest``, ``bearer``, ``proxy`` or ``cookie``.
    - ``--auth-identity``: the credential that goes with the selected handler.

    :param parser: parser to extend in place.
    """
    parser.add_argument(
        "--no-verify",
        "--no-ssl",
        "--no-ssl-verify",
        dest="verify",
        action="store_false",
        help="Disable SSL verification (not recommended unless for development/test servers).",
    )
    # Keep the certificate as a validated *path*: ``argparse.FileType`` would open the file (and never close it)
    # and yield a file object, whereas the value ends up in ``session.cert`` where a path is expected.
    parser.add_argument("--cert", type=_existing_file_path, required=False, help="Path to a certificate file to use.")
    parser.add_argument(
        "--auth-handler",
        choices=["basic", "digest", "bearer", "proxy", "cookie"],
        required=False,
        help="Authentication strategy to employ for the requests session.",
    )
    parser.add_argument(
        "--auth-identity",
        required=False,
        help="Bearer token, cookie-jar file or proxy/digest/basic username:password for selected authorization handler.",
    )


def apply_request_options(session: Session, namespace: argparse.Namespace) -> None:
    """
    Applies the relevant request session options from parsed input arguments.

    :param session: session to configure in place (``verify``, ``cert`` and either ``auth`` or ``cookies``).
    :param namespace: parsed arguments providing ``verify``, ``cert``, ``auth_handler`` and ``auth_identity``.
    :raises ValueError:
        if an authentication handler is selected without an identity, or if the ``basic``/``digest``/``proxy``
        identity is not of the form ``username:password``.
    """
    session.verify = namespace.verify
    session.cert = namespace.cert

    handler = namespace.auth_handler
    if handler not in ("basic", "digest", "proxy", "bearer", "cookie"):
        return  # no (known) handler selected: nothing more to configure

    identity = namespace.auth_identity
    if not identity:
        # Fail early with an explicit message instead of an opaque AttributeError or a "Bearer None" header.
        raise ValueError(f"An authentication identity is required for the '{handler}' authentication handler.")

    if handler in ("basic", "digest", "proxy"):
        usr, separator, pwd = identity.partition(":")  # only the first ':' separates user and password
        if not separator:
            raise ValueError(
                f"The identity for the '{handler}' authentication handler must be of the form 'username:password'."
            )
        if handler == "basic":
            session.auth = HTTPBasicAuth(usr, pwd)
        elif handler == "digest":
            session.auth = HTTPDigestAuth(usr, pwd)
        else:
            session.auth = HTTPProxyAuth(usr, pwd)
    elif handler == "bearer":
        session.auth = HTTPBearerTokenAuth(identity)
    else:  # "cookie": the identity is the path of the cookie-jar file
        session.cookies = HTTPCookieAuth(identity)
        session.cookies.load(identity)


def make_main_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(prog="stac-populator", description="STACpopulator operations.")
def add_parser_args(parser: argparse.ArgumentParser) -> dict[str, Callable]:
parser.add_argument(
"--version",
"-V",
action="version",
version=f"%(prog)s {__version__}",
help="prints the version of the library and exits",
)
commands = parser.add_subparsers(title="command", dest="command", description="STAC populator command to execute.")

run_cmd_parser = make_run_command_parser(parser.prog)
commands.add_parser(
"run",
prog=f"{parser.prog} {run_cmd_parser.prog}",
parents=[run_cmd_parser],
formatter_class=run_cmd_parser.formatter_class,
usage=run_cmd_parser.usage,
add_help=False,
help=run_cmd_parser.description,
description=run_cmd_parser.description,
parser.add_argument("--debug", action="store_const", const=logging.DEBUG, help="set logger level to debug")
parser.add_argument(
"--log_file", help="file to write log output to. By default logs will be written to the current directory."
mishaschwartz marked this conversation as resolved.
Show resolved Hide resolved
)
commands_subparser = parser.add_subparsers(
title="command", dest="command", description="STAC populator command to execute.", required=True
)
run_parser = commands_subparser.add_parser("run", description="Run a STACpopulator implementation")
populators_subparser = run_parser.add_subparsers(
title="populator", dest="populator", description="Implementation to run."
)
for implementation_module_name, module in implementation_modules().items():
implementation_parser = populators_subparser.add_parser(implementation_module_name)
module.add_parser_args(implementation_parser)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Starting to look into this, and I'm not really fond of the logic depending on a function named "add_parser_args" being present in the module. I don't have a better alternative yet though.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The previous approach assumed a make_parser instead. The logic just changed the name to represent what the function does, but has similar dependencies.

The only workaround I can think of is to define an abstract ArgParser class with an add_parser_args method, which each implementation must subclass to define its own ArgParser. The CLI could then look up the modules and filter by issubclass of ArgParser to find the relevant references. That being said, this just defines an explicit structure; the result would be the same.


# add more commands as needed...
parser.add_argument("--debug", action="store_true", help="Set logger level to debug")

return parser


def make_run_command_parser(parent) -> argparse.ArgumentParser:
"""
Groups all sub-populator CLI listed in :py:mod:`STACpopulator.implementations` as a common ``stac-populator`` CLI.

Dispatches the provided arguments to the appropriate sub-populator CLI as requested. Each sub-populator CLI must
implement functions ``make_parser`` and ``main`` to generate the arguments and dispatch them to the corresponding
caller. The ``main`` function should accept a sequence of string arguments, which can be passed to the parser
obtained from ``make_parser``.

An optional ``runner`` can also be defined in each populator module. If provided, the namespace arguments that have
already been parsed to resolve the populator to run will be used directly, avoiding parsing arguments twice.
"""
parser = argparse.ArgumentParser(prog="run", description="STACpopulator implementation runner.")
subparsers = parser.add_subparsers(title="populator", dest="populator", description="Implementation to run.")
populators_impl = "implementations"
populators_dir = os.path.join(os.path.dirname(__file__), populators_impl)
populator_mods = glob.glob(f"{populators_dir}/**/[!__init__]*.py", recursive=True) # potential candidate scripts
for populator_path in sorted(populator_mods):
populator_script = populator_path.split(populators_dir, 1)[1][1:]
populator_py_mod = os.path.splitext(populator_script)[0].replace(os.sep, ".")
populator_name, pop_mod_file = populator_py_mod.rsplit(".", 1)
populator_root = f"STACpopulator.{populators_impl}.{populator_name}"
pop_mod_file_loc = f"{populator_root}.{pop_mod_file}"
@functools.cache
def implementation_modules() -> dict[str, ModuleType]:
modules = {}
for implementation_module_name in implementations.__all__:
try:
populator_module = importlib.import_module(pop_mod_file_loc, populator_root)
except STACPopulatorError as e:
warnings.warn(f"Could not load extension {populator_name} because of error {e}")
continue
parser_maker: Callable[[], argparse.ArgumentParser] = getattr(populator_module, "make_parser", None)
populator_runner = getattr(populator_module, "runner", None) # optional, call main directly if not available
populator_caller = getattr(populator_module, "main", None)
if callable(parser_maker) and callable(populator_caller):
populator_parser = parser_maker()
populator_prog = f"{parent} {parser.prog} {populator_name}"
subparsers.add_parser(
populator_name,
prog=populator_prog,
parents=[populator_parser],
formatter_class=populator_parser.formatter_class,
add_help=False, # add help disabled otherwise conflicts with this main populator help
help=populator_parser.description,
description=populator_parser.description,
usage=populator_parser.usage,
modules[implementation_module_name] = importlib.import_module(
f".{implementation_module_name}", implementations.__package__
)
POPULATORS[populator_name] = {
"name": populator_name,
"caller": populator_caller,
"parser": populator_parser,
"runner": populator_runner,
}
return parser
except STACPopulatorError as e:
warnings.warn(f"Could not load extension {implementation_module_name} because of error {e}")
return modules


def main(*args: str) -> Optional[int]:
parser = make_main_parser()
args = args or sys.argv[1:] # same as was parse args does, but we must provide them to subparser
ns = parser.parse_args(args=args) # if 'command' or 'populator' unknown, auto prints the help message with exit(2)
params = vars(ns)
populator_cmd = params.pop("command")
if not populator_cmd:
parser.print_help()
return 0
result = None
if populator_cmd == "run":
populator_name = params.pop("populator")
def run(ns: argparse.Namespace) -> int:
    """
    Execute the command selected on the command line.

    For the ``run`` command, logging is set up first and the parsed arguments are then handed to the ``runner``
    of the requested populator implementation.

    :param ns: parsed CLI arguments; ``command``, ``populator``, ``log_file`` and ``debug`` are read here.
    :returns: the value returned by the implementation's ``runner``, or 0 if that value is falsy.
    """
    if ns.command == "run":
        # An aware UTC datetime already renders its offset as "+00:00"; swap it for the "Z" designator
        # rather than appending "Z" after it, which produced an invalid "...+00:00Z" timestamp.
        timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
        logfile_name = ns.log_file or f"{ns.populator}_log_{timestamp}.jsonl"
        # ``--debug`` stores ``logging.DEBUG`` when given and ``None`` otherwise, hence the INFO fallback.
        setup_logging(logfile_name, ns.debug or logging.INFO)
        return implementation_modules()[ns.populator].runner(ns) or 0
    # Any other command has nothing to execute; return an explicit status to honour the ``int`` return type.
    return 0

# Setup the application logger:
fname = f"{populator_name}_log_{datetime.utcnow().isoformat() + 'Z'}.jsonl"
log_level = logging.DEBUG if ns.debug else logging.INFO
setup_logging(fname, log_level)

if not populator_name:
parser.print_help()
return 0
populator_args = args[2:] # skip [command] [populator]
populator_caller = POPULATORS[populator_name]["caller"]
populator_runner = POPULATORS[populator_name]["runner"]
if populator_runner:
result = populator_runner(ns)
else:
result = populator_caller(*populator_args)
return 0 if result is None else result
def main(*args: str) -> int:
    """
    Entry point of the CLI: build the parser, parse the arguments and run the selected command.

    :param args: command line arguments; when none are given, argparse falls back to ``sys.argv[1:]``.
    :returns: the value returned by ``run`` for the parsed arguments.
    """
    cli_parser = argparse.ArgumentParser()
    add_parser_args(cli_parser)
    argv = args or None  # ``None`` lets argparse read the process arguments itself
    return run(cli_parser.parse_args(argv))


if __name__ == "__main__":
Expand Down
3 changes: 3 additions & 0 deletions STACpopulator/implementations/CMIP6_UofT/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .add_CMIP6 import add_parser_args, runner

__all__ = ["add_parser_args", "runner"]
25 changes: 13 additions & 12 deletions STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
import json
import logging
import os
from typing import Any, MutableMapping, NoReturn, Optional, Union
import sys
from typing import Any, MutableMapping, Optional, Union

from pystac import STACValidationError
from pystac.extensions.datacube import DatacubeExtension
from requests.sessions import Session

from STACpopulator.cli import add_request_options, apply_request_options
from STACpopulator.requests import add_request_options, apply_request_options
from STACpopulator.extensions.cmip6 import CMIP6Helper, CMIP6Properties
from STACpopulator.extensions.datacube import DataCubeHelper
from STACpopulator.extensions.thredds import THREDDSExtension, THREDDSHelper
Expand Down Expand Up @@ -78,17 +79,17 @@ def create_stac_item(

try:
item.validate()
except STACValidationError:
except STACValidationError as e:
raise Exception("Failed to validate STAC item") from e

# print(json.dumps(item.to_dict()))
return json.loads(json.dumps(item.to_dict()))


def make_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description="CMIP6 STAC populator from a THREDDS catalog or NCML XML.")
parser.add_argument("stac_host", type=str, help="STAC API address")
parser.add_argument("href", type=str, help="URL to a THREDDS catalog or a NCML XML with CMIP6 metadata.")
def add_parser_args(parser: argparse.ArgumentParser) -> None:
parser.description="CMIP6 STAC populator from a THREDDS catalog or NCML XML."
parser.add_argument("stac_host", help="STAC API URL")
parser.add_argument("href", help="URL to a THREDDS catalog or a NCML XML with CMIP6 metadata.")
parser.add_argument("--update", action="store_true", help="Update collection and its items")
parser.add_argument(
"--mode",
Expand All @@ -105,10 +106,9 @@ def make_parser() -> argparse.ArgumentParser:
),
)
add_request_options(parser)
return parser


def runner(ns: argparse.Namespace) -> Optional[int] | NoReturn:
def runner(ns: argparse.Namespace) -> int:
LOGGER.info(f"Arguments to call: {vars(ns)}")

with Session() as session:
Expand All @@ -123,13 +123,14 @@ def runner(ns: argparse.Namespace) -> Optional[int] | NoReturn:
ns.stac_host, data_loader, update=ns.update, session=session, config_file=ns.config, log_debug=ns.debug
)
c.ingest()
return 0


def main(*args: str) -> Optional[int]:
parser = make_parser()
def main(*args: str) -> int:
    """
    Standalone entry point for this populator.

    :param args: command line arguments; when none are given, argparse falls back to ``sys.argv[1:]``.
    :returns: the value returned by ``runner`` for the parsed arguments.
    """
    parser = argparse.ArgumentParser()
    # Register this populator's arguments: a bare parser would reject the positional arguments and
    # leave ``runner`` without the attributes it reads from the namespace.
    add_parser_args(parser)
    ns = parser.parse_args(args or None)
    return runner(ns)


if __name__ == "__main__":
main()
sys.exit(main())
3 changes: 3 additions & 0 deletions STACpopulator/implementations/DirectoryLoader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .crawl_directory import add_parser_args, runner
mishaschwartz marked this conversation as resolved.
Show resolved Hide resolved

__all__ = ["add_parser_args", "runner"]
Loading
Loading