Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

code cleanup #66

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
* Add tests for CLI and implementations when invoked through the CLI
* Refactored code dealing with requests and authentication to the `STACpopulator/requests.py` file
* Add `--log-file` command line option to specify a non-default location to write log files to
* fix incorrect example in README
* move argument parsing for logging options to the implementation code
* fix bug where logging options were being set incorrectly
* rename files to avoid potential naming conflicts with other packages (`logging` and `requests`)


## [0.6.0](https://github.com/crim-ca/stac-populator/tree/0.6.0) (2024-02-22)

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ For example, using the [CMIP6_UofT][CMIP6_UofT] implementation, the script can b
python STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py \
"http://localhost:8880/stac/" \
"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html" \
"STACpopulator/implementations/CMIP6_UofT/collection_config.yml"
--config "STACpopulator/implementations/CMIP6_UofT/collection_config.yml"
```

*Note*: <br>
Expand Down
13 changes: 3 additions & 10 deletions STACpopulator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,20 @@
import sys
from types import ModuleType
import warnings
from datetime import datetime, timezone
from typing import Callable

from STACpopulator import __version__, implementations
from STACpopulator.exceptions import STACPopulatorError
from STACpopulator.logging import setup_logging
from STACpopulator.log import setup_logging


def add_parser_args(parser: argparse.ArgumentParser) -> dict[str, Callable]:
def add_parser_args(parser: argparse.ArgumentParser) -> None:
parser.add_argument(
"--version",
"-V",
action="version",
version=f"%(prog)s {__version__}",
help="prints the version of the library and exits",
)
parser.add_argument("--debug", action="store_const", const=logging.DEBUG, help="set logger level to debug")
parser.add_argument(
"--log-file", help="file to write log output to. By default logs will be written to the current directory."
)
commands_subparser = parser.add_subparsers(
title="command", dest="command", description="STAC populator command to execute.", required=True
)
Expand Down Expand Up @@ -52,8 +46,7 @@ def implementation_modules() -> dict[str, ModuleType]:

def run(ns: argparse.Namespace) -> int:
if ns.command == "run":
logfile_name = ns.log_file or f"{ns.populator}_log_{datetime.now(timezone.utc).isoformat() + 'Z'}.jsonl"
setup_logging(logfile_name, ns.debug or logging.INFO)
setup_logging(ns.log_file, ns.debug or logging.INFO)
return implementation_modules()[ns.populator].runner(ns) or 0


Expand Down
14 changes: 8 additions & 6 deletions STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
from pystac.extensions.datacube import DatacubeExtension
from requests.sessions import Session

from STACpopulator.requests import add_request_options, apply_request_options
from STACpopulator import cli
from STACpopulator.log import add_logging_options
from STACpopulator.request_utils import add_request_options, apply_request_options
from STACpopulator.extensions.cmip6 import CMIP6Helper, CMIP6Properties
from STACpopulator.extensions.datacube import DataCubeHelper
from STACpopulator.extensions.thredds import THREDDSExtension, THREDDSHelper
Expand All @@ -31,7 +33,6 @@ def __init__(
update: Optional[bool] = False,
session: Optional[Session] = None,
config_file: Optional[Union[os.PathLike[str], str]] = None,
log_debug: Optional[bool] = False,
) -> None:
"""Constructor
Expand All @@ -40,7 +41,7 @@ def __init__(
:param data_loader: loader to iterate over ingestion data.
"""
super().__init__(
stac_host, data_loader, update=update, session=session, config_file=config_file, log_debug=log_debug
stac_host, data_loader, update=update, session=session, config_file=config_file
)

def create_stac_item(
Expand Down Expand Up @@ -106,6 +107,7 @@ def add_parser_args(parser: argparse.ArgumentParser) -> None:
),
)
add_request_options(parser)
add_logging_options(parser)


def runner(ns: argparse.Namespace) -> int:
Expand All @@ -120,7 +122,7 @@ def runner(ns: argparse.Namespace) -> int:
data_loader = ErrorLoader()

c = CMIP6populator(
ns.stac_host, data_loader, update=ns.update, session=session, config_file=ns.config, log_debug=ns.debug
ns.stac_host, data_loader, update=ns.update, session=session, config_file=ns.config
)
c.ingest()
return 0
Expand All @@ -130,8 +132,8 @@ def main(*args: str) -> int:
parser = argparse.ArgumentParser()
add_parser_args(parser)
ns = parser.parse_args(args or None)
return runner(ns)

ns.populator = "CMIP6_UofT"
return cli.run(ns)
Comment on lines +135 to +136
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is potentially dangerous if we add other sub-ArgumentParser commands than the top-level CLI run command which might not have the populator attribute.

Since the run function (not to confuse with the run subparser command) only does implementation_modules()[ns.populator].runner(ns), there's basically no difference from doing runner(ns) (as before), and there's no need to inject the populator argument anymore.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is necessary so that the logging options are applied when this file is executed directly from the command line.

I think this is potentially dangerous if we add other sub-ArgumentParser commands than the top-level CLI run command which might not have the populator attribute.

Agreed, if that happens, then we'd have to modify this code as well. The simpler and more maintainable thing to do would be to remove the option to call populator scripts directly from the command line.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure to understand why the logging options cause this? populator is only defined by the run subparser. What causes populator to become an issue if calling the script directly since run would be bypassed entirely?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I gave it a try, and no error on my end (beside the unrelated schema validation of course).

{BE7604F5-C019-4211-A68C-3435813DA442}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure to understand why the logging options cause this?

There's no error but the logging options are applied here

setup_logging(ns.log_file, ns.debug or logging.INFO)

So if you don't run it through the CLI functions then the logging options are never applied. We could have added a call to setup_logging in every runner function as well but that seemed like we're adding boilerplate (which we're actively trying to remove in #64)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. But can we at least resolve the populator name automatically rather than a hard coded value? I really dislike having different name resolution strategies by different part of the code. If the directory name is the "reference" for the populator name, then everywhere should resolve it the same way.


if __name__ == "__main__":
sys.exit(main())
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

from requests.sessions import Session

from STACpopulator.requests import add_request_options, apply_request_options
from STACpopulator import cli
from STACpopulator.log import add_logging_options
from STACpopulator.request_utils import add_request_options, apply_request_options
from STACpopulator.input import STACDirectoryLoader
from STACpopulator.models import GeoJSONPolygon
from STACpopulator.populator_base import STACpopulatorBase
Expand Down Expand Up @@ -51,6 +53,7 @@ def add_parser_args(parser: argparse.ArgumentParser) -> None:
help="Limit search of STAC Collections only to first top-most matches in the crawled directory structure.",
)
add_request_options(parser)
add_logging_options(parser)


def runner(ns: argparse.Namespace) -> int:
Expand All @@ -70,7 +73,8 @@ def main(*args: str) -> int:
parser = argparse.ArgumentParser()
add_parser_args(parser)
ns = parser.parse_args(args or None)
return runner(ns)
ns.populator = "DirectoryLoader"
return cli.run(ns)


if __name__ == "__main__":
Expand Down
12 changes: 12 additions & 0 deletions STACpopulator/logging.py → STACpopulator/log.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import argparse
import datetime as dt
import json
import logging.config
Expand Down Expand Up @@ -126,3 +127,14 @@ def filter(self, record: logging.LogRecord) -> bool | logging.LogRecord:
},
"loggers": {"root": {"level": "DEBUG", "handlers": ["stderr", "file"]}},
}

def add_logging_options(parser: argparse.ArgumentParser) -> None:
"""
Adds arguments to a parser to configure logging options.
"""
parser.add_argument("--debug", action="store_const", const=logging.DEBUG, help="set logger level to debug")
parser.add_argument(
"--log-file",
default=f"stac_populator_log_{dt.datetime.now(dt.timezone.utc).isoformat() + 'Z'}.jsonl",
help="file to write log output to. By default logs will be written to the current directory."
)
4 changes: 1 addition & 3 deletions STACpopulator/populator_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,9 @@ def __init__(
self,
stac_host: str,
data_loader: GenericLoader,
update: Optional[bool] = False,
update: bool = False,
session: Optional[Session] = None,
config_file: Optional[Union[os.PathLike[str], str]] = "collection_config.yml",
log_debug: Optional[bool] = False,
) -> None:
"""Constructor
Expand All @@ -41,7 +40,6 @@ def __init__(
:raises RuntimeError: Raised if one of the required definitions is not found in the collection info filename
"""

super().__init__()
self._collection_config_path = config_file
self._collection_info: MutableMapping[str, Any] = None
self._session = session
Expand Down
File renamed without changes.
Loading