From dfae8128137aa3a2f86233613204071c8837e20c Mon Sep 17 00:00:00 2001 From: Rohit Sanjay Date: Mon, 18 Sep 2023 16:43:33 -0700 Subject: [PATCH] WIP tests --- papermill_origami/dependencies.py | 17 +- papermill_origami/engine.py | 11 +- poetry.lock | 19 +- pyproject.toml | 2 + .../tests => tests}/__init__.py | 0 tests/e2e/__init__.py | 0 tests/e2e/conftest.py | 199 ++++++++++++++++++ tests/e2e/test_execute.py | 88 ++++++++ .../test_papermill_registration.py | 0 .../tests => tests}/test_path_util.py | 0 10 files changed, 331 insertions(+), 5 deletions(-) rename {papermill_origami/tests => tests}/__init__.py (100%) create mode 100644 tests/e2e/__init__.py create mode 100644 tests/e2e/conftest.py create mode 100644 tests/e2e/test_execute.py rename {papermill_origami/tests => tests}/test_papermill_registration.py (100%) rename {papermill_origami/tests => tests}/test_path_util.py (100%) diff --git a/papermill_origami/dependencies.py b/papermill_origami/dependencies.py index d38b6ff..e8b36f3 100644 --- a/papermill_origami/dependencies.py +++ b/papermill_origami/dependencies.py @@ -4,6 +4,7 @@ class Settings(BaseSettings): token: str + public_url: str = "https://app.noteable.io" api_url: str = "https://app.noteable.io/gate/api" timeout: int = 60 # TODO: update this to papermill_origami once Gate @@ -14,13 +15,25 @@ class Config: env_prefix = "noteable_" +_settings = None + + +def get_settings() -> Settings: + global _settings + + if _settings is None: + _settings = Settings() + + return _settings + + _singleton_api_client = None -def get_api_client(): +def get_api_client() -> APIClient: global _singleton_api_client - settings = Settings() + settings = get_settings() if _singleton_api_client is None: _singleton_api_client = APIClient( authorization_token=settings.token, diff --git a/papermill_origami/engine.py b/papermill_origami/engine.py index 2afe0ec..a4507c8 100644 --- a/papermill_origami/engine.py +++ b/papermill_origami/engine.py @@ -9,7 +9,7 @@ from origami.models.notebook import CodeCell, Notebook from papermill.engines import Engine, NotebookExecutionManager -from papermill_origami.dependencies import get_api_client +from papermill_origami.dependencies import get_api_client, get_settings from papermill_origami.path_util import parse_noteable_file_path engine_logger = logging.getLogger(__name__) @@ -17,6 +17,7 @@ class NoteableEngine(Engine): def __init__(self): + self.settings = get_settings() self.api_client = get_api_client() async def create_parameterized_notebook( @@ -72,7 +73,7 @@ async def async_execute_managed_notebook( kernel_session_id = None try: # Delay needed to allow RBAC rows for the new file to be created :( - await asyncio.sleep(1) + await asyncio.sleep(2) rtu_client = await self.api_client.connect_realtime(parameterized_notebook["id"]) @@ -148,6 +149,12 @@ async def async_execute_managed_notebook( if kernel_session_id: await self.api_client.shutdown_kernel(kernel_session_id) + # Set the executed_notebook_url and parameterized_notebook_id metadata + # for downstream consumers of the papermill managed notebook + parameterized_url = f"{self.settings.public_url}/f/{parameterized_notebook['id']}" + notebook_execution_manager.nb.metadata["executed_notebook_url"] = parameterized_url + notebook_execution_manager.nb.metadata["parameterized_notebook_id"] = parameterized_notebook['id'] + return notebook_execution_manager.nb @classmethod diff --git a/poetry.lock b/poetry.lock index 57a76f3..e837ecd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -5794,6 +5794,23 @@ files = [ {file = "statsd-3.3.0.tar.gz", hash = "sha256:e3e6db4c246f7c59003e51c9720a51a7f39a396541cb9b147ff4b14d15b5dd1f"}, ] +[[package]] +name = "structlog" +version = "23.1.0" +description = "Structured Logging for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "structlog-23.1.0-py3-none-any.whl", hash = "sha256:79b9e68e48b54e373441e130fa447944e6f87a05b35de23138e475c05d0f7e0e"}, + {file = "structlog-23.1.0.tar.gz", hash = "sha256:270d681dd7d163c11ba500bc914b2472d2b50a8ef00faa999ded5ff83a2f906b"}, +] + +[package.extras] +dev = ["structlog[docs,tests,typing]"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-mermaid", "twisted"] +tests = ["coverage[toml]", "freezegun (>=0.2.8)", "pretend", "pytest (>=6.0)", "pytest-asyncio (>=0.17)", "simplejson"] +typing = ["mypy", "rich", "twisted"] + [[package]] name = "tabulate" version = "0.9.0" @@ -6605,4 +6622,4 @@ prefect = ["prefect-jupyter"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "cc3a53d80a3cb19d8976fee98f7300c8ef56dc738d435546f30eeff2d8e6295c" +content-hash = "a14efc84d896ce2e094929bc19dd2d29541151076ccbcdbfc59a53a6d9a16562" diff --git a/pyproject.toml b/pyproject.toml index ff5f1b7..b12d8bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ flytekitplugins-papermill = {version = "^1.2.1", optional = true} apache-airflow = { version = "^2.4.2", optional = true } prefect-jupyter = { version = "^0.2.0", optional = true } noteable-origami = "^1.0.0" +structlog = "^23.1.0" [tool.poetry.dev-dependencies] flake8-docstrings = "^1.6.0" @@ -66,6 +67,7 @@ prefect = ["prefect-jupyter"] [tool.poetry.plugins."papermill.io"] "https://" = "papermill_origami.iorw:NoteableHandler" +"http://" = "papermill_origami.iorw:NoteableHandler" [tool.poetry.plugins."papermill.engine"] noteable = "papermill_origami.engine:NoteableEngine" diff --git a/papermill_origami/tests/__init__.py b/tests/__init__.py similarity index 100% rename from papermill_origami/tests/__init__.py rename to tests/__init__.py diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py new file mode 100644 index 0000000..ee0b86f --- /dev/null +++ b/tests/e2e/conftest.py @@ -0,0 +1,199 @@ +import logging +import logging.config +import os +import uuid +from typing import Optional + +import httpx +import pytest +import structlog + +from origami.clients.api import APIClient +from origami.models.api.files import File +from origami.models.api.projects import Project +from origami.models.notebook import Notebook + +logger = structlog.get_logger() + + +@pytest.fixture(autouse=True, scope='session') +def setup_logging(): + """Configure structlog in tests the same way we do in production apps""" + structlog.configure( + processors=[ + structlog.stdlib.PositionalArgumentsFormatter(), + structlog.processors.StackInfoRenderer(), + structlog.processors.format_exc_info, + structlog.stdlib.ProcessorFormatter.wrap_for_formatter, + ], + logger_factory=structlog.stdlib.LoggerFactory(), + wrapper_class=structlog.stdlib.BoundLogger, + cache_logger_on_first_use=True, + ) + + # shared processors to be applied to both vanilla and structlog messages + # after each is appropriately pre-processed + processors = [ + # log level / logger name, effects coloring in ConsoleRenderer(colors=True) + structlog.stdlib.add_log_level, + structlog.stdlib.add_logger_name, + # timestamp format + structlog.processors.TimeStamper(fmt="iso"), + # To see all CallsiteParameterAdder options: + # https://www.structlog.org/en/stable/api.html#structlog.processors.CallsiteParameterAdder + # more options include module, pathname, process, process_name, thread, thread_name + structlog.processors.CallsiteParameterAdder( + { + structlog.processors.CallsiteParameter.FILENAME, + structlog.processors.CallsiteParameter.FUNC_NAME, + structlog.processors.CallsiteParameter.LINENO, + } + ), + # Any structlog.contextvars.bind_contextvars included in middleware/functions + structlog.contextvars.merge_contextvars, + # strip _record and _from_structlog keys from event dictionary + structlog.stdlib.ProcessorFormatter.remove_processors_meta, + structlog.dev.ConsoleRenderer(colors=True), + # ^^ In prod with any kind of logging service (datadog, grafana, etc), ConsoleRenderer + # would probably be replaced with structlog.processors.JSONRenderer() or similar + ] + + # Configs applied to logs generated by structlog or vanilla logging + logging.config.dictConfig( + { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "default": { + "()": structlog.stdlib.ProcessorFormatter, + "processors": processors, + "foreign_pre_chain": [structlog.stdlib.ExtraAdder()], + }, + }, + "handlers": { + "default": { + "class": "logging.StreamHandler", + "formatter": "default", + "stream": "ext://sys.stdout", + }, + }, + "loggers": { + # "" for applying handler to "root" (all libraries) + # you could set this to "kernel_sidecar" to only see logs from this library + "": { + "handlers": ["default"], + "level": 'INFO', + "propagate": True, + }, + }, + } + ) + + +@pytest.fixture +def jwt(): + token = os.environ.get('NOTEABLE_TOKEN') + if not token: + raise RuntimeError('NOTEABLE_TOKEN environment variable not set') + return token + + +@pytest.fixture +def api_base_url(): + # TODO: use env var or otherwise make configurable for CI + return 'http://localhost:8001/api' + + +@pytest.fixture +def test_space_id() -> uuid.UUID: + # TODO: use env var or otherwise make configurable for CI + return uuid.UUID('1f8300cd-454e-4b14-8adf-57d953d87a07') + + +@pytest.fixture +def test_project_id() -> uuid.UUID: + # TODO: use env var or otherwise make configurable for CI + return uuid.UUID('57da9e36-bd84-4f26-be5f-d4e92d1f4b95') + + +@pytest.fixture +def test_user_id() -> uuid.UUID: + # TODO: use env var or otherwise make configurable for CI + return uuid.UUID('5bd43c56-9fce-4e7e-b1d7-c92f567aac68') + + +class LogWarningTransport(httpx.AsyncHTTPTransport): + """ + Automatically log information about any non-2xx response. + """ + + async def handle_async_request(self, request: httpx.Request) -> httpx.Response: + resp = await super().handle_async_request(request) + if resp.is_error: + response_content = await resp.aread() + logger.warning(f'{request.method} {request.url} {resp.status_code} {response_content}') + return resp + + +@pytest.fixture +def api_client(api_base_url, jwt) -> APIClient: + return APIClient( + authorization_token=jwt, + api_base_url=api_base_url, + transport=LogWarningTransport(), + ) + + +@pytest.fixture +async def new_project(api_client: APIClient, test_space_id: uuid.UUID) -> Project: + """Create and cleanup a new Project""" + name = 'test-project-' + str(uuid.uuid4()) + new_project = await api_client.create_project(name=name, space_id=test_space_id) + yield new_project + await api_client.delete_project(new_project.id) + + +@pytest.fixture +async def file_maker(api_client: APIClient, test_project_id: uuid.UUID): + """Create and cleanup non-Notebook files""" + file_ids = [] + + async def make_file( + project_id: Optional[uuid.UUID] = None, path: Optional[str] = None, content: bytes = b"" + ) -> File: + if not project_id: + project_id = test_project_id + if not path: + salt = str(uuid.uuid4()) + path = f'test-file-{salt}.txt' + file = await api_client.create_file(project_id, path, content) + file_ids.append(file.id) + return file + + yield make_file + for file_id in file_ids: + await api_client.delete_file(file_id) + + +@pytest.fixture +async def notebook_maker(api_client: APIClient, test_project_id: uuid.UUID): + """Create and cleanup Notebook files""" + notebook_ids = [] + + async def make_notebook( + project_id: Optional[uuid.UUID] = None, + path: Optional[str] = None, + notebook: Optional[Notebook] = None, + ) -> File: + if not project_id: + project_id = test_project_id + if not path: + salt = str(uuid.uuid4()) + path = f'test-notebook-{salt}.ipynb' + file = await api_client.create_notebook(project_id, path, notebook) + notebook_ids.append(file.id) + return file + + yield make_notebook + for notebook_id in notebook_ids: + await api_client.delete_file(notebook_id) diff --git a/tests/e2e/test_execute.py b/tests/e2e/test_execute.py new file mode 100644 index 0000000..e913ee9 --- /dev/null +++ b/tests/e2e/test_execute.py @@ -0,0 +1,88 @@ +import pytest +import nbformat +import papermill +from origami.clients.api import APIClient +from origami.models.notebook import Notebook + + +class TestPapermillExecute: + @pytest.mark.parametrize("cells", ( + [ + nbformat.v4.new_code_cell(source="a = 1"), + nbformat.v4.new_code_cell(source="b = 2"), + nbformat.v4.new_code_cell(source="c = a + b"), + nbformat.v4.new_code_cell(source="print(c)"), + ], + [ + nbformat.v4.new_code_cell(source="# Parameters\na = 2\nb = 3\n", metadata={"tags": ["injected-parameters"]}), + nbformat.v4.new_code_cell(source="a = 1"), + nbformat.v4.new_code_cell(source="b = 2"), + nbformat.v4.new_code_cell(source="c = a + b"), + nbformat.v4.new_code_cell(source="print(c)"), + ], + )) + async def test_papermill_execute_with_noteable(self, cells, notebook_maker, api_client: APIClient): + nb = nbformat.v4.new_notebook(cells=cells, metadata={ + "kernel_info": { + "name": "python3" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + }, + }) + notebook = await notebook_maker(notebook=Notebook.parse_obj(nb)) + executed_nb = papermill.execute_notebook( + f"http://localhost:8002/f/{notebook.id}", + "-", + kernel_name="python3", + # Should get added as the first cell and overwrite values of a and b + parameters={"a": 2, "b": 3}, + engine_name="noteable", + log_output=False, + progress_bar=False, + ) + + parameterized_notebook_id = executed_nb.metadata['parameterized_notebook_id'] + rtu_client = await api_client.connect_realtime(parameterized_notebook_id) + # Check that the first cell is the parameters cell + assert rtu_client.builder.nb.cells[0].source == "# Parameters\na = 2\nb = 3\n" + assert rtu_client.kernel_state not in ("busy", "idle") + + async def test_papermill_execution_raises_exception(self, notebook_maker, api_client: APIClient): + nb = nbformat.v4.new_notebook(cells=[ + nbformat.v4.new_code_cell(source="a = 10\nb=20", metadata={"tags": ["parameters"]}), + nbformat.v4.new_code_cell(source="a = 1"), + nbformat.v4.new_code_cell(source="b = 2"), + nbformat.v4.new_code_cell(source="c = a + b"), + nbformat.v4.new_code_cell(source="print(c)"), + nbformat.v4.new_code_cell(source="raise Exception('test')"), + ]) + notebook = await notebook_maker(notebook=nb) + executed_nb = papermill.execute_notebook( + f"http://localhost:8002/f/{notebook.id}", + "-", + # Should get added as the first cell and overwrite values of a and b + parameters={"a": 2, "b": 3}, + engine_name="noteable", + log_output=False, + progress_bar=False, + ) + + parameterized_notebook_id = executed_nb.metadata['parameterized_notebook_id'] + + rtu_client = await api_client.connect_realtime(parameterized_notebook_id) + + # Check that the first cell is the parameters cell + assert rtu_client.builder.nb.cells[0].source == "# Parameters\na = 2\nb = 3\n" + + # Check that there is an active kernel session + assert rtu_client.kernel_state in ("busy", "idle") + + # TODO: figure out how to get the currently active kernel_session_id to shut + # it down. Until then, we'll just let it time out. + diff --git a/papermill_origami/tests/test_papermill_registration.py b/tests/test_papermill_registration.py similarity index 100% rename from papermill_origami/tests/test_papermill_registration.py rename to tests/test_papermill_registration.py diff --git a/papermill_origami/tests/test_path_util.py b/tests/test_path_util.py similarity index 100% rename from papermill_origami/tests/test_path_util.py rename to tests/test_path_util.py