diff --git a/.gitignore b/.gitignore index 68bc17f..97a31f3 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ +coverage.json # Translations *.mo @@ -158,3 +159,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +tests/outputs/** diff --git a/pyproject.toml b/pyproject.toml index 94c0479..0bd344c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ Changelog = "https://github.com/stumpylog/gotenberg-client/blob/main/CHANGELOG.m [project.optional-dependencies] compression = ["httpx[http2,brotli] ~= 0.24"] +magic = ["python-magic"] [tool.hatch.version] path = "src/gotenberg_client/__about__.py" @@ -55,12 +56,14 @@ dependencies = [ "pytest-xdist", "pytest-httpx ~= 0.26; python_version >= '3.9'", "pytest-httpx ~= 0.22; python_version < '3.9'", + "pikepdf" + ] [tool.hatch.envs.default.scripts] version = "python3 --version" -test = "pytest {args:tests}" -test-cov = "coverage run -m pytest {args:tests}" +test = "pytest --pythonwarnings=all {args:tests}" +test-cov = "coverage run -m pytest --pythonwarnings=all {args:tests}" cov-clear = "coverage erase" cov-report = [ "- coverage combine", @@ -187,6 +190,8 @@ branch = true parallel = true omit = [ "src/gotenberg_client/__about__.py", + "tests/conftest.py", + "tests/utils.py", ] [tool.coverage.paths] @@ -198,6 +203,7 @@ exclude_lines = [ "no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:", + "if SAVE_OUTPUTS:", ] [tool.mypy] diff --git a/src/gotenberg_client/client.py b/src/gotenberg_client/client.py index 5239f57..d39496d 100644 --- a/src/gotenberg_client/client.py +++ b/src/gotenberg_client/client.py @@ -6,6 +6,8 @@ from httpx import Client +from gotenberg_client.convert.chromium import ChromiumRoutes + class GotenbergClient: def __init__( @@ -28,9 +30,11 @@ def __init__( 
self._client.headers.update({"Accept": "application/json"}) if compress: - self._client.headers.update({"Accept-Encoding": "gzip,br"}) + # TODO Brotli? + self._client.headers.update({"Accept-Encoding": "gzip"}) # Add the resources + self.chromium = ChromiumRoutes(self._client) # TODO def add_headers(self, header: Dict[str, str]) -> None: # pragma: no cover diff --git a/src/gotenberg_client/convert/chromium.py b/src/gotenberg_client/convert/chromium.py index 7b24fe9..7943ce4 100644 --- a/src/gotenberg_client/convert/chromium.py +++ b/src/gotenberg_client/convert/chromium.py @@ -1,16 +1,21 @@ import dataclasses import enum import json +from contextlib import ExitStack from pathlib import Path from typing import Final from typing import Optional from urllib.parse import quote from httpx import Client +from httpx import Response +from gotenberg_client.convert.common import FORCE_MULTIPART from gotenberg_client.convert.common import PageOrientationOptions from gotenberg_client.convert.common import PageRangeType from gotenberg_client.convert.common import PdfAFormatOptions +from gotenberg_client.convert.common import guess_mime_type +from gotenberg_client.convert.common import optional_page_ranges_to_form from gotenberg_client.convert.common import optional_to_form @@ -103,7 +108,7 @@ def __init__(self, client: Client) -> None: def convert_url( self, - url: str, + url_to_convert: str, *, page_size: Optional[PageSize] = None, margins: Optional[Margin] = None, @@ -113,15 +118,33 @@ def convert_url( orientation: Optional[PageOrientationOptions] = None, scale: Optional[int | float] = None, page_ranges: Optional[PageRangeType] = None, - header: Optional[Path] = None, - footer: Optional[Path] = None, + header: Optional[Path] = None, # noqa: ARG002 + footer: Optional[Path] = None, # noqa: ARG002 render_control: Optional[RenderControl] = None, media_type_emulation: Optional[EmulatedMediaTypeOptions] = None, http_control: Optional[HttpOptions] = None, fail_on_console_exceptions: 
Optional[bool] = None,
         pdf_a_output: Optional[PdfAFormatOptions] = None,
-    ) -> None:
-        pass
+    ) -> Response:
+        data = self._build_common_options_form_data(
+            page_size=page_size,
+            margins=margins,
+            prefer_css_page_size=prefer_css_page_size,
+            print_background=print_background,
+            omit_background=omit_background,
+            orientation=orientation,
+            scale=scale,
+            page_ranges=page_ranges,
+            render_control=render_control,
+            media_type_emulation=media_type_emulation,
+            http_control=http_control,
+            fail_on_console_exceptions=fail_on_console_exceptions,
+            pdf_a_output=pdf_a_output,
+        )
+        data["url"] = url_to_convert
+        resp = self._client.post(url=self._URL_CONVERT_ENDPOINT, data=data, files=FORCE_MULTIPART)
+        resp.raise_for_status()
+        return resp
 
     def convert_html(
         self,
@@ -143,8 +166,8 @@ def convert_html(
         http_control: Optional[HttpOptions] = None,
         fail_on_console_exceptions: Optional[bool] = None,
         pdf_a_output: Optional[PdfAFormatOptions] = None,
-    ) -> None:
-        self._build_common_options_form_data(
+    ) -> Response:
+        data = self._build_common_options_form_data(
             page_size=page_size,
             margins=margins,
             prefer_css_page_size=prefer_css_page_size,
@@ -153,14 +176,42 @@ def convert_html(
             orientation=orientation,
             scale=scale,
             page_ranges=page_ranges,
-            header=header,
-            footer=footer,
             render_control=render_control,
             media_type_emulation=media_type_emulation,
             http_control=http_control,
             fail_on_console_exceptions=fail_on_console_exceptions,
             pdf_a_output=pdf_a_output,
         )
+        # Open up all the file handles
+        files = {}
+        with ExitStack() as stack:
+            files.update({"index.html": ("index.html", stack.enter_context(index_file.open("rb")), "application/html")})
+            if header is not None:
+                files.update(
+                    {"header.html": ("header.html", stack.enter_context(header.open("rb")), "application/html")},
+                )
+            if footer is not None:
+                files.update(
+                    {"footer.html": ("footer.html", stack.enter_context(footer.open("rb")), "application/html")},
+                )
+            if additional_files is not None:
+                for file in additional_files:
+
mime_type = guess_mime_type(file)
+                    if mime_type is not None:
+                        files.update({file.name: (file.name, stack.enter_context(file.open("rb")), mime_type)})
+                    else:
+                        files.update(
+                            {
+                                file.name: (
+                                    file.name,
+                                    stack.enter_context(file.open("rb")),
+                                ),
+                            },
+                        )
+
+            resp = self._client.post(url=self._HTML_CONVERT_ENDPOINT, data=data, files=files)
+            resp.raise_for_status()
+            return resp
 
     def convert_markdown(
         self,
@@ -184,7 +235,55 @@ def convert_markdown(
         fail_on_console_exceptions: Optional[bool] = None,
         pdf_a_output: Optional[PdfAFormatOptions] = None,
-    ) -> None:
-        pass
+    ) -> Response:
+        data = self._build_common_options_form_data(
+            page_size=page_size,
+            margins=margins,
+            prefer_css_page_size=prefer_css_page_size,
+            print_background=print_background,
+            omit_background=omit_background,
+            orientation=orientation,
+            scale=scale,
+            page_ranges=page_ranges,
+            render_control=render_control,
+            media_type_emulation=media_type_emulation,
+            http_control=http_control,
+            fail_on_console_exceptions=fail_on_console_exceptions,
+            pdf_a_output=pdf_a_output,
+        )
+        # Open up all the file handles
+        files = {}
+        with ExitStack() as stack:
+            files.update({"index.html": ("index.html", stack.enter_context(index_file.open("rb")), "application/html")})
+            if header is not None:
+                files.update(
+                    {"header.html": ("header.html", stack.enter_context(header.open("rb")), "application/html")},
+                )
+            if footer is not None:
+                files.update(
+                    {"footer.html": ("footer.html", stack.enter_context(footer.open("rb")), "application/html")},
+                )
+            # Including the markdown files
+            # Use the index of the file to ensure the ordering (TODO: not applied yet, files are keyed by name)
+            for file in markdown_files:
+                files.update({file.name: (file.name, stack.enter_context(file.open("rb")), "text/markdown")})
+            if additional_files is not None:
+                for file in additional_files:
+                    mime_type = guess_mime_type(file)
+                    if mime_type is not None:
+                        files.update({file.name: (file.name, stack.enter_context(file.open("rb")), mime_type)})
+                    else:
+                        files.update(
+                            {
+                                file.name: (
+
file.name, + stack.enter_context(file.open("rb")), + ), + }, + ) + + resp = self._client.post(url=self._MARKDOWN_CONVERT_ENDPOINT, data=data, files=files) + resp.raise_for_status() + return resp @staticmethod def _build_common_options_form_data( @@ -197,8 +296,6 @@ def _build_common_options_form_data( orientation: Optional[PageOrientationOptions] = None, scale: Optional[int | float] = None, page_ranges: Optional[PageRangeType] = None, - header: Optional[Path] = None, - footer: Optional[Path] = None, render_control: Optional[RenderControl] = None, media_type_emulation: Optional[EmulatedMediaTypeOptions] = None, http_control: Optional[HttpOptions] = None, @@ -216,6 +313,7 @@ def _build_common_options_form_data( if orientation is not None: data.update(orientation.to_form()) data.update(optional_to_form(scale, "scale")) + data.update(optional_page_ranges_to_form(page_ranges, "nativePageRanges")) # TODO page ranges # TODO header & footer if render_control is not None: @@ -227,3 +325,4 @@ def _build_common_options_form_data( data.update(optional_to_form(fail_on_console_exceptions, "failOnConsoleExceptions")) if pdf_a_output is not None: data.update(pdf_a_output.to_form()) + return data diff --git a/src/gotenberg_client/convert/common.py b/src/gotenberg_client/convert/common.py index f08cc78..781a9f0 100644 --- a/src/gotenberg_client/convert/common.py +++ b/src/gotenberg_client/convert/common.py @@ -1,18 +1,30 @@ import enum from functools import lru_cache +from pathlib import Path from typing import Optional +# See https://github.com/psf/requests/issues/1081#issuecomment-428504128 +class ForceMultipartDict(dict): + def __bool__(self): + return True + + +FORCE_MULTIPART = ForceMultipartDict() + + @enum.unique class PageOrientationOptions(enum.Enum): Landscape = enum.auto() Potrait = enum.auto() def to_form(self) -> dict[str, str]: - if self.value == PageOrientationOptions.Landscape: + if self.value == PageOrientationOptions.Landscape.value: return {"landscape": "true"} - 
else:
+        elif self.value == PageOrientationOptions.Potrait.value:
             return {"landscape": "false"}
+        else:  # pragma: no cover
+            raise NotImplementedError(self.value)
 
 
 @enum.unique
@@ -21,6 +33,16 @@ class PdfAFormatOptions(enum.Enum):
     A2a = enum.auto()
     A3b = enum.auto()
 
+    def to_form(self) -> dict[str, str]:
+        if self.value == PdfAFormatOptions.A1a.value:
+            return {"pdfFormat": "PDF/A-1a"}
+        elif self.value == PdfAFormatOptions.A2a.value:
+            return {"pdfFormat": "PDF/A-2b"}  # NOTE(review): A2a maps to PDF/A-2b; confirm the member name
+        elif self.value == PdfAFormatOptions.A3b.value:
+            return {"pdfFormat": "PDF/A-3b"}
+        else:  # pragma: no cover
+            raise NotImplementedError(self.value)
+
 
 PageRangeType = list[list[int]] | None
 
@@ -42,3 +64,26 @@ def optional_page_ranges_to_form(value: Optional[PageRangeType], name: str) -> d
     """
     Converts a list of lists of pages into the formatted strings Gotenberg expects
     """
+    if value is None:
+        return {}
+    else:
+        combined = []
+        for range_value in value:
+            combined += range_value
+        combined.sort()
+        return {name: ",".join(map(str, combined))}
+
+
+def guess_mime_type(url: Path) -> Optional[str]:
+    """
+    Guesses the MIME type of a file, using python-magic when it is installed and the standard library otherwise
+    """
+    try:
+        import magic
+
+        return magic.from_file(url, mime=True)
+    except ImportError:
+        import mimetypes
+
+        mime_type, _ = mimetypes.guess_type(url)
+        return mime_type
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..9117dc5
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,28 @@
+import logging
+import os
+from pathlib import Path
+from typing import Final
+
+import pytest
+
+from gotenberg_client.client import GotenbergClient
+
+GOTENBERG_URL: Final[str] = os.getenv("GOTENBERG_URL", "http://localhost:3000")
+
+SAMPLE_DIR: Final[Path] = Path(__file__).parent.resolve() / "samples"
+SAVE_DIR: Final[Path] = Path(__file__).parent.resolve() / "outputs"
+SAVE_OUTPUTS: Final[bool] = "SAVE_TEST_OUTPUT" in os.environ
+if SAVE_OUTPUTS:
+    SAVE_DIR.mkdir(exist_ok=True)
+
+
+@pytest.fixture()
+def client() -> GotenbergClient:
+    with GotenbergClient(gotenerg_url=GOTENBERG_URL, log_level=logging.INFO) as
client: + yield client + + +@pytest.fixture() +def client_compressed() -> GotenbergClient: + with GotenbergClient(gotenerg_url=GOTENBERG_URL, log_level=logging.INFO, compress=True) as client: + yield client diff --git a/tests/samples/basic.html b/tests/samples/basic.html new file mode 100644 index 0000000..e9d7aed --- /dev/null +++ b/tests/samples/basic.html @@ -0,0 +1,10 @@ + + +
+ +++It is a press, certainly, but a press from which shall flow in inexhaustible streams...Through it, God will spread His Word. A spring of truth shall flow from it: like a new star it shall scatter the darkness of ignorance, and cause a light heretofore unknown to shine amongst men.
+ +
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+ +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+ +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
+of
+ + diff --git a/tests/samples/header.html b/tests/samples/header.html new file mode 100644 index 0000000..73bfb4b --- /dev/null +++ b/tests/samples/header.html @@ -0,0 +1,13 @@ + + + + + + + + diff --git a/tests/samples/img.gif b/tests/samples/img.gif new file mode 100644 index 0000000..6b066b5 Binary files /dev/null and b/tests/samples/img.gif differ diff --git a/tests/samples/markdown1.md b/tests/samples/markdown1.md new file mode 100644 index 0000000..8bad5bd --- /dev/null +++ b/tests/samples/markdown1.md @@ -0,0 +1,3 @@ +## This paragraph uses the default font and has been generated from a markdown file + +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. diff --git a/tests/samples/markdown2.md b/tests/samples/markdown2.md new file mode 100644 index 0000000..bafeff0 --- /dev/null +++ b/tests/samples/markdown2.md @@ -0,0 +1,3 @@ +## This paragraph uses a local font and has been generated from a markdown file + +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 
diff --git a/tests/samples/markdown_index.html b/tests/samples/markdown_index.html new file mode 100644 index 0000000..ef20b74 --- /dev/null +++ b/tests/samples/markdown_index.html @@ -0,0 +1,31 @@ + + + + + + +++It is a press, certainly, but a press from which shall flow in inexhaustible streams...Through it, God will spread His Word. A spring of truth shall flow from it: like a new star it shall scatter the darkness of ignorance, and cause a light heretofore unknown to shine amongst men.
+ +