From 4ac9c5fa7e10519848cade2bbb0ad9a22f1d10b7 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Tue, 20 Feb 2024 13:51:55 +0100 Subject: [PATCH] replace token api str with Secret --- integrations/unstructured/pyproject.toml | 8 +++----- .../converters/unstructured/converter.py | 9 +++++---- integrations/unstructured/tests/conftest.py | 13 +++++++++++++ .../unstructured/tests/test_converter.py | 19 ++++++++----------- 4 files changed, 29 insertions(+), 20 deletions(-) create mode 100644 integrations/unstructured/tests/conftest.py diff --git a/integrations/unstructured/pyproject.toml b/integrations/unstructured/pyproject.toml index 7366a8adf..298fdb993 100644 --- a/integrations/unstructured/pyproject.toml +++ b/integrations/unstructured/pyproject.toml @@ -156,15 +156,13 @@ ban-relative-imports = "parents" "tests/**/*" = ["PLR2004", "S101", "TID252"] [tool.coverage.run] -source_pkgs = ["src", "tests"] +source = ["haystack_integrations"] branch = true parallel = true -[tool.coverage.paths] -unstructured_fileconverter_haystack = ["src/haystack_integrations", "*/unstructured-fileconverter-haystack/src"] -tests = ["tests", "*/unstructured-fileconverter-haystack/tests"] - [tool.coverage.report] +omit = ["*/tests/*", "*/__init__.py"] +show_missing=true exclude_lines = [ "no cov", "if __name__ == .__main__.:", diff --git a/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py b/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py index 54cbd5559..4be53a5b9 100644 --- a/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py +++ b/integrations/unstructured/src/haystack_integrations/components/converters/unstructured/converter.py @@ -10,6 +10,7 @@ from haystack import Document, component, default_to_dict from haystack.components.converters.utils import normalize_metadata +from haystack.utils import Secret from tqdm import tqdm from unstructured.documents.elements import Element # type: ignore[import] @@ -29,7 +30,7 @@ class UnstructuredFileConverter: def __init__( self, api_url: str = UNSTRUCTURED_HOSTED_API_URL, - api_key: Optional[str] = None, + api_key: Optional[Secret] = Secret.from_env_var("UNSTRUCTURED_API_KEY"), # noqa: B008 document_creation_mode: Literal[ "one-doc-per-file", "one-doc-per-page", "one-doc-per-element" ] = "one-doc-per-file", @@ -64,12 +65,11 @@ def __init__( is_hosted_api = api_url == UNSTRUCTURED_HOSTED_API_URL - api_key = api_key or os.environ.get("UNSTRUCTURED_API_KEY") # we check whether api_key is None or an empty string if is_hosted_api and not api_key: msg = ( "To use the hosted version of Unstructured, you need to set the environment variable " - "UNSTRUCTURED_API_KEY (recommended) or explictly pass the parameter api_key." + "UNSTRUCTURED_API_KEY (recommended) or explicitly pass the parameter api_key." ) raise ValueError(msg) @@ -84,6 +84,7 @@ def to_dict(self) -> Dict[str, Any]: return default_to_dict( self, api_url=self.api_url, + api_key=self.api_key.to_dict() if self.api_key else None, document_creation_mode=self.document_creation_mode, separator=self.separator, unstructured_kwargs=self.unstructured_kwargs, @@ -140,8 +141,8 @@ def run( documents.extend(docs_for_file) return {"documents": documents} + @staticmethod def _create_documents( - self, filepath: Path, elements: List[Element], document_creation_mode: Literal["one-doc-per-file", "one-doc-per-page", "one-doc-per-element"], diff --git a/integrations/unstructured/tests/conftest.py b/integrations/unstructured/tests/conftest.py new file mode 100644 index 000000000..fa02cc5dd --- /dev/null +++ b/integrations/unstructured/tests/conftest.py @@ -0,0 +1,13 @@ +from pathlib import Path + +import pytest + + +@pytest.fixture +def set_env_variables(monkeypatch): + monkeypatch.setenv("UNSTRUCTURED_API_KEY", "test-api-key") + + +@pytest.fixture +def samples_path(): + return Path(__file__).parent / "samples" diff --git a/integrations/unstructured/tests/test_converter.py b/integrations/unstructured/tests/test_converter.py index e03e2e58e..9e8de0aa0 100644 --- a/integrations/unstructured/tests/test_converter.py +++ b/integrations/unstructured/tests/test_converter.py @@ -1,22 +1,17 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from pathlib import Path - import pytest +from haystack.utils import Secret from haystack_integrations.components.converters.unstructured import UnstructuredFileConverter -@pytest.fixture -def samples_path(): - return Path(__file__).parent / "samples" - - class TestUnstructuredFileConverter: + @pytest.mark.usefixtures("set_env_variables") def test_init_default(self): - converter = UnstructuredFileConverter(api_key="test-api-key") + converter = UnstructuredFileConverter() assert converter.api_url == "https://api.unstructured.io/general/v0/general" - assert converter.api_key == "test-api-key" + assert isinstance(converter.api_key, Secret) assert converter.document_creation_mode == "one-doc-per-file" assert converter.separator == "\n\n" assert converter.unstructured_kwargs == {} @@ -31,20 +26,22 @@ def test_init_with_parameters(self): progress_bar=False, ) assert converter.api_url == "http://custom-url:8000/general" - assert converter.api_key is None + assert isinstance(converter.api_key, Secret) assert converter.document_creation_mode == "one-doc-per-element" assert converter.separator == "|" assert converter.unstructured_kwargs == {"foo": "bar"} assert not converter.progress_bar + @pytest.mark.usefixtures("set_env_variables") def test_to_dict(self): - converter = UnstructuredFileConverter(api_key="test-api-key") + converter = UnstructuredFileConverter() converter_dict = converter.to_dict() assert converter_dict == { "type": "haystack_integrations.components.converters.unstructured.converter.UnstructuredFileConverter", "init_parameters": { "api_url": "https://api.unstructured.io/general/v0/general", + "api_key": {"env_vars": ["UNSTRUCTURED_API_KEY"], "strict": True, "type": "env_var"}, "document_creation_mode": "one-doc-per-file", "separator": "\n\n", "unstructured_kwargs": {},