Skip to content

Commit

Permalink
replace token api str with Secret
Browse files Browse the repository at this point in the history
  • Loading branch information
davidsbatista committed Feb 20, 2024
1 parent 5bee1e9 commit 4ac9c5f
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 20 deletions.
8 changes: 3 additions & 5 deletions integrations/unstructured/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -156,15 +156,13 @@ ban-relative-imports = "parents"
"tests/**/*" = ["PLR2004", "S101", "TID252"]

[tool.coverage.run]
source_pkgs = ["src", "tests"]
source = ["haystack_integrations"]
branch = true
parallel = true

[tool.coverage.paths]
unstructured_fileconverter_haystack = ["src/haystack_integrations", "*/unstructured-fileconverter-haystack/src"]
tests = ["tests", "*/unstructured-fileconverter-haystack/tests"]

[tool.coverage.report]
omit = ["*/tests/*", "*/__init__.py"]
show_missing=true
exclude_lines = [
"no cov",
"if __name__ == .__main__.:",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from haystack import Document, component, default_to_dict
from haystack.components.converters.utils import normalize_metadata
from haystack.utils import Secret
from tqdm import tqdm

from unstructured.documents.elements import Element # type: ignore[import]
Expand All @@ -29,7 +30,7 @@ class UnstructuredFileConverter:
def __init__(
self,
api_url: str = UNSTRUCTURED_HOSTED_API_URL,
api_key: Optional[str] = None,
api_key: Optional[Secret] = Secret.from_env_var("UNSTRUCTURED_API_KEY"), # noqa: B008
document_creation_mode: Literal[
"one-doc-per-file", "one-doc-per-page", "one-doc-per-element"
] = "one-doc-per-file",
Expand Down Expand Up @@ -64,12 +65,11 @@ def __init__(

is_hosted_api = api_url == UNSTRUCTURED_HOSTED_API_URL

api_key = api_key or os.environ.get("UNSTRUCTURED_API_KEY")
# we check whether api_key is None or an empty string
if is_hosted_api and not api_key:
msg = (
"To use the hosted version of Unstructured, you need to set the environment variable "
"UNSTRUCTURED_API_KEY (recommended) or explictly pass the parameter api_key."
"UNSTRUCTURED_API_KEY (recommended) or explicitly pass the parameter api_key."
)
raise ValueError(msg)

Expand All @@ -84,6 +84,7 @@ def to_dict(self) -> Dict[str, Any]:
return default_to_dict(
self,
api_url=self.api_url,
api_key=self.api_key.to_dict() if self.api_key else None,
document_creation_mode=self.document_creation_mode,
separator=self.separator,
unstructured_kwargs=self.unstructured_kwargs,
Expand Down Expand Up @@ -140,8 +141,8 @@ def run(
documents.extend(docs_for_file)
return {"documents": documents}

@staticmethod
def _create_documents(
self,
filepath: Path,
elements: List[Element],
document_creation_mode: Literal["one-doc-per-file", "one-doc-per-page", "one-doc-per-element"],
Expand Down
13 changes: 13 additions & 0 deletions integrations/unstructured/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from pathlib import Path

import pytest


@pytest.fixture
def set_env_variables(monkeypatch):
    """Set the ``UNSTRUCTURED_API_KEY`` environment variable to a dummy value.

    The variable is set through pytest's ``monkeypatch`` fixture, so tests
    that request this fixture see ``"test-api-key"`` as the API key.
    """
    monkeypatch.setenv(name="UNSTRUCTURED_API_KEY", value="test-api-key")


@pytest.fixture
def samples_path():
    """Return the path of the ``samples`` directory next to this file."""
    tests_dir = Path(__file__).parent
    return tests_dir.joinpath("samples")
19 changes: 8 additions & 11 deletions integrations/unstructured/tests/test_converter.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,17 @@
# SPDX-FileCopyrightText: 2023-present deepset GmbH <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0
from pathlib import Path

import pytest
from haystack.utils import Secret
from haystack_integrations.components.converters.unstructured import UnstructuredFileConverter


@pytest.fixture
def samples_path():
return Path(__file__).parent / "samples"


class TestUnstructuredFileConverter:
@pytest.mark.usefixtures("set_env_variables")
def test_init_default(self):
converter = UnstructuredFileConverter(api_key="test-api-key")
converter = UnstructuredFileConverter()
assert converter.api_url == "https://api.unstructured.io/general/v0/general"
assert converter.api_key == "test-api-key"
assert isinstance(converter.api_key, Secret)
assert converter.document_creation_mode == "one-doc-per-file"
assert converter.separator == "\n\n"
assert converter.unstructured_kwargs == {}
Expand All @@ -31,20 +26,22 @@ def test_init_with_parameters(self):
progress_bar=False,
)
assert converter.api_url == "http://custom-url:8000/general"
assert converter.api_key is None
assert isinstance(converter.api_key, Secret)
assert converter.document_creation_mode == "one-doc-per-element"
assert converter.separator == "|"
assert converter.unstructured_kwargs == {"foo": "bar"}
assert not converter.progress_bar

@pytest.mark.usefixtures("set_env_variables")
def test_to_dict(self):
converter = UnstructuredFileConverter(api_key="test-api-key")
converter = UnstructuredFileConverter()
converter_dict = converter.to_dict()

assert converter_dict == {
"type": "haystack_integrations.components.converters.unstructured.converter.UnstructuredFileConverter",
"init_parameters": {
"api_url": "https://api.unstructured.io/general/v0/general",
"api_key": {"env_vars": ["UNSTRUCTURED_API_KEY"], "strict": True, "type": "env_var"},
"document_creation_mode": "one-doc-per-file",
"separator": "\n\n",
"unstructured_kwargs": {},
Expand Down

0 comments on commit 4ac9c5f

Please sign in to comment.