Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

replace token api str with Secret #449

Merged
merged 10 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions integrations/unstructured/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -156,15 +156,13 @@ ban-relative-imports = "parents"
"tests/**/*" = ["PLR2004", "S101", "TID252"]

[tool.coverage.run]
source_pkgs = ["src", "tests"]
source = ["haystack_integrations"]
branch = true
parallel = true

[tool.coverage.paths]
unstructured_fileconverter_haystack = ["src/haystack_integrations", "*/unstructured-fileconverter-haystack/src"]
tests = ["tests", "*/unstructured-fileconverter-haystack/tests"]

[tool.coverage.report]
omit = ["*/tests/*", "*/__init__.py"]
show_missing=true
exclude_lines = [
"no cov",
"if __name__ == .__main__.:",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from haystack import Document, component, default_to_dict
from haystack.components.converters.utils import normalize_metadata
from haystack.utils import Secret
from tqdm import tqdm

from unstructured.documents.elements import Element # type: ignore[import]
Expand All @@ -29,7 +30,7 @@ class UnstructuredFileConverter:
def __init__(
self,
api_url: str = UNSTRUCTURED_HOSTED_API_URL,
api_key: Optional[str] = None,
api_key: Optional[Secret] = Secret.from_env_var("UNSTRUCTURED_API_KEY"), # noqa: B008
davidsbatista marked this conversation as resolved.
Show resolved Hide resolved
document_creation_mode: Literal[
"one-doc-per-file", "one-doc-per-page", "one-doc-per-element"
] = "one-doc-per-file",
Expand Down Expand Up @@ -64,12 +65,11 @@ def __init__(

is_hosted_api = api_url == UNSTRUCTURED_HOSTED_API_URL

api_key = api_key or os.environ.get("UNSTRUCTURED_API_KEY")
# we check whether api_key is None or an empty string
if is_hosted_api and not api_key:
davidsbatista marked this conversation as resolved.
Show resolved Hide resolved
msg = (
"To use the hosted version of Unstructured, you need to set the environment variable "
"UNSTRUCTURED_API_KEY (recommended) or explictly pass the parameter api_key."
"UNSTRUCTURED_API_KEY (recommended) or explicitly pass the parameter api_key."
)
raise ValueError(msg)

Expand All @@ -84,6 +84,7 @@ def to_dict(self) -> Dict[str, Any]:
return default_to_dict(
self,
api_url=self.api_url,
api_key=self.api_key.to_dict() if self.api_key else None,
document_creation_mode=self.document_creation_mode,
separator=self.separator,
unstructured_kwargs=self.unstructured_kwargs,
Expand Down Expand Up @@ -140,8 +141,8 @@ def run(
documents.extend(docs_for_file)
return {"documents": documents}

@staticmethod
def _create_documents(
self,
filepath: Path,
elements: List[Element],
document_creation_mode: Literal["one-doc-per-file", "one-doc-per-page", "one-doc-per-element"],
Expand Down
13 changes: 13 additions & 0 deletions integrations/unstructured/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from pathlib import Path

import pytest


@pytest.fixture
def set_env_variables(monkeypatch):
    """Set a placeholder ``UNSTRUCTURED_API_KEY`` in the environment via ``monkeypatch``."""
    monkeypatch.setenv(name="UNSTRUCTURED_API_KEY", value="test-api-key")


@pytest.fixture
def samples_path():
    """Return the path of the ``samples`` directory located next to this file."""
    here = Path(__file__).parent
    return here.joinpath("samples")
19 changes: 8 additions & 11 deletions integrations/unstructured/tests/test_converter.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,17 @@
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
from pathlib import Path

import pytest
from haystack.utils import Secret
from haystack_integrations.components.converters.unstructured import UnstructuredFileConverter


@pytest.fixture
def samples_path():
return Path(__file__).parent / "samples"


class TestUnstructuredFileConverter:
@pytest.mark.usefixtures("set_env_variables")
def test_init_default(self):
converter = UnstructuredFileConverter(api_key="test-api-key")
converter = UnstructuredFileConverter()
assert converter.api_url == "https://api.unstructured.io/general/v0/general"
assert converter.api_key == "test-api-key"
assert isinstance(converter.api_key, Secret)
assert converter.document_creation_mode == "one-doc-per-file"
assert converter.separator == "\n\n"
assert converter.unstructured_kwargs == {}
Expand All @@ -31,20 +26,22 @@ def test_init_with_parameters(self):
progress_bar=False,
)
assert converter.api_url == "http://custom-url:8000/general"
assert converter.api_key is None
assert isinstance(converter.api_key, Secret)
assert converter.document_creation_mode == "one-doc-per-element"
assert converter.separator == "|"
assert converter.unstructured_kwargs == {"foo": "bar"}
assert not converter.progress_bar

@pytest.mark.usefixtures("set_env_variables")
def test_to_dict(self):
converter = UnstructuredFileConverter(api_key="test-api-key")
converter = UnstructuredFileConverter()
converter_dict = converter.to_dict()

assert converter_dict == {
"type": "haystack_integrations.components.converters.unstructured.converter.UnstructuredFileConverter",
"init_parameters": {
"api_url": "https://api.unstructured.io/general/v0/general",
"api_key": {"env_vars": ["UNSTRUCTURED_API_KEY"], "strict": True, "type": "env_var"},
"document_creation_mode": "one-doc-per-file",
"separator": "\n\n",
"unstructured_kwargs": {},
Expand Down