diff --git a/integrations/pinecone/examples/example.py b/integrations/pinecone/examples/example.py index a10b951b5..71d289ef6 100644 --- a/integrations/pinecone/examples/example.py +++ b/integrations/pinecone/examples/example.py @@ -15,6 +15,7 @@ from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder from haystack.components.preprocessors import DocumentSplitter from haystack.components.writers import DocumentWriter +from haystack.utils import Secret from haystack_integrations.components.retrievers.pinecone import PineconeEmbeddingRetriever from haystack_integrations.document_stores.pinecone import PineconeDocumentStore @@ -22,7 +23,11 @@ file_paths = glob.glob("neural-search-pills/pills/*.md") document_store = PineconeDocumentStore( - api_key="YOUR-PINECONE-API-KEY", environment="gcp-starter", index="default", namespace="default", dimension=768 + api_key=Secret.from_token("YOUR-PINECONE-API-KEY"), + environment="gcp-starter", + index="default", + namespace="default", + dimension=768, ) indexing = Pipeline() diff --git a/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py index 92ea987b4..91364d7bf 100644 --- a/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py +++ b/integrations/pinecone/src/haystack_integrations/document_stores/pinecone/document_store.py @@ -3,14 +3,14 @@ # SPDX-License-Identifier: Apache-2.0 import io import logging -import os from copy import copy from typing import Any, Dict, List, Optional import pandas as pd -from haystack import default_to_dict +from haystack import default_from_dict, default_to_dict from haystack.dataclasses import Document from haystack.document_stores.types import DuplicatePolicy +from haystack.utils import Secret, deserialize_secrets_inplace from haystack.utils.filters import convert import pinecone @@ -29,7 +29,7 @@ class PineconeDocumentStore: def __init__( self, *, - api_key: Optional[str] = None, + api_key: Secret = Secret.from_env_var("PINECONE_API_KEY"), # noqa: B008 environment: str = "us-west1-gcp", index: str = "default", namespace: str = "default", @@ -58,15 +58,16 @@ def __init__( [API reference](https://docs.pinecone.io/reference/create_index-1). """ - api_key = api_key or os.environ.get("PINECONE_API_KEY") - if not api_key: + resolved_api_key = api_key.resolve_value() + if resolved_api_key is None: msg = ( "PineconeDocumentStore expects an API key. " "Set the PINECONE_API_KEY environment variable (recommended) or pass it explicitly." ) raise ValueError(msg) + self.api_key = api_key - pinecone.init(api_key=api_key, environment=environment) + pinecone.init(api_key=resolved_api_key, environment=environment) if index not in pinecone.list_indexes(): logger.info(f"Index {index} does not exist. Creating a new index.") @@ -92,9 +93,15 @@ def __init__( self.batch_size = batch_size self.index_creation_kwargs = index_creation_kwargs + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "PineconeDocumentStore": + deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) + return default_from_dict(cls, data) + def to_dict(self) -> Dict[str, Any]: return default_to_dict( self, + api_key=self.api_key.to_dict(), environment=self.environment, index=self.index, dimension=self.dimension, diff --git a/integrations/pinecone/tests/conftest.py b/integrations/pinecone/tests/conftest.py index c7a1342d5..872b38a07 100644 --- a/integrations/pinecone/tests/conftest.py +++ b/integrations/pinecone/tests/conftest.py @@ -2,6 +2,7 @@ import pytest from haystack.document_stores.types import DuplicatePolicy +from pinecone.core.client.exceptions import NotFoundException from haystack_integrations.document_stores.pinecone import PineconeDocumentStore @@ -51,4 +52,7 @@ def delete_documents_and_wait(filters): store.delete_documents = delete_documents_and_wait yield store - store._index.delete(delete_all=True, namespace=namespace) + try: + store._index.delete(delete_all=True, namespace=namespace) + except NotFoundException: + pass diff --git a/integrations/pinecone/tests/test_document_store.py b/integrations/pinecone/tests/test_document_store.py index cd1bb0db3..d8e8ace35 100644 --- a/integrations/pinecone/tests/test_document_store.py +++ b/integrations/pinecone/tests/test_document_store.py @@ -4,6 +4,7 @@ import pytest from haystack import Document from haystack.testing.document_store import CountDocumentsTest, DeleteDocumentsTest, WriteDocumentsTest +from haystack.utils import Secret from haystack_integrations.document_stores.pinecone import PineconeDocumentStore @@ -13,7 +14,7 @@ def test_init(mock_pinecone): mock_pinecone.Index.return_value.describe_index_stats.return_value = {"dimension": 30} document_store = PineconeDocumentStore( - api_key="fake-api-key", + api_key=Secret.from_token("fake-api-key"), environment="gcp-starter", index="my_index", namespace="test", @@ -34,7 +35,7 @@ def test_init(mock_pinecone): @patch("haystack_integrations.document_stores.pinecone.document_store.pinecone") def test_init_api_key_in_environment_variable(mock_pinecone, monkeypatch): - monkeypatch.setenv("PINECONE_API_KEY", "fake-api-key") + monkeypatch.setenv("PINECONE_API_KEY", "env-api-key") PineconeDocumentStore( environment="gcp-starter", @@ -45,14 +46,14 @@ def test_init_api_key_in_environment_variable(mock_pinecone, monkeypatch): metric="euclidean", ) - mock_pinecone.init.assert_called_with(api_key="fake-api-key", environment="gcp-starter") + mock_pinecone.init.assert_called_with(api_key="env-api-key", environment="gcp-starter") @patch("haystack_integrations.document_stores.pinecone.document_store.pinecone") -def test_to_dict(mock_pinecone): +def test_to_dict(mock_pinecone, monkeypatch): mock_pinecone.Index.return_value.describe_index_stats.return_value = {"dimension": 30} + monkeypatch.setenv("PINECONE_API_KEY", "env-api-key") document_store = PineconeDocumentStore( - api_key="fake-api-key", environment="gcp-starter", index="my_index", namespace="test", @@ -63,6 +64,13 @@ def test_to_dict(mock_pinecone): assert document_store.to_dict() == { "type": "haystack_integrations.document_stores.pinecone.document_store.PineconeDocumentStore", "init_parameters": { + "api_key": { + "env_vars": [ + "PINECONE_API_KEY", + ], + "strict": True, + "type": "env_var", + }, "environment": "gcp-starter", "index": "my_index", "dimension": 30, @@ -85,12 +93,13 @@ def test_write_documents_duplicate_fail(self, document_store: PineconeDocumentSt @pytest.mark.skip(reason="Pinecone only supports UPSERT operations") def test_write_documents_duplicate_skip(self, document_store: PineconeDocumentStore): ... + @pytest.mark.skip(reason="Pinecone creates a namespace only when the first document is written") + def test_delete_documents_empty_document_store(self, document_store: PineconeDocumentStore): ... + def test_init_fails_wo_api_key(self, monkeypatch): - api_key = None monkeypatch.delenv("PINECONE_API_KEY", raising=False) with pytest.raises(ValueError): PineconeDocumentStore( - api_key=api_key, environment="gcp-starter", index="my_index", ) diff --git a/integrations/pinecone/tests/test_emebedding_retriever.py b/integrations/pinecone/tests/test_emebedding_retriever.py index d2d3c8546..9dc1c9760 100644 --- a/integrations/pinecone/tests/test_emebedding_retriever.py +++ b/integrations/pinecone/tests/test_emebedding_retriever.py @@ -18,10 +18,10 @@ def test_init_default(): @patch("haystack_integrations.document_stores.pinecone.document_store.pinecone") -def test_to_dict(mock_pinecone): +def test_to_dict(mock_pinecone, monkeypatch): + monkeypatch.setenv("PINECONE_API_KEY", "env-api-key") mock_pinecone.Index.return_value.describe_index_stats.return_value = {"dimension": 512} document_store = PineconeDocumentStore( - api_key="test-key", environment="gcp-starter", index="default", namespace="test-namespace", @@ -35,6 +35,13 @@ def test_to_dict(mock_pinecone): "init_parameters": { "document_store": { "init_parameters": { + "api_key": { + "env_vars": [ + "PINECONE_API_KEY", + ], + "strict": True, + "type": "env_var", + }, "environment": "gcp-starter", "index": "default", "namespace": "test-namespace",