Merge pull request #48 from deepset-ai/better-exceptions-handling

Better exceptions handling during pipeline deployment
deepset-ai · Dec 17, 2024 · ad6837b · ad6837b
2 parents 1d96419 + 2b48388
commit ad6837b
Show file tree

Hide file tree

Showing 15 changed files with 197 additions and 9 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -47,6 +47,7 @@ source = "vcs"
 dependencies = [
   "coverage[toml]>=6.5",
   "pytest",
+  "pytest-mock",
 ]
 [tool.hatch.envs.default.scripts]
 test = "pytest {args:tests}"

diff --git a/src/hayhooks/server/app.py b/src/hayhooks/server/app.py
@@ -8,7 +8,7 @@
 logger = logging.getLogger("uvicorn.info")
 
 
-def create_app():
+def create_app() -> FastAPI:
     if root_path := os.environ.get("HAYHOOKS_ROOT_PATH"):
         app = FastAPI(root_path=root_path)
     else:

diff --git a/src/hayhooks/server/utils/deploy_utils.py b/src/hayhooks/server/utils/deploy_utils.py
@@ -16,6 +16,8 @@ def deploy_pipeline_def(app, pipeline_def: PipelineDefinition):
         pipe = registry.add(pipeline_def.name, pipeline_def.source_code)
     except ValueError as e:
         raise HTTPException(status_code=409, detail=f"{e}") from e
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"{e}") from e
 
     PipelineRunRequest = get_request_model(pipeline_def.name, pipe.inputs())
     PipelineRunResponse = get_response_model(pipeline_def.name, pipe.outputs())

diff --git a/tests/test_files/broken_rag_pipeline.yml b/tests/test_files/broken_rag_pipeline.yml
@@ -0,0 +1,26 @@
+components:
+  llm:
+    init_parameters:
+      api_base_url: null
+      api_key:
+        env_vars:
+        # Missing OPENAI_API_KEY - should raise an exception
+        strict: true
+        type: env_var
+      generation_kwargs: {}
+      model: gpt-4o-mini
+      organization: null
+      streaming_callback: null
+      system_prompt: null
+    type: haystack.components.generators.openai.OpenAIGenerator
+  prompt_builder:
+    init_parameters:
+      required_variables: null
+      template: "{{ question }}"
+      variables: null
+    type: haystack.components.builders.prompt_builder.PromptBuilder
+connections:
+- receiver: llm.prompt
+  sender: prompt_builder.prompt
+max_runs_per_component: 100
+metadata: {}
diff --git a/tests/test_files/basic_rag_pipeline.yml → .../working_pipelines/basic_rag_pipeline.yml b/tests/test_files/basic_rag_pipeline.yml → .../working_pipelines/basic_rag_pipeline.yml
diff --git a/tests/test_files/chat_with_website.yml → ...s/working_pipelines/chat_with_website.yml b/tests/test_files/chat_with_website.yml → ...s/working_pipelines/chat_with_website.yml
diff --git a/tests/test_files/working_pipelines/minimal_retriever.yml b/tests/test_files/working_pipelines/minimal_retriever.yml
@@ -0,0 +1,79 @@
+components:
+  document_embedder:
+    init_parameters:
+      batch_size: 32
+      config_kwargs: null
+      device:
+        device: cpu
+        type: single
+      model: sentence-transformers/paraphrase-MiniLM-L3-v2
+      model_kwargs: null
+      normalize_embeddings: false
+      precision: float32
+      prefix: ""
+      progress_bar: true
+      suffix: ""
+      token:
+        env_vars:
+          - HF_API_TOKEN
+          - HF_TOKEN
+        strict: false
+        type: env_var
+      tokenizer_kwargs: null
+      truncate_dim: null
+      trust_remote_code: false
+    type: haystack.components.embedders.sentence_transformers_text_embedder.SentenceTransformersTextEmbedder
+  document_retriever:
+    init_parameters:
+      document_store:
+        init_parameters:
+          api_key: null
+          embedding_dim: 384
+          force_disable_check_same_thread: false
+          grpc_port: 6334
+          hnsw_config: null
+          host: null
+          https: null
+          index: Document
+          init_from: null
+          location: null
+          metadata: {}
+          on_disk: false
+          on_disk_payload: null
+          optimizers_config: null
+          path: null
+          payload_fields_to_index: null
+          port: 6333
+          prefer_grpc: false
+          prefix: null
+          progress_bar: true
+          quantization_config: null
+          recreate_index: false
+          replication_factor: null
+          return_embedding: false
+          scroll_size: 10000
+          shard_number: null
+          similarity: cosine
+          sparse_idf: false
+          timeout: null
+          url: http://localhost:6333
+          use_sparse_embeddings: false
+          wait_result_from_api: true
+          wal_config: null
+          write_batch_size: 100
+          write_consistency_factor: null
+        type: haystack_integrations.document_stores.qdrant.document_store.QdrantDocumentStore
+      filter_policy: replace
+      filters: null
+      group_by: null
+      group_size: null
+      return_embedding: false
+      scale_score: false
+      score_threshold: null
+      top_k: 3
+    type: haystack_integrations.components.retrievers.qdrant.retriever.QdrantEmbeddingRetriever
+connections:
+  - receiver: document_retriever.query_embedding
+    sender: document_embedder.embedding
+max_runs_per_component: 100
+metadata: {}
diff --git a/tests/test_files/pipeline_qdrant.yml → ...les/working_pipelines/pipeline_qdrant.yml b/tests/test_files/pipeline_qdrant.yml → ...les/working_pipelines/pipeline_qdrant.yml
diff --git a/tests/test_files/pipeline_qdrant_2.yml → ...s/working_pipelines/pipeline_qdrant_2.yml b/tests/test_files/pipeline_qdrant_2.yml → ...s/working_pipelines/pipeline_qdrant_2.yml
diff --git a/tests/test_files/st_retriever.yml → ..._files/working_pipelines/st_retriever.yml b/tests/test_files/st_retriever.yml → ..._files/working_pipelines/st_retriever.yml
diff --git a/tests/test_files/test_pipeline_01.yml → ...es/working_pipelines/test_pipeline_01.yml b/tests/test_files/test_pipeline_01.yml → ...es/working_pipelines/test_pipeline_01.yml
diff --git a/tests/test_files/test_pipeline_02.yml → ...es/working_pipelines/test_pipeline_02.yml b/tests/test_files/test_pipeline_02.yml → ...es/working_pipelines/test_pipeline_02.yml
diff --git a/tests/test_it_deploy.py b/tests/test_it_deploy.py
@@ -6,19 +6,19 @@
 client = TestClient(app)
 
 # Load pipeline definitions from test_files
-test_files = Path(__file__).parent / "test_files"
-pipeline_names = [file.stem for file in test_files.glob("*.yml")]
+test_files = Path(__file__).parent / "test_files" / "working_pipelines"
+pipeline_data = [{"name": file.stem, "source_code": file.read_text()} for file in test_files.glob("*.yml")]
 
 
-@pytest.mark.parametrize("pipeline_name", pipeline_names)
-def test_deploy_pipeline_def(pipeline_name: str):
-    pipeline_def = (Path(__file__).parent / "test_files" / f"{pipeline_name}.yml").read_text()
-
-    deploy_response = client.post("/deploy", json={"name": pipeline_name, "source_code": pipeline_def})
+@pytest.mark.parametrize("pipeline_data", pipeline_data)
+def test_deploy_pipeline_def(pipeline_data: dict):
+    deploy_response = client.post(
+        "/deploy", json={"name": pipeline_data["name"], "source_code": pipeline_data["source_code"]}
+    )
     assert deploy_response.status_code == 200
 
     status_response = client.get("/status")
-    assert pipeline_name in status_response.json()["pipelines"]
+    assert pipeline_data["name"] in status_response.json()["pipelines"]
 
     docs_response = client.get("/docs")
     assert docs_response.status_code == 200
diff --git a/tests/test_it_handling_deploy_exceptions.py b/tests/test_it_handling_deploy_exceptions.py
@@ -0,0 +1,14 @@
+from fastapi.testclient import TestClient
+from hayhooks.server import app
+from pathlib import Path
+
+client = TestClient(app)
+
+
+def test_gracefully_handle_deploy_exception():
+    pipeline_name = "broken_rag_pipeline"
+    pipeline_def = (Path(__file__).parent / "test_files" / "broken_rag_pipeline.yml").read_text()
+
+    deploy_response = client.post("/deploy", json={"name": pipeline_name, "source_code": pipeline_def})
+    assert deploy_response.status_code == 500
+    assert "Couldn't deserialize component 'llm'" in deploy_response.json()["detail"]
diff --git a/tests/test_registry.py b/tests/test_registry.py
@@ -0,0 +1,66 @@
+import pytest
+from pathlib import Path
+from haystack import Pipeline
+from haystack.core.errors import PipelineError
+
+from hayhooks.server.pipelines.registry import _PipelineRegistry
+
+
+@pytest.fixture
+def pipeline_registry():
+    return _PipelineRegistry()
+
+
+@pytest.fixture
+def sample_pipeline_yaml():
+    return (Path(__file__).parent / "test_files" / "working_pipelines" / "basic_rag_pipeline.yml").read_text()
+
+
+def test_add_pipeline(pipeline_registry, sample_pipeline_yaml):
+    result = pipeline_registry.add("test_pipeline", sample_pipeline_yaml)
+
+    expected_pipeline = Pipeline.loads(sample_pipeline_yaml)
+    assert result == expected_pipeline
+    assert pipeline_registry.get("test_pipeline") == expected_pipeline
+
+
+def test_add_duplicate_pipeline(pipeline_registry, sample_pipeline_yaml):
+    result = pipeline_registry.add("test_pipeline", sample_pipeline_yaml)
+
+    expected_pipeline = Pipeline.loads(sample_pipeline_yaml)
+    assert result == expected_pipeline
+
+    with pytest.raises(ValueError, match="A pipeline with name test_pipeline is already in the registry"):
+        pipeline_registry.add("test_pipeline", sample_pipeline_yaml)
+
+
+def test_add_invalid_pipeline(pipeline_registry, mocker):
+    mocker.patch('haystack.Pipeline.loads', side_effect=PipelineError("Invalid pipeline"))
+
+    with pytest.raises(ValueError, match="Unable to parse Haystack Pipeline test_pipeline"):
+        pipeline_registry.add("test_pipeline", "invalid yaml")
+
+
+def test_remove_pipeline(pipeline_registry, sample_pipeline_yaml):
+    pipeline_registry.add("test_pipeline", sample_pipeline_yaml)
+    pipeline_registry.remove("test_pipeline")
+
+    assert pipeline_registry.get("test_pipeline") is None
+
+
+def test_remove_nonexistent_pipeline(pipeline_registry):
+    pipeline_registry.remove("nonexistent_pipeline")
+
+
+def test_get_nonexistent_pipeline(pipeline_registry):
+    assert pipeline_registry.get("nonexistent_pipeline") is None
+
+
+def test_get_names(pipeline_registry, sample_pipeline_yaml, mocker):
+    mocker.patch('haystack.Pipeline.loads', return_value=mocker.Mock(spec=Pipeline))
+
+    pipeline_registry.add("pipeline1", sample_pipeline_yaml)
+    pipeline_registry.add("pipeline2", sample_pipeline_yaml)
+
+    names = pipeline_registry.get_names()
+    assert sorted(names) == ["pipeline1", "pipeline2"]