stackitcloud
diff --git a/‎libs/README.md‎
Lines changed: 7 additions & 5 deletions b/‎libs/README.md‎
Lines changed: 7 additions & 5 deletions
diff --git a/‎libs/admin-api-lib/src/admin_api_lib/impl/file_services/s3_service.py‎
Lines changed: 3 additions & 4 deletions b/‎libs/admin-api-lib/src/admin_api_lib/impl/file_services/s3_service.py‎
Lines changed: 3 additions & 4 deletions
diff --git a/‎libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/pdf_extractor.py‎
Lines changed: 12 additions & 12 deletions b/‎libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/pdf_extractor.py‎
Lines changed: 12 additions & 12 deletions
diff --git a/‎libs/extractor-api-lib/tests/pdf_extractor_test.py‎
Lines changed: 2 additions & 2 deletions b/‎libs/extractor-api-lib/tests/pdf_extractor_test.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎libs/rag-core-api/src/rag_core_api/dependency_container.py‎
Lines changed: 10 additions & 0 deletions b/‎libs/rag-core-api/src/rag_core_api/dependency_container.py‎
Lines changed: 10 additions & 0 deletions
@@ -90,12 +90,14 @@ Uploaded documents are required to contain the following metadata:
 | composed_retriever | [`rag_core_api.retriever.retriever.Retriever`](./rag-core-api/src/rag_core_api/retriever/retriever.py) | [`rag_core_api.impl.retriever.composite_retriever.CompositeRetriever`](./rag-core-api/src/rag_core_api/impl/retriever/composite_retriever.py) | Handles retrieval, re-ranking, etc. |
 | large_language_model | `langchain_core.language_models.llms.BaseLLM` | `langchain_community.llms.vllm.VLLMOpenAI`, `langchain_community.llms.Ollama` or `langchain_community.llms.FakeListLLM` | The LLM that is used for all LLM tasks. The default depends on the value of `rag_core_lib.impl.settings.rag_class_types_settings.RAGClassTypeSettings.llm_type`. The FakeListLLM is used for testing. |
 | prompt | `str` | [`rag_core_api.prompt_templates.answer_generation_prompt.ANSWER_GENERATION_PROMPT`](./rag-core-api/src/rag_core_api/prompt_templates/answer_generation_prompt.py) | The prompt used for answering the question. |
-| rephrasing_prompt | `str` |  [`rag_core_api.prompt_templates.question_rephrasing_prompt.ANSWER_REPHRASING_PROMPT`](./rag-core-api/src/rag_core_api/prompt_templates/question_rephrasing_prompt.py) | The prompt used for rephrasing the question. The rephrased question (and the *original* question are both used for retrival of the documents)|
+| rephrasing_prompt | `str` |  [`rag_core_api.prompt_templates.question_rephrasing_prompt.QUESTION_REPHRASING_PROMPT`](./rag-core-api/src/rag_core_api/prompt_templates/question_rephrasing_prompt.py) | The prompt used for rephrasing the question. Both the rephrased question and the *original* question are used for retrieval of the documents. |
+| language_detection_prompt | `str` | [`rag_core_api.prompt_templates.language_detection_prompt.LANGUAGE_DETECTION_PROMPT`](./rag-core-api/src/rag_core_api/prompt_templates/language_detection_prompt.py) | Prompt for detecting input language. Enforces structured JSON output `{ "language": "<iso639-1>" }` and defaults to `en` when uncertain. |
 | langfuse_manager | [`rag_core_lib.impl.langfuse_manager.langfuse_manager.LangfuseManager`](./rag-core-lib/src/rag_core_lib/impl/langfuse_manager/langfuse_manager.py) | [`rag_core_lib.impl.langfuse_manager.langfuse_manager.LangfuseManager`](./rag-core-lib/src/rag_core_lib/impl/langfuse_manager/langfuse_manager.py) | Retrieves additional settings, as well as the prompt from langfuse if available. |
-| answer_generation_chain | [`rag_core_lib.chains.async_chain.AsyncChain[rag_core_api.impl.graph.graph_state.graph_state.AnswerGraphState, str]`](./rag-core-lib/src/rag_core_lib/chains/async_chain.py) | [`rag_core_api.impl.answer_generation_chains.answer_generation_chain.AnswerGenerationChain`](./rag-core-api/src/rag_core_api/impl/answer_generation_chains/answer_generation_chain.py) | LangChain chain used for answering the question. Is part of the *chat_graph*, |
-| rephrasing_chain | [`rag_core_lib.chains.async_chain.AsyncChain[rag_core_api.impl.graph.graph_state.graph_state.AnswerGraphState, str]`](./rag-core-lib/src/rag_core_lib/chains/async_chain.py) | [`rag_core_api.impl.answer_generation_chains.rephrasing_chain.RephrasingChain`](./rag-core-api/src/rag_core_api/impl/answer_generation_chains/rephrasing_chain.py) | LangChain chain used for rephrasing the question. Is part of the *chat_graph*. |
+| answer_generation_chain | [`rag_core_lib.runnables.async_runnable.AsyncRunnable[rag_core_api.impl.graph.graph_state.graph_state.AnswerGraphState, str]`](./rag-core-lib/src/rag_core_lib/runnables/async_runnable.py) | [`rag_core_api.impl.answer_generation_chains.answer_generation_chain.AnswerGenerationChain`](./rag-core-api/src/rag_core_api/impl/answer_generation_chains/answer_generation_chain.py) | LangChain chain used for answering the question. Is part of the *chat_graph*. |
+| rephrasing_chain | [`rag_core_lib.runnables.async_runnable.AsyncRunnable[rag_core_api.impl.graph.graph_state.graph_state.AnswerGraphState, str]`](./rag-core-lib/src/rag_core_lib/runnables/async_runnable.py) | [`rag_core_api.impl.answer_generation_chains.rephrasing_chain.RephrasingChain`](./rag-core-api/src/rag_core_api/impl/answer_generation_chains/rephrasing_chain.py) | LangChain chain used for rephrasing the question. Is part of the *chat_graph*. |
+| language_detection_chain | [`rag_core_lib.runnables.async_runnable.AsyncRunnable[rag_core_api.impl.graph.graph_state.graph_state.AnswerGraphState, str]`](./rag-core-lib/src/rag_core_lib/runnables/async_runnable.py) | [`rag_core_api.impl.answer_generation_chains.language_detection_chain.LanguageDetectionChain`](./rag-core-api/src/rag_core_api/impl/answer_generation_chains/language_detection_chain.py) | Detects the language of the question and returns an ISO 639-1 code (e.g., `en`, `de`). Uses structured-output guidance and robust parsing with fallback to `en`. Part of the *chat_graph*. |
 | chat_graph | [`rag_core_api.graph.graph_base.GraphBase`](./rag-core-api/src/rag_core_api/graph/graph_base.py) | [`rag_core_api.impl.graph.chat_graph.DefaultChatGraph`](./rag-core-api/src/rag_core_api/impl/graph/chat_graph.py) | Langgraph graph that contains the entire logic for question answering. |
-| traced_chat_graph | [`rag_core_lib.chains.async_chain.AsyncChain[Any, Any]`](./rag-core-lib/src/rag_core_lib/chains/async_chain.py)| [`rag_core_lib.impl.tracers.langfuse_traced_chain.LangfuseTracedGraph`](./rag-core-lib/src/rag_core_lib/impl/tracers/langfuse_traced_chain.py) | Wraps around the *chat_graph* and add langfuse tracing. |
+| traced_chat_graph | [`rag_core_lib.runnables.async_runnable.AsyncRunnable[Any, Any]`](./rag-core-lib/src/rag_core_lib/runnables/async_runnable.py)| [`rag_core_lib.impl.tracers.langfuse_traced_chain.LangfuseTracedGraph`](./rag-core-lib/src/rag_core_lib/impl/tracers/langfuse_traced_chain.py) | Wraps around the *chat_graph* and adds langfuse tracing. |
 | evaluator | [`rag_core_api.impl.evaluator.langfuse_ragas_evaluator.LangfuseRagasEvaluator`](./rag-core-api/src/rag_core_api/impl/evaluator/langfuse_ragas_evaluator.py) | [`rag_core_api.impl.evaluator.langfuse_ragas_evaluator.LangfuseRagasEvaluator`](./rag-core-api/src/rag_core_api/impl/evaluator/langfuse_ragas_evaluator.py) | The evaluator used in the evaluate endpoint. |
 | chat_endpoint | [`rag_core_api.api_endpoints.chat.Chat`](./rag-core-api/src/rag_core_api/api_endpoints/chat.py) | [`rag_core_api.impl.api_endpoints.default_chat.DefaultChat`](./rag-core-api/src/rag_core_api/impl/api_endpoints/default_chat.py) | Implementation of the chat endpoint. Default implementation just calls the *traced_chat_graph* |
 | ragas_llm | `langchain_core.language_models.chat_models.BaseChatModel` | `langchain_openai.ChatOpenAI` or `langchain_ollama.ChatOllama` | The LLM used for the ragas evaluation. |
@@ -191,7 +193,7 @@ The extracted information will be summarized using LLM. The summary, as well as
 | langfuse_manager | [`rag_core_lib.impl.langfuse_manager.langfuse_manager.LangfuseManager`](./rag-core-lib/src/rag_core_lib/impl/langfuse_manager/langfuse_manager.py) | [`rag_core_lib.impl.langfuse_manager.langfuse_manager.LangfuseManager`](./rag-core-lib/src/rag_core_lib/impl/langfuse_manager/langfuse_manager.py) | Retrieves additional settings, as well as the prompt from langfuse if available. |
 | summarizer |  [`admin_api_lib.summarizer.summarizer.Summarizer`](./admin-api-lib/src/admin_api_lib/summarizer/summarizer.py) | [`admin_api_lib.impl.summarizer.langchain_summarizer.LangchainSummarizer`](./admin-api-lib/src/admin_api_lib/impl/summarizer/langchain_summarizer.py) | Creates the summaries. Uses the shared retry decorator with optional per-summarizer overrides (see 2.4). |
 | untraced_information_enhancer |[`admin_api_lib.information_enhancer.information_enhancer.InformationEnhancer`](./admin-api-lib/src/admin_api_lib/information_enhancer/information_enhancer.py) | [`admin_api_lib.impl.information_enhancer.general_enhancer.GeneralEnhancer`](./admin-api-lib/src/admin_api_lib/impl/information_enhancer/general_enhancer.py) |  Uses the *summarizer* to enhance the extracted documents. |
-| information_enhancer |  [`rag_core_lib.chains.async_chain.AsyncChain[Any, Any]`](./rag-core-lib/src/rag_core_lib/chains/async_chain.py)| [`rag_core_lib.impl.tracers.langfuse_traced_chain.LangfuseTracedGraph`](./rag-core-lib/src/rag_core_lib/impl/tracers/langfuse_traced_chain.py) |Wraps around the *untraced_information_enhancer* and adds langfuse tracing. |
+| information_enhancer |  [`rag_core_lib.runnables.async_runnable.AsyncRunnable[Any, Any]`](./rag-core-lib/src/rag_core_lib/runnables/async_runnable.py)| [`rag_core_lib.impl.tracers.langfuse_traced_chain.LangfuseTracedGraph`](./rag-core-lib/src/rag_core_lib/impl/tracers/langfuse_traced_chain.py) |Wraps around the *untraced_information_enhancer* and adds langfuse tracing. |
 | document_deleter |[`admin_api_lib.api_endpoints.document_deleter.DocumentDeleter`](./admin-api-lib/src/admin_api_lib/api_endpoints/document_deleter.py) | [`admin_api_lib.impl.api_endpoints.default_document_deleter.DefaultDocumentDeleter`](./admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_document_deleter.py) |  Handles deletion of sources. |
 | documents_status_retriever |  [`admin_api_lib.api_endpoints.documents_status_retriever.DocumentsStatusRetriever`](./admin-api-lib/src/admin_api_lib/api_endpoints/documents_status_retriever.py) | [`admin_api_lib.impl.api_endpoints.default_documents_status_retriever.DefaultDocumentsStatusRetriever`](./admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_documents_status_retriever.py) |Handles return of source status. |
 | source_uploader | [`admin_api_lib.api_endpoints.source_uploader.SourceUploader`](./admin-api-lib/src/admin_api_lib/api_endpoints/source_uploader.py) | [`admin_api_lib.impl.api_endpoints.default_source_uploader.DefaultSourceUploader`](./admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_source_uploader.py)| Handles data loading and extraction from various non-file sources. |
 
@@ -1,7 +1,6 @@
 """Class to handle I/O with S3 storage."""
 
 import logging
-import traceback
 from pathlib import Path
 from typing import BinaryIO
 
@@ -125,7 +124,7 @@ def delete_file(self, file_name: str) -> None:
         try:
             file_name = f"/{file_name}" if not file_name.startswith("/") else file_name
             self._s3_client.delete_object(Bucket=self._s3_settings.bucket, Key=file_name)
-            logger.info(f"File {file_name} successfully deleted.")
-        except Exception as e:
-            logger.error("Error deleting file %s: %s %s" % (file_name, e, traceback.format_exc()))
+            logger.info("File %s successfully deleted.", file_name)
+        except Exception:
+            logger.exception("Error deleting file %s", file_name)
             raise
@@ -154,8 +154,8 @@ async def aextract_content(self, file_path: Path, name: str) -> list[InternalInf
                     )
                     pdf_elements += new_pdf_elements
 
-        logger.info(f"Extraction completed. Found {len(pdf_elements)} information pieces.")
-        return pdf_elements
+            logger.info("Extraction completed. Found %d information pieces.", len(pdf_elements))
+            return pdf_elements
 
     def _is_text_based(self, page: Page) -> bool:
         """Classify whether a page is text-based, scanned.
@@ -200,8 +200,8 @@ def _extract_tables_from_text_page(
                 table_df = pd.DataFrame(table_data)
                 try:
                     converted_table = self._dataframe_converter.convert(table_df)
-                except TypeError as e:
-                    logger.error(f"Error while converting table to string: {e}")
+                except TypeError:
+                    logger.exception("Error while converting table to string")
                     continue
                 if not converted_table.strip():
                     continue
@@ -215,8 +215,8 @@ def _extract_tables_from_text_page(
                         information_id=hash_datetime(),
                     )
                 )
-        except Exception as e:
-            logger.warning(f"Failed to find tables on page {page_index}: {e}")
+        except Exception:
+            logger.warning("Failed to find tables on page %d", page_index, exc_info=True)
 
         return table_elements
 
@@ -321,19 +321,19 @@ def _extract_tables_from_scanned_page(
                                     },
                                 )
                             )
-                    except Exception as e:
-                        logger.warning(f"Failed to convert Camelot table {i + 1}: {e}")
+                    except Exception:
+                        logger.warning("Failed to convert Camelot table %d", i + 1, exc_info=True)
 
-        except Exception as e:
-            logger.debug(f"Camelot table extraction failed for page {page_index}: {e}")
+        except Exception:
+            logger.debug("Camelot table extraction failed for page %d", page_index, exc_info=True)
 
         return table_elements
 
     def _extract_text_from_text_page(self, page: Page) -> str:
         try:
             return page.extract_text() or ""
-        except Exception as e:
-            logger.warning(f"Failed to extract text with pdfplumber: {e}")
+        except Exception:
+            logger.warning("Failed to extract text with pdfplumber", exc_info=True)
             return ""
 
     def _extract_content_from_page(
 
@@ -551,8 +551,8 @@ async def test_end_to_end_extraction(self, pdf_extractor, test_pdf_files):
             text_count = sum(1 for elem in result if elem.type == ContentType.TEXT)
             table_count = sum(1 for elem in result if elem.type == ContentType.TABLE)
 
-            logger.info(f"  Text elements: {text_count}")
-            logger.info(f"  Table elements: {table_count}")
+            logger.info("  Text elements: %d", text_count)
+            logger.info("  Table elements: %d", table_count)
 
             # Verify metadata completeness
             for i, element in enumerate(result):
 
@@ -19,6 +19,7 @@
     AnswerGenerationChain,
 )
 from rag_core_api.impl.answer_generation_chains.rephrasing_chain import RephrasingChain
+from rag_core_api.impl.answer_generation_chains.language_detection_chain import LanguageDetectionChain
 from rag_core_api.impl.api_endpoints.default_chat import DefaultChat
 from rag_core_api.impl.api_endpoints.default_information_pieces_remover import (
     DefaultInformationPiecesRemover,
@@ -57,6 +58,7 @@
 from rag_core_api.prompt_templates.question_rephrasing_prompt import (
     QUESTION_REPHRASING_PROMPT,
 )
+from rag_core_api.prompt_templates.language_detection_prompt import LANGUAGE_DETECTION_PROMPT
 from rag_core_lib.impl.data_types.content_type import ContentType
 from rag_core_lib.impl.langfuse_manager.langfuse_manager import LangfuseManager
 from rag_core_lib.impl.llms.llm_factory import chat_model_provider
@@ -180,6 +182,7 @@ class DependencyContainer(DeclarativeContainer):
 
     prompt = ANSWER_GENERATION_PROMPT
     rephrasing_prompt = QUESTION_REPHRASING_PROMPT
+    language_detection_prompt = LANGUAGE_DETECTION_PROMPT
 
     langfuse = Singleton(
         Langfuse,
@@ -194,6 +197,7 @@ class DependencyContainer(DeclarativeContainer):
         managed_prompts={
             AnswerGenerationChain.__name__: prompt,
             RephrasingChain.__name__: rephrasing_prompt,
+            LanguageDetectionChain.__name__: language_detection_prompt,
         },
         llm=large_language_model,
     )
@@ -208,10 +212,16 @@ class DependencyContainer(DeclarativeContainer):
         langfuse_manager=langfuse_manager,
     )
 
+    language_detection_chain = Singleton(
+        LanguageDetectionChain,
+        langfuse_manager=langfuse_manager,
+    )
+
     chat_graph = Singleton(
         DefaultChatGraph,
         composed_retriever=composed_retriever,
         rephrasing_chain=rephrasing_chain,
+        language_detection_chain=language_detection_chain,
         mapper=information_piece_mapper,
         answer_generation_chain=answer_generation_chain,
         error_messages=error_messages,