From 1b6ad69ba0294b39545dd5157da342453661341e Mon Sep 17 00:00:00 2001 From: yyhhyy <95077259+Hui824@users.noreply.github.com> Date: Wed, 3 Apr 2024 16:29:47 +0800 Subject: [PATCH 1/4] Attempt to fix the 'make mypy' error introduced by PR#1359 --- dbgpt/storage/vector_store/milvus_store.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/dbgpt/storage/vector_store/milvus_store.py b/dbgpt/storage/vector_store/milvus_store.py index de8f27885..da3cdcc85 100644 --- a/dbgpt/storage/vector_store/milvus_store.py +++ b/dbgpt/storage/vector_store/milvus_store.py @@ -174,7 +174,8 @@ def __init__(self, vector_store_config: MilvusVectorConfig) -> None: bytes_str = self.collection_name.encode("utf-8") hex_str = bytes_str.hex() self.collection_name = hex_str - + if vector_store_config.embedding_fn is None: + raise ValueError("embedding_fn is required for MilvusStore") self.embedding: Embeddings = vector_store_config.embedding_fn self.fields: List = [] self.alias = milvus_vector_config.get("alias") or "default" @@ -221,6 +222,12 @@ def __init__(self, vector_store_config: MilvusVectorConfig) -> None: alias="default", ) + def _ensure_embedding_loaded(self): + # Perform runtime checks on self.embedding to + # ensure it has been correctly set and loaded + if self.embedding is None: + raise ValueError("Embedding function is not loaded in MilvusStore") + def init_schema_and_load(self, vector_name, documents) -> List[str]: """Create a Milvus collection. @@ -232,6 +239,7 @@ def init_schema_and_load(self, vector_name, documents) -> List[str]: Returns: List[str]: document ids. 
""" + self._ensure_embedding_loaded() try: from pymilvus import ( Collection, @@ -327,6 +335,7 @@ def _add_documents( timeout: Optional[int] = None, ) -> List[str]: """Add text data into Milvus.""" + self._ensure_embedding_loaded() insert_dict: Any = {self.text_field: list(texts)} try: import numpy as np # noqa: F401 @@ -355,6 +364,7 @@ def _add_documents( def load_document(self, chunks: List[Chunk]) -> List[str]: """Load document in vector database.""" + self._ensure_embedding_loaded() batch_size = 500 batched_list = [ chunks[i : i + batch_size] for i in range(0, len(chunks), batch_size) @@ -367,6 +377,7 @@ def load_document(self, chunks: List[Chunk]) -> List[str]: def similar_search(self, text, topk) -> List[Chunk]: """Perform a search on a query string and return results.""" + self._ensure_embedding_loaded() from pymilvus import Collection, DataType """similar_search in vector database.""" @@ -410,6 +421,7 @@ def similar_search_with_scores(self, text, topk, score_threshold) -> List[Chunk] Returns: List[Tuple[Document, float]]: Result doc and score. 
""" + self._ensure_embedding_loaded() from pymilvus import Collection self.col = Collection(self.collection_name) From 96fc14dc57f06b160cde16718722123cbb215c71 Mon Sep 17 00:00:00 2001 From: aries_ckt <916701291@qq.com> Date: Thu, 4 Apr 2024 01:47:16 +0800 Subject: [PATCH 2/4] fix:set embedding_fn when delete space --- dbgpt/app/knowledge/service.py | 8 +++++++- dbgpt/storage/vector_store/milvus_store.py | 16 +++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/dbgpt/app/knowledge/service.py b/dbgpt/app/knowledge/service.py index b6a2fab68..58aff202c 100644 --- a/dbgpt/app/knowledge/service.py +++ b/dbgpt/app/knowledge/service.py @@ -444,7 +444,13 @@ def delete_space(self, space_name: str): if len(spaces) == 0: raise Exception(f"delete error, no space name:{space_name} in database") space = spaces[0] - config = VectorStoreConfig(name=space.name) + embedding_factory = CFG.SYSTEM_APP.get_component( + "embedding_factory", EmbeddingFactory + ) + embedding_fn = embedding_factory.create( + model_name=EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL] + ) + config = VectorStoreConfig(name=space.name, embedding_fn=embedding_fn) vector_store_connector = VectorStoreConnector( vector_store_type=CFG.VECTOR_STORE_TYPE, vector_store_config=config, diff --git a/dbgpt/storage/vector_store/milvus_store.py b/dbgpt/storage/vector_store/milvus_store.py index da3cdcc85..62a918a7d 100644 --- a/dbgpt/storage/vector_store/milvus_store.py +++ b/dbgpt/storage/vector_store/milvus_store.py @@ -180,6 +180,11 @@ def __init__(self, vector_store_config: MilvusVectorConfig) -> None: self.fields: List = [] self.alias = milvus_vector_config.get("alias") or "default" + if self.embedding is None: + # Perform runtime checks on self.embedding to + # ensure it has been correctly set and loaded + raise ValueError("Embedding function is not loaded in MilvusStore") + # use HNSW by default. 
self.index_params = { "index_type": "HNSW", @@ -222,12 +227,6 @@ def __init__(self, vector_store_config: MilvusVectorConfig) -> None: alias="default", ) - def _ensure_embedding_loaded(self): - # Perform runtime checks on self.embedding to - # ensure it has been correctly set and loaded - if self.embedding is None: - raise ValueError("Embedding function is not loaded in MilvusStore") - def init_schema_and_load(self, vector_name, documents) -> List[str]: """Create a Milvus collection. @@ -239,7 +238,6 @@ def init_schema_and_load(self, vector_name, documents) -> List[str]: Returns: List[str]: document ids. """ - self._ensure_embedding_loaded() try: from pymilvus import ( Collection, @@ -335,7 +333,6 @@ def _add_documents( timeout: Optional[int] = None, ) -> List[str]: """Add text data into Milvus.""" - self._ensure_embedding_loaded() insert_dict: Any = {self.text_field: list(texts)} try: import numpy as np # noqa: F401 @@ -364,7 +361,6 @@ def _add_documents( def load_document(self, chunks: List[Chunk]) -> List[str]: """Load document in vector database.""" - self._ensure_embedding_loaded() batch_size = 500 batched_list = [ chunks[i : i + batch_size] for i in range(0, len(chunks), batch_size) @@ -377,7 +373,6 @@ def load_document(self, chunks: List[Chunk]) -> List[str]: def similar_search(self, text, topk) -> List[Chunk]: """Perform a search on a query string and return results.""" - self._ensure_embedding_loaded() from pymilvus import Collection, DataType """similar_search in vector database.""" @@ -421,7 +416,6 @@ def similar_search_with_scores(self, text, topk, score_threshold) -> List[Chunk] Returns: List[Tuple[Document, float]]: Result doc and score. 
""" - self._ensure_embedding_loaded() from pymilvus import Collection self.col = Collection(self.collection_name) From 2a8f66dedc3a399e85ed594f1ee0892e733f889f Mon Sep 17 00:00:00 2001 From: aries_ckt <916701291@qq.com> Date: Thu, 4 Apr 2024 01:55:00 +0800 Subject: [PATCH 3/4] fix delete unuseful code --- dbgpt/storage/vector_store/milvus_store.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/dbgpt/storage/vector_store/milvus_store.py b/dbgpt/storage/vector_store/milvus_store.py index 62a918a7d..80a352d40 100644 --- a/dbgpt/storage/vector_store/milvus_store.py +++ b/dbgpt/storage/vector_store/milvus_store.py @@ -175,16 +175,13 @@ def __init__(self, vector_store_config: MilvusVectorConfig) -> None: hex_str = bytes_str.hex() self.collection_name = hex_str if vector_store_config.embedding_fn is None: + # Perform runtime checks on self.embedding to + # ensure it has been correctly set and loaded raise ValueError("embedding_fn is required for MilvusStore") self.embedding: Embeddings = vector_store_config.embedding_fn self.fields: List = [] self.alias = milvus_vector_config.get("alias") or "default" - if self.embedding is None: - # Perform runtime checks on self.embedding to - # ensure it has been correctly set and loaded - raise ValueError("Embedding function is not loaded in MilvusStore") - # use HNSW by default. self.index_params = { "index_type": "HNSW", From 06d7c1ccf1f1067bc9d8b7d5847ea18a52e3ebb6 Mon Sep 17 00:00:00 2001 From: Fangyin Cheng Date: Sun, 7 Apr 2024 14:10:10 +0800 Subject: [PATCH 4/4] ci: add mypy check --- .github/workflows/pylint.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index d15f86769..aca3bc806 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -27,3 +27,5 @@ jobs: run: make setup - name: Check Python code style run: make fmt-check + - name: Check Python code type + run: make mypy