Skip to content

Commit

Permalink
fix(ChatData):db metadata charset bug (#698)
Browse files Browse the repository at this point in the history
1.fix db metadata charset bug
2.fmt
  • Loading branch information
fangyinc authored Oct 19, 2023
2 parents 8aee7fd + 8acc2dc commit 9efc4d3
Show file tree
Hide file tree
Showing 32 changed files with 371 additions and 82 deletions.
8 changes: 7 additions & 1 deletion docs/getting_started/faq/deploy/deploy_faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,10 @@ pip install langchain>=0.0.286
```commandline
pip install --use-pep517 fschat
```
```

##### Q9: alembic.util.exc.CommandError: Target database is not up to date.
delete files in `DB-GPT/pilot/meta_data/alembic/versions/` and reboot.
```commandline
rm -rf DB-GPT/pilot/meta_data/alembic/versions/*
```
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: DB-GPT 👏👏 0.3.5\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2023-09-26 17:47+0800\n"
"POT-Creation-Date: 2023-10-19 19:31+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: zh_CN\n"
Expand All @@ -20,12 +20,12 @@ msgstr ""
"Generated-By: Babel 2.12.1\n"

#: ../../getting_started/faq/deploy/deploy_faq.md:1
#: ca823e9d6d1d433db7ed15c8273e1b00
#: fb640f7c38744cbf996dcf7f73f325f6
msgid "Installation FAQ"
msgstr "Installation FAQ"

#: ../../getting_started/faq/deploy/deploy_faq.md:5
#: 3803d098c534434f9f513b3a62de54a4
#: 79fd80e469d14d608554d53a0e0ed2e3
#, fuzzy
msgid ""
"Q1: execute `pip install -e .` error, found some package cannot find "
Expand All @@ -35,18 +35,18 @@ msgstr ""
"cannot find correct version."

#: ../../getting_started/faq/deploy/deploy_faq.md:6
#: b785864f47e643df9a4669d8da6167d6
#: f1f6e3291d1446b5bbcf744cd4c4e89a
msgid "change the pip source."
msgstr "替换pip源."

#: ../../getting_started/faq/deploy/deploy_faq.md:13
#: ../../getting_started/faq/deploy/deploy_faq.md:20
#: c41f026fb1464c71a45d0746c224ecce f70fb69b568d4fc4ad4c4731b2032eaf
#: 68e1b39a08774a81b9061cc5205e4c1c dd34901f446749e998cd34ec5b6c44f4
msgid "or"
msgstr "或者"

#: ../../getting_started/faq/deploy/deploy_faq.md:27
#: d179e3d695764f838dc354eb0d978bb3
#: 0899f0e28dae443b8f912d96c797b79c
msgid ""
"Q2: sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) unable to"
" open database file"
Expand All @@ -55,86 +55,97 @@ msgstr ""
" open database file"

#: ../../getting_started/faq/deploy/deploy_faq.md:29
#: 55174e8d247a414e8c6c8861d4707a55
#: 3e60d8190e49436b8c40b34a67b7bfb3
msgid "make sure you pull latest code or create directory with mkdir pilot/data"
msgstr "make sure you pull latest code or create directory with mkdir pilot/data"

#: ../../getting_started/faq/deploy/deploy_faq.md:31
#: dbce9e9cae734a5083a6f0fc28bce7cd
#: baeaae20238842d3b8e4ae5b337198e5
msgid "Q3: The model keeps getting killed."
msgstr "Q3: The model keeps getting killed."

#: ../../getting_started/faq/deploy/deploy_faq.md:33
#: 2de5648d2e7546bf85f20f4162003298
#: eb3936307ad64b19b73483ff9ae126f2
msgid ""
"your GPU VRAM size is not enough, try replace your hardware or replace "
"other llms."
msgstr "GPU显存不够, 增加显存或者换一个显存小的模型"

#: ../../getting_started/faq/deploy/deploy_faq.md:35
#: 47810771cd364964b9b5b8fd85bca4ee
#: f6dba770717041699c73b4cd00d48aad
msgid "Q4: How to access website on the public network"
msgstr ""

#: ../../getting_started/faq/deploy/deploy_faq.md:37
#: e8c5bac6680648509d528ea6aaf5994e
#: 447d9e9374de44bab6d8a03f2c936676
msgid ""
"You can try to use gradio's [network](https://github.com/gradio-"
"app/gradio/blob/main/gradio/networking.py) to achieve."
msgstr ""

#: ../../getting_started/faq/deploy/deploy_faq.md:48
#: bb75ec127f574c00a09d92d5206e9357
#: 5e34dd4dfcf34feeb1815dfa974041d0
msgid "Open `url` with your browser to see the website."
msgstr ""

#: ../../getting_started/faq/deploy/deploy_faq.md:50
#: 5fdb87b84bd94385a1a93dab8d41ebe8
#: aaef774ce6124021a3862bc0a25d465f
msgid "Q5: (Windows) execute `pip install -e .` error"
msgstr ""

#: ../../getting_started/faq/deploy/deploy_faq.md:52
#: 31eef51e044044f29f3ad08defa9c305
#: ec3945df451c4ec2b32ebb476f45c82b
msgid "The error log like the following:"
msgstr ""

#: ../../getting_started/faq/deploy/deploy_faq.md:71
#: aaba0c3060b443e4b9877f70d78321ce
#: 1df09f6d9f9b4c1a8a32d6e271e5ee39
msgid ""
"Download and install `Microsoft C++ Build Tools` from [visual-cpp-build-"
"tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/)"
msgstr ""

#: ../../getting_started/faq/deploy/deploy_faq.md:75
#: 4c8137546e5c4240884f7ea6d9d922bf
#: 251f47bfa5694242a1c9d81a2022b7a0
msgid "Q6: `Torch not compiled with CUDA enabled`"
msgstr ""

#: ../../getting_started/faq/deploy/deploy_faq.md:82
#: 01daf14f8c494219b1d9a5af4449951e
#: bc9dfdfc47924a0e8d3ec535e23bf923
msgid "Install [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive)"
msgstr ""

#: ../../getting_started/faq/deploy/deploy_faq.md:83
#: c75e6371911e4d5ca6859e51501c9679
#: b5a632baa42745bdbee5d6ba516d8d8b
msgid ""
"Reinstall PyTorch [start-locally](https://pytorch.org/get-started/locally"
"/#start-locally) with CUDA support."
msgstr ""

#: ../../getting_started/faq/deploy/deploy_faq.md:85
#: 7cfb9003e505445ebb9ed3d015e184e2
#: 0092fb91642749f5a55b629017c0de6a
msgid "Q7: ImportError: cannot import name 'PersistentClient' from 'chromadb'."
msgstr "Q7: ImportError: cannot import name 'PersistentClient' from 'chromadb'."

#: ../../getting_started/faq/deploy/deploy_faq.md:91
#: e1d5d5d85ddc480d8d81f7b550848cbf
#: 4aa87418f2a54c138bf3b7ff28a7e776
msgid ""
"Q8: pydantic.error_wrappers.ValidationError:1 validation error for "
"HuggingFaceEmbeddings.model_kwargs extra not permitted"
msgstr "Q8: pydantic.error_wrappers.ValidationError:1 validation error for "
msgstr ""
"Q8: pydantic.error_wrappers.ValidationError:1 validation error for "
"HuggingFaceEmbeddings.model_kwargs extra not permitted"

#: ../../getting_started/faq/deploy/deploy_faq.md:102
#: 6b690ab272af44f6b126cfe5ce1435ef
msgid "Q9: alembic.util.exc.CommandError: Target database is not up to date."
msgstr ""

#: ../../getting_started/faq/deploy/deploy_faq.md:103
#: 223026d3b9124363b695937922d8f8d5
msgid "delete files in `DB-GPT/pilot/meta_data/alembic/versions/` and reboot."
msgstr "删除`DB-GPT/pilot/meta_data/alembic/versions/`目录下的文件并重启"

#~ msgid ""
#~ "Q2: When use Mysql, Access denied "
#~ "for user 'root@localhost'(using password :NO)"
Expand Down
7 changes: 5 additions & 2 deletions pilot/base_modules/agent/db/my_plugin_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@

class MyPluginEntity(Base):
__tablename__ = "my_plugin"

__table_args__ = {
"mysql_charset": "utf8mb4",
"mysql_collate": "utf8mb4_unicode_ci",
}
id = Column(Integer, primary_key=True, comment="autoincrement id")
tenant = Column(String(255), nullable=True, comment="user's tenant")
user_code = Column(String(255), nullable=False, comment="user code")
Expand All @@ -27,7 +30,7 @@ class MyPluginEntity(Base):
created_at = Column(
DateTime, default=datetime.utcnow, comment="plugin install time"
)
__table_args__ = (UniqueConstraint("user_code", "name", name="uk_name"),)
UniqueConstraint("user_code", "name", name="uk_name")


class MyPluginDao(BaseDao[MyPluginEntity]):
Expand Down
15 changes: 10 additions & 5 deletions pilot/base_modules/agent/db/plugin_hub_db.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
from datetime import datetime
import pytz
from typing import List
from sqlalchemy import Column, Integer, String, Index, DateTime, func, Boolean
from sqlalchemy import Column, Integer, String, Index, DateTime, func, Boolean, DDL
from sqlalchemy import UniqueConstraint
from pilot.base_modules.meta_data.meta_data import Base

from pilot.base_modules.meta_data.base_dao import BaseDao
from pilot.base_modules.meta_data.meta_data import Base, engine, session


char_set_sql = DDL("ALTER TABLE plugin_hub CONVERT TO CHARACTER SET utf8mb4")


class PluginHubEntity(Base):
__tablename__ = "plugin_hub"
__table_args__ = {
"mysql_charset": "utf8mb4",
"mysql_collate": "utf8mb4_unicode_ci",
}
id = Column(
Integer, primary_key=True, autoincrement=True, comment="autoincrement id"
)
Expand All @@ -26,10 +33,8 @@ class PluginHubEntity(Base):
created_at = Column(DateTime, default=datetime.utcnow, comment="plugin upload time")
installed = Column(Integer, default=False, comment="plugin already installed count")

__table_args__ = (
UniqueConstraint("name", name="uk_name"),
Index("idx_q_type", "type"),
)
UniqueConstraint("name", name="uk_name")
Index("idx_q_type", "type")


class PluginHubDao(BaseDao[PluginHubEntity]):
Expand Down
22 changes: 21 additions & 1 deletion pilot/connections/rdbms/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,18 @@ def __init__(
custom_table_info: Optional[dict] = None,
view_support: bool = False,
):
"""Create engine from database URI."""
"""Create engine from database URI.
Args:
- engine: Engine sqlalchemy.engine
- schema: Optional[str].
- metadata: Optional[MetaData]
- ignore_tables: Optional[List[str]]
- include_tables: Optional[List[str]]
- sample_rows_in_table_info: int default:3,
- indexes_in_table_info: bool = False,
- custom_table_info: Optional[dict] = None,
- view_support: bool = False,
"""
self._engine = engine
self._schema = schema
if include_tables and ignore_tables:
Expand Down Expand Up @@ -92,6 +103,15 @@ def from_uri_db(
engine_args: Optional[dict] = None,
**kwargs: Any,
) -> RDBMSDatabase:
"""Construct a SQLAlchemy engine from uri database.
Args:
host (str): database host.
port (int): database port.
user (str): database user.
pwd (str): database password.
db_name (str): database name.
engine_args (Optional[dict]):other engine_args.
"""
db_url: str = (
cls.driver
+ "://"
Expand Down
8 changes: 7 additions & 1 deletion pilot/embedding_engine/csv_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@ def __init__(
source_reader: Optional = None,
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize with csv path."""
"""Initialize with csv path.
Args:
- file_path: data source path
- vector_store_config: vector store config params.
- source_reader: Optional[BaseLoader]
- text_splitter: Optional[TextSplitter]
"""
super().__init__(
file_path, vector_store_config, source_reader=None, text_splitter=None
)
Expand Down
21 changes: 20 additions & 1 deletion pilot/embedding_engine/embedding_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,16 @@ def __init__(
text_splitter: Optional[TextSplitter] = None,
embedding_factory: EmbeddingFactory = None,
):
"""Initialize with knowledge embedding client, model_name, vector_store_config, knowledge_type, knowledge_source"""
"""Initialize with knowledge embedding client, model_name, vector_store_config, knowledge_type, knowledge_source
Args:
- model_name: model_name
- vector_store_config: vector store config: Dict
- knowledge_type: Optional[KnowledgeType]
- knowledge_source: Optional[str]
- source_reader: Optional[BaseLoader]
- text_splitter: Optional[TextSplitter]
- embedding_factory: EmbeddingFactory
"""
self.knowledge_source = knowledge_source
self.model_name = model_name
self.vector_store_config = vector_store_config
Expand Down Expand Up @@ -65,6 +74,11 @@ def init_knowledge_embedding(self):
)

def similar_search(self, text, topk):
"""vector db similar search
Args:
- text: query text
- topk: top k
"""
vector_client = VectorStoreConnector(
self.vector_store_config["vector_store_type"], self.vector_store_config
)
Expand All @@ -75,12 +89,17 @@ def similar_search(self, text, topk):
return ans

def vector_exist(self):
    """check whether the vector name exists in the configured vector store

    Builds a VectorStoreConnector from ``self.vector_store_config`` (its
    "vector_store_type" entry selects the store type) and returns whatever
    the connector's ``vector_name_exists()`` reports.
    """
    store_config = self.vector_store_config
    connector = VectorStoreConnector(store_config["vector_store_type"], store_config)
    return connector.vector_name_exists()

def delete_by_ids(self, ids):
"""delete vector db by ids
Args:
- ids: vector ids
"""
vector_client = VectorStoreConnector(
self.vector_store_config["vector_store_type"], self.vector_store_config
)
Expand Down
8 changes: 7 additions & 1 deletion pilot/embedding_engine/pdf_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,13 @@ def __init__(
source_reader: Optional = None,
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize pdf word path."""
"""Initialize pdf word path.
Args:
- file_path: data source path
- vector_store_config: vector store config params.
- source_reader: Optional[BaseLoader]
- text_splitter: Optional[TextSplitter]
"""
super().__init__(
file_path, vector_store_config, source_reader=None, text_splitter=None
)
Expand Down
8 changes: 7 additions & 1 deletion pilot/embedding_engine/ppt_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,13 @@ def __init__(
source_reader: Optional = None,
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize ppt word path."""
"""Initialize ppt word path.
Args:
- file_path: data source path
- vector_store_config: vector store config params.
- source_reader: Optional[BaseLoader]
- text_splitter: Optional[TextSplitter]
"""
super().__init__(
file_path, vector_store_config, source_reader=None, text_splitter=None
)
Expand Down
Loading

0 comments on commit 9efc4d3

Please sign in to comment.