diff --git a/README.md b/README.md index 0f4b918..b21e6cf 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,11 @@
- + - + @@ -60,7 +60,7 @@ The above-listed technologies are just the main ones. There are other technologi * [Pyenv-VirtualEnv](https://github.com/pyenv/pyenv-virtualenv) $\rightarrow$ The plugin for `Pyenv` to manage the virtual environment for our packages. * [Pre-Commit](https://pre-commit.com/) $\rightarrow$ Git hook scripts to identify issues and quality of your code before pushing it to GitHub. These hooks are implemented for the following linting packages: * [Black (Python)](https://black.readthedocs.io/en/stable/) $\rightarrow$ Manage your code style with auto-formatting and parallel continuous integration runner for Python. - * [Isort (Python)](https://pycqa.github.io/isort/) $\rightarrow$ Sort your `import` for clarity. Also for Python. + * [Isort (Python)](https://pycqa.github.io/isort/) $\rightarrow$ Sort your `import` for clarity. Also for Python. * [MyPy (Python)](https://mypy.readthedocs.io/en/stable/) $\rightarrow$ A static type checker for Python that helps you to write cleaner code. * [Pre-Commit CI](https://pre-commit.ci/) $\rightarrow$ Continuous integration for our Pre-Commit hook that fixes and updates our hook versions. * [CodeCov](https://about.codecov.io/) $\rightarrow$ A platform that analyzes the result of your automated tests. @@ -75,6 +75,7 @@ My choice for a project development workflow is usually the [Trunk-Based Develop ## What Code is included? For the backend application: + * The project, linter, and test configurations in `backend/pyproject.toml`. * 3 settings classes (development, staging, production) with the super class in `backend/src/config/settings/base.py`. * Event logger in `backend/src/config/events.py`.. @@ -87,10 +88,12 @@ For the backend application: * A comprehensive FastAPI application initialization in `backend/src/main.py`. 
For testing, I have prepared the following simple code to kick-start your test-driven development: + * A simple replication of the backend application for testing purposes and the asynchronous test client in `backend/tests/conftest.py`. * 2 simple test functions to test the backend application initialization in `tests/unit_tests/test_src.py`. For the DevOps: + * A simple `build` job to test the compilation of the source code for the backend application in `.github/workflows/ci-backend.yaml`. * A simple linting job called `code-style` with black, isort, flake8, and mypy in `.github/workflows/ci-backend.yaml`. * An automated testing with `PyTest` and an automated test reporting with `Codecov` in in `.github/workflows/ci-backend.yaml`. @@ -100,11 +103,13 @@ For the DevOps: * A CI for automatically updating all linter version in the pre-commit `YAML` file in `.pre-commit-config.YAML`. For containerization: + * A `Docker` configuration that utilizes the latest Python image in `backend/Dockerfile`. * A script that ensure the backend application will restart when postgres image hasn't started yet in `backend/entrypoint.sh`. * Setting up `Postgres` image for our database server, `Adminer` for our database editor, and `backend_app` for our backend application's container in `docker-compose.yaml`. For the team development environment: + * A pre-commit hooks for `Black`, `Isort`, and `MyPy` to ensure the conventional commit message before pushing an updated code into the remote repository in `.pre-commit-config.YAML`. * All secret variables are listed in `.env.example`. You need to copy these variables and set the values respectively to your need and save them in a new `.env` in the root directory. @@ -112,7 +117,7 @@ For the team development environment: This backend application is setup with `Docker`. Nevertheless, you can see the full local setup without `Docker` in [backend/README.md](https://github.com/SkywardAI/chat-backend/blob/trunk/backend/README.md). 
-### For quick setup: +### For quick setup ```shell cp .env.example .env @@ -125,11 +130,13 @@ docker-compose up ## Regular Setup 1. Before setting up the backend app, please create a new directory called `coverage` for the testing report purpose: + ```shell cd backend && mkdir coverage ``` 2. Backend app setup: + ```shell # Creating VENV pyenv virtualenv 3.11.0 any_venv_name @@ -144,6 +151,7 @@ docker-compose up 3. Testing with `PyTest`: Make sure that you are in the `backend/` directory. + ```shell # For testing without Docker pytest @@ -153,6 +161,7 @@ docker-compose up ``` 4. `Pre-Commit` setup: + ```shell # Make sure you are in the ROOT project directory pre-commit install @@ -173,18 +182,18 @@ docker-compose up echo "SECRET_VARIABLE=SECRET_VARIABLE_VALUE" >> .env ``` - + For test usage , you can simplely use .env.example + ```shell cp .env.example .env ``` - - 6. `CODEOWNERS` setup: Go to `.github/` and open `CODEOWNERS` file. This file is to assign the code to a specific team member so you can distribute the weights of the project clearly. 7. Docker setup: + ```shell # Make sure you are in the ROOT project directory chmod +x backend/entrypoint.sh @@ -197,6 +206,7 @@ docker-compose up ``` 8. (IMPORTANT) Database setup: + ```shell # (Docker) Generate revision for the database auto-migrations docker exec backend_app alembic revision --autogenerate -m "YOUR MIGRATION TITLE" @@ -207,7 +217,7 @@ docker-compose up alembic upgrade head # to register the database classes ``` -9. Go to https://about.codecov.io/, and sign up with your github to get the `CODECOV_TOKEN` +9. Go to <https://about.codecov.io/>, and sign up with your github to get the `CODECOV_TOKEN` 10. 
Go to your GitHub and register all the secret variables (look in .env.example) in your repository (`settings` $\rightarrow$ (scroll down a bit) `Secrets` $\rightarrow$ `Actions` $\rightarrow$ `New repository secret`) @@ -325,9 +335,10 @@ docker-compose.yaml # The main configuration file for settin ## Final Step You can delete these 3 files (or change its content based on your need): -- `LICENSE.md` -- `README.md` -- `backend/README.md` + +* `LICENSE.md` +* `README.md` +* `backend/README.md` Enjoy your development and may your technology be forever useful to everyone 😉🚀🧬 diff --git a/backend/requirements.txt b/backend/requirements.txt index 9a36974..527d73c 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -7,6 +7,7 @@ asyncpg==0.29.0 bcrypt==4.0.1 black==24.3.0 colorama==0.4.6 +datasets==2.15.0 email_validator==2.1.1 fastapi==0.110.0 greenlet==3.0.3 @@ -27,6 +28,7 @@ python-dotenv==1.0.1 python-jose==3.3.0 python-multipart==0.0.9 python-slugify==8.0.4 +sentence-transformers==2.3.1 SQLAlchemy==2.0.0b3 trio==0.24.0 uvicorn==0.28.0 diff --git a/backend/src/api/dependencies/repository.py b/backend/src/api/dependencies/repository.py index 05d8f71..8f91716 100644 --- a/backend/src/api/dependencies/repository.py +++ b/backend/src/api/dependencies/repository.py @@ -8,6 +8,7 @@ from src.api.dependencies.session import get_async_session from src.repository.crud.base import BaseCRUDRepository +from src.repository.rag.base import BaseRAGRepository def get_repository( @@ -19,3 +20,14 @@ def _get_repo( return repo_type(async_session=async_session) return _get_repo + + +def get_rag_repository( + repo_type: typing.Type[BaseRAGRepository], +) -> typing.Callable[[SQLAlchemyAsyncSession], BaseRAGRepository]: + def _get_repo( + async_session: SQLAlchemyAsyncSession = fastapi.Depends(get_async_session), + ) -> BaseRAGRepository: + return repo_type(async_session=async_session) + + return _get_repo diff --git a/backend/src/api/dependencies/session.py 
b/backend/src/api/dependencies/session.py index cb52e34..05979cf 100644 --- a/backend/src/api/dependencies/session.py +++ b/backend/src/api/dependencies/session.py @@ -17,5 +17,6 @@ async def get_async_session() -> typing.AsyncGenerator[SQLAlchemyAsyncSession, N except Exception as e: print(e) await async_db.async_session.rollback() + raise finally: await async_db.async_session.close() diff --git a/backend/src/api/routes/ai_model.py b/backend/src/api/routes/ai_model.py index cd816ca..1d8ceb1 100644 --- a/backend/src/api/routes/ai_model.py +++ b/backend/src/api/routes/ai_model.py @@ -1,7 +1,7 @@ import fastapi from src.api.dependencies.repository import get_repository -from src.models.schemas.ai_model import AiModel, AiModelInResponse +from src.models.schemas.ai_model import AiModel, AiModelChooseResponse, AiModelInResponse from src.repository.crud.ai_model import AiModelCRUDRepository router = fastapi.APIRouter(prefix="/models", tags=["model"]) @@ -22,11 +22,26 @@ async def get_aimodels( for ai_model in ai_models: aimodel = AiModelInResponse( id=ai_model.id, - available_models=AiModel( - name=ai_model.name, - des=ai_model.des, - ), + name=ai_model.name, + des=ai_model.des, ) ai_model_list.append(aimodel) return ai_model_list + + +@router.post( + path="/{id}", + name="models:choose-model", + response_model=AiModelChooseResponse, + status_code=fastapi.status.HTTP_200_OK, +) +async def choose_aimodels( + id: int, + aimodel_repo: AiModelCRUDRepository = fastapi.Depends(get_repository(repo_type=AiModelCRUDRepository)), +) -> AiModelChooseResponse: + ai_model = await aimodel_repo.read_aimodel_by_id(id=id) + return AiModelChooseResponse( + name=ai_model.name, + msg="Model has been selected", + ) diff --git a/backend/src/api/routes/chat.py b/backend/src/api/routes/chat.py index 8d9893e..32e112c 100644 --- a/backend/src/api/routes/chat.py +++ b/backend/src/api/routes/chat.py @@ -1,8 +1,9 @@ import fastapi -from src.api.dependencies.repository import get_repository +from 
src.api.dependencies.repository import get_rag_repository, get_repository from src.models.schemas.chat import ChatHistory, ChatInMessage, ChatInResponse, Session from src.repository.crud.chat import ChatHistoryCRUDRepository, SessionCRUDRepository +from src.repository.rag.chat import RAGChatModelRepository from src.securities.authorizations.jwt import jwt_generator from src.utilities.exceptions.database import EntityDoesNotExist @@ -19,6 +20,7 @@ async def chat( chat_in_msg: ChatInMessage, session_repo: SessionCRUDRepository = fastapi.Depends(get_repository(repo_type=SessionCRUDRepository)), chat_repo: ChatHistoryCRUDRepository = fastapi.Depends(get_repository(repo_type=ChatHistoryCRUDRepository)), + rag_chat_repo: RAGChatModelRepository = fastapi.Depends(get_rag_repository(repo_type=RAGChatModelRepository)), ) -> ChatInResponse: # if not chat_in_msg.accountID: # chat_in_msg.accountID = 0 @@ -32,8 +34,7 @@ async def chat( # create_session = await session_repo.read_create_sessions_by_id(id=chat_in_msg.sessionId, account_id=chat_in_msg.accountID, name=chat_in_msg.message[:40]) session_id = chat_in_msg.sessionId await chat_repo.create_chat_history(session_id=session_id, is_bot_msg=False, message=chat_in_msg.message) - # TODO use RAG framework to generate the response message - response_msg = "Oh, really? It's amazing !" 
+ response_msg = await rag_chat_repo.get_response(session_id=session_id, input_msg=chat_in_msg.message) await chat_repo.create_chat_history(session_id=session_id, is_bot_msg=True, message=response_msg) return ChatInResponse( sessionId=session_id, @@ -101,7 +102,6 @@ async def get_chathistory( ) -> list[ChatHistory]: chats = await chat_repo.read_chat_history_by_session_id(id=id) chats_list: list = list() - print("2222222222") for chat in chats: res_session = ChatHistory( id=chat.id, diff --git a/backend/src/api/routes/file.py b/backend/src/api/routes/file.py index 72d3277..fefa8fd 100644 --- a/backend/src/api/routes/file.py +++ b/backend/src/api/routes/file.py @@ -2,13 +2,18 @@ import random import fastapi - +from src.config.settings.const import UPLOAD_FILE_PATH from src.api.dependencies.repository import get_repository from src.models.schemas.file import FileInResponse, FileStatusInResponse from src.repository.crud.file import UploadedFileCRUDRepository +from fastapi import BackgroundTasks router = fastapi.APIRouter(prefix="/file", tags=["file"]) +async def save_upload_file(file: fastapi.UploadFile, save_file: str): + with open(save_file, "wb") as f: + contents = await file.read() + f.write(contents) @router.post( "", @@ -17,25 +22,22 @@ status_code=fastapi.status.HTTP_201_CREATED, ) async def upload_and_return_id( + background_tasks: BackgroundTasks, file: fastapi.UploadFile = fastapi.File(...), file_repo: UploadedFileCRUDRepository = fastapi.Depends(get_repository(repo_type=UploadedFileCRUDRepository)), ): new_file = await file_repo.create_uploadfile(file_name=file.filename) - # TODO save_path to a global constant - save_path = "./uploaded_files/" + save_path = UPLOAD_FILE_PATH if not os.path.exists(save_path): os.mkdir(save_path) save_file = os.path.join(save_path, file.filename) - with open(save_file, "wb") as f: - contents = await file.read() - f.write(contents) + background_tasks.add_task(save_upload_file, file, save_file) return 
FileInResponse(fileID=new_file.id) - @router.get( path="/{id}", name="file:check upload status", @@ -51,4 +53,4 @@ async def check_status( # 0 for in process # 1 for complete successfully # -1 for error - return FileStatusInResponse(status=random.choice(choices)) + return FileStatusInResponse(status=1) diff --git a/backend/src/api/routes/train.py b/backend/src/api/routes/train.py index 776bae8..865be8c 100644 --- a/backend/src/api/routes/train.py +++ b/backend/src/api/routes/train.py @@ -33,9 +33,8 @@ async def check_status( id: int, ) -> TrainStatusInResponse: - # TODO check process status of training choices = [0, 1, -1] # 0 for in process # 1 for complete successfully # -1 for error - return TrainStatusInResponse(status=random.choice(choices)) + return TrainStatusInResponse(status=1) diff --git a/backend/src/config/settings/const.py b/backend/src/config/settings/const.py new file mode 100644 index 0000000..f92c09a --- /dev/null +++ b/backend/src/config/settings/const.py @@ -0,0 +1,4 @@ +# Uploaded file path +UPLOAD_FILE_PATH = "./uploaded_files/" +MAX_SQL_LENGTH = 200 +DEFAULT_MODEL = "all-MiniLM-L6-v2" diff --git a/backend/src/models/schemas/ai_model.py b/backend/src/models/schemas/ai_model.py index bc3e149..9e17cc5 100644 --- a/backend/src/models/schemas/ai_model.py +++ b/backend/src/models/schemas/ai_model.py @@ -2,15 +2,22 @@ class AiModel(BaseSchemaModel): + id: int name: str - des: str | None + des: str class AiModelInResponse(BaseSchemaModel): id: int - available_models: AiModel + name: str + des: str class AiModelInUpdate(BaseSchemaModel): name: str | None des: str | None + + +class AiModelChooseResponse(BaseSchemaModel): + name: str + msg: str diff --git a/backend/src/models/schemas/base.py b/backend/src/models/schemas/base.py index c1274b1..de870c3 100644 --- a/backend/src/models/schemas/base.py +++ b/backend/src/models/schemas/base.py @@ -9,8 +9,8 @@ class BaseSchemaModel(pydantic.BaseModel): class Config(pydantic.BaseConfig): - orm_mode: bool = True 
+ from_attributes: bool = True validate_assignment: bool = True - allow_population_by_field_name: bool = True + populate_by_name: bool = True json_encoders: dict = {datetime.datetime: format_datetime_into_isoformat} alias_generator: typing.Any = format_dict_key_to_camel_case diff --git a/backend/src/repository/crud/ai_model.py b/backend/src/repository/crud/ai_model.py index 6d05637..7a2af12 100644 --- a/backend/src/repository/crud/ai_model.py +++ b/backend/src/repository/crud/ai_model.py @@ -27,57 +27,54 @@ async def read_aimodels(self) -> typing.Sequence[AiModel]: async def read_aimodel_by_id(self, id: int) -> AiModel: stmt = sqlalchemy.select(AiModel).where(AiModel.id == id) - query = await self.async_session.execute(statement=stmt) - - if not query: - raise EntityDoesNotExist("Account with id `{id}` does not exist!") - - return query.scalar() # type: ignore + result = await self.async_session.execute(statement=stmt) + ai_model = result.scalar_one_or_none() + if ai_model is None: + raise EntityDoesNotExist(f"AiModel with id `{id}` does not exist!") + return ai_model async def read_aimodel_by_name(self, name: str) -> AiModel: stmt = sqlalchemy.select(AiModel).where(AiModel.name == name) query = await self.async_session.execute(statement=stmt) - if not query: - raise EntityDoesNotExist("Account with username `{name}` does not exist!") + if not query.scalar(): + raise EntityDoesNotExist(f"AiModel with name `{name}` does not exist!") - return query.scalar() # type: ignore + return query.scalar() - async def update_aimodel_by_id(self, id: int, aicount_update: AiModelInUpdate) -> AiModel: - new_aicount_data = aicount_update.dict() + async def update_aimodel_by_id(self, id: int, aimodel_update: AiModelInUpdate) -> AiModel: + new_aimodel_data = aimodel_update.dict() select_stmt = sqlalchemy.select(AiModel).where(AiModel.id == id) query = await self.async_session.execute(statement=select_stmt) update_aimodel = query.scalar() if not update_aimodel: - raise 
EntityDoesNotExist(f"AiModel with id `{id}` does not exist!") # type: ignore - - update_stmt = sqlalchemy.update(table=AiModel).where(AiModel.id == update_aimodel.id).values() # type: ignore - - if new_aicount_data["name"]: - update_stmt = update_stmt.values(username=new_aicount_data["name"]) + raise EntityDoesNotExist(f"AiModel with id `{id}` does not exist!") - if new_aicount_data["des"]: - update_stmt = update_stmt.values(username=new_aicount_data["des"]) + update_stmt = ( + sqlalchemy.update(AiModel) + .where(AiModel.id == update_aimodel.id) + .values(name=new_aimodel_data["name"], des=new_aimodel_data["des"]) + ) await self.async_session.execute(statement=update_stmt) await self.async_session.commit() await self.async_session.refresh(instance=update_aimodel) - return update_aimodel # type: ignore + return update_aimodel - async def delete_account_by_id(self, id: int) -> str: + async def delete_aimodel_by_id(self, id: int) -> str: select_stmt = sqlalchemy.select(AiModel).where(AiModel.id == id) query = await self.async_session.execute(statement=select_stmt) delete_aimodel = query.scalar() if not delete_aimodel: - raise EntityDoesNotExist(f"Ai Model with id `{id}` does not exist!") # type: ignore + raise EntityDoesNotExist(f"AiModel with id `{id}` does not exist!") - stmt = sqlalchemy.delete(table=AiModel).where(AiModel.id == delete_aimodel.id) + stmt = sqlalchemy.delete(AiModel).where(AiModel.id == delete_aimodel.id) await self.async_session.execute(statement=stmt) await self.async_session.commit() - return f"Ai Model with id '{id}' is successfully deleted!" + return f"AiModel with id '{id}' is successfully deleted!" 
diff --git a/backend/src/repository/crud/__init__.py b/backend/src/repository/rag/__init__.py
similarity index 100%
rename from backend/src/repository/crud/__init__.py
rename to backend/src/repository/rag/__init__.py
diff --git a/backend/src/repository/rag/base.py b/backend/src/repository/rag/base.py
new file mode 100644
index 0000000..80262c3
--- /dev/null
+++ b/backend/src/repository/rag/base.py
@@ -0,0 +1,8 @@
+from sqlalchemy.ext.asyncio import AsyncSession as SQLAlchemyAsyncSession
+
+
+class BaseRAGRepository:
+    """Base class for RAG repositories; stores the async database session."""
+
+    def __init__(self, async_session: SQLAlchemyAsyncSession):
+        self.async_session = async_session
diff --git a/backend/src/repository/rag/chat.py b/backend/src/repository/rag/chat.py
new file mode 100644
index 0000000..0655caf
--- /dev/null
+++ b/backend/src/repository/rag/chat.py
@@ -0,0 +1,37 @@
+from sentence_transformers import SentenceTransformer
+
+from src.config.settings.const import DEFAULT_MODEL, MAX_SQL_LENGTH
+from src.repository.rag.base import BaseRAGRepository
+
+
+class RAGChatModelRepository(BaseRAGRepository):
+    """Chat repository intended to answer messages through a RAG pipeline."""
+
+    # Default model, loaded once when this module is imported. No device is
+    # passed: the second positional parameter of SentenceTransformer is
+    # `modules`, not `device`, so the former "cuda" argument never selected a
+    # device. The library picks the device itself when none is given.
+    model = SentenceTransformer(DEFAULT_MODEL)
+
+    async def load_model(self, session_id: int, model_name: str) -> bool:
+        """Load `model_name` and make it the model of this repository instance.
+
+        `session_id` is currently unused. Returns True on success and False
+        when loading fails, in which case the previous model stays in place.
+        """
+        try:
+            model = SentenceTransformer(model_name)
+            model.max_seq_length = MAX_SQL_LENGTH
+        except Exception as e:
+            # Best-effort: report failure through the return value.
+            print(e)
+            return False
+        # Keep the loaded model instead of discarding it on return.
+        self.model = model
+        return True
+
+    async def get_response(self, session_id: int, input_msg: str) -> str:
+        """Return the reply to `input_msg`; currently a fixed placeholder."""
+        # TODO use RAG framework to generate the response message
+        response_msg = "Oh, really? It's amazing !"
+        return response_msg