Commit 810b043: Add embedding
Signed-off-by: Aisuko <[email protected]>
Aisuko committed Jul 21, 2024
1 parent da02a1e commit 810b043
Showing 5 changed files with 62 additions and 26 deletions.
2 changes: 2 additions & 0 deletions Makefile
@@ -64,6 +64,7 @@ INFERENCE_ENG:=llamacpp
INFERENCE_ENG_PORT:=8080
INFERENCE_ENG_VERSION:=server--b1-2321a5e
NUM_CPU_CORES:=8.00
NUM_CPU_CORES_EMBEDDING:=4.00

# Language model, default is phi3-mini-4k-instruct-q4.gguf
# https://github.com/SkywardAI/llama.cpp/blob/9b2f16f8055265c67e074025350736adc1ea0666/tests/test-chat-template.cpp#L91-L92
@@ -121,6 +122,7 @@ env:
@echo "INFERENCE_ENG_PORT=$(INFERENCE_ENG_PORT)">> $(FILE_NAME)
@echo "INFERENCE_ENG_VERSION=$(INFERENCE_ENG_VERSION)">> $(FILE_NAME)
@echo "NUM_CPU_CORES=$(NUM_CPU_CORES)">> $(FILE_NAME)
@echo "NUM_CPU_CORES_EMBEDDING=$(NUM_CPU_CORES_EMBEDDING)" >> $(FILE_NAME)
@echo "LANGUAGE_MODEL_NAME=$(LANGUAGE_MODEL_NAME)">> $(FILE_NAME)
@echo "ADMIN_USERNAME=$(ADMIN_USERNAME)">> $(FILE_NAME)
@echo "ADMIN_EMAIL=$(ADMIN_EMAIL)">> $(FILE_NAME)
6 changes: 6 additions & 0 deletions backend/src/repository/inference_eng.py
@@ -60,3 +60,9 @@ def instruct_infer_url(cls) -> str:
str: URL for the inference engine
"""
return f"http://{cls.infer_eng_url}:{cls.infer_eng_port}/completion"

@classmethod
def instruct_embedding_url(cls) -> str:
"""
"""
return f"http://embedding_eng:8082/embedding"

Check failure on line 68 in backend/src/repository/inference_eng.py
GitHub Actions / Code-Quality 💎 (ubuntu-latest, 3.11), Ruff (F541):
backend/src/repository/inference_eng.py:68:16: F541 f-string without any placeholders
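For reference, a minimal sketch of how a caller could hit the endpoint the new helper returns, mirroring the request and response shape used later in this commit (JSON body with a `content` field, `embedding` field in the response). The module path and the standalone `embed` helper are assumptions for illustration, not part of this commit.

```python
# Sketch only: assumes the module path src.repository.inference_eng and that the
# llama.cpp /embedding route accepts {"content": ...} and returns an "embedding"
# field, as the chat.py hunk in this commit expects.
import httpx

from src.repository.inference_eng import InferenceHelper


async def embed(text: str) -> list[float] | None:
    """POST text to the embedding engine and return the embedding vector (or None)."""
    async with httpx.AsyncClient() as client:
        res = await client.post(
            InferenceHelper.instruct_embedding_url(),
            headers={"Content-Type": "application/json"},
            json={"content": text},
            timeout=httpx.Timeout(timeout=None),
        )
        res.raise_for_status()
        return res.json().get("embedding")
```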
29 changes: 16 additions & 13 deletions backend/src/repository/rag/chat.py
@@ -126,26 +126,29 @@ async def get_context_by_question(input_msg: str):
"""

# tokenized_input
async with httpx.AsyncClient() as client:
try:
res=await client.post(
InferenceHelper.tokenizer_url(),
json={"content": input_msg},
)
res.raise_for_status()
tokenized_input = res.json().get("tokens")
except Exception as e:
pass

try:
res=await httpx_kit.async_client.post(
InferenceHelper.instruct_embedding_url(),
headers={"Content-Type": "application/json"},
json={"content": input_msg},
timeout=httpx.Timeout(timeout=None)
)
res.raise_for_status()
tokenized_input = res.json().get("embedding")

Check failure on line 138 in backend/src/repository/rag/chat.py
GitHub Actions / Code-Quality 💎 (ubuntu-latest, 3.11), Ruff (F841):
backend/src/repository/rag/chat.py:138:17: F841 Local variable `tokenized_input` is assigned to but never used
except Exception as e:
loguru.logger.error(e)
# search the context in the vector database
result=await vector_db.search(tokenized_input, 1, collection_name="aisuko_squad01")
# context=vector_db.search(tokenized_input, 1, collection_name="aisuko_squad01")
context=""
# combine the context with the input message
context = ""
return context or InferenceHelper.instruction

current_context = await get_context_by_question(input_msg)

Check failure on line 147 in backend/src/repository/rag/chat.py
GitHub Actions / Code-Quality 💎 (ubuntu-latest, 3.11), Ruff (F841):
backend/src/repository/rag/chat.py:147:9: F841 Local variable `current_context` is assigned to but never used


data_with_context = {
"prompt": self.format_prompt(input_msg, current_context),
"prompt": self.format_prompt(input_msg, current_context=""),
"temperature": temperature,
"top_k": top_k,
"top_p": top_p,
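This hunk swaps the tokenizer call for the new embedding call, but it still passes `current_context=""` to `format_prompt`, and the Ruff F841 failures above flag the unused `tokenized_input` and `current_context`. Below is a hedged sketch of the flow the hunk appears to be building toward, assuming `httpx`, `loguru`, `httpx_kit`, `vector_db`, and `InferenceHelper` are the module-level imports already present in chat.py, and that `vector_db.search` remains the synchronous method defined in vector_database.py.

```python
# Sketch, not the committed code: return the nearest stored title as context,
# falling back to the default instruction when embedding or search fails.
async def get_context_by_question(input_msg: str) -> str:
    try:
        res = await httpx_kit.async_client.post(
            InferenceHelper.instruct_embedding_url(),
            headers={"Content-Type": "application/json"},
            json={"content": input_msg},
            timeout=httpx.Timeout(timeout=None),
        )
        res.raise_for_status()
        embedding = res.json().get("embedding")
        # search() is a plain (non-async) method in this commit, so it is not awaited.
        # Depending on the /embedding response shape, the vector may need to be
        # wrapped in a list, since Milvus expects a list of query vectors.
        sentences = vector_db.search(embedding, 1, collection_name="aisuko_squad01")
        if sentences:
            return sentences[0]
    except Exception as e:
        loguru.logger.error(e)
    return InferenceHelper.instruction
```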
34 changes: 21 additions & 13 deletions backend/src/repository/vector_database.py
@@ -60,19 +60,27 @@ def insert_list(self, collection_name: str = DEFAULT_COLLECTION, data_list: list

def search(self, data, n_results, collection_name=DEFAULT_COLLECTION):
search_params = {"metric_type": "COSINE", "params": {}}
res = self.client.search(
collection_name=collection_name,
data=data,
limit=n_results,
search_params=search_params,
output_fields=["title"],
)
loguru.logger.info(f"Vector Database --- Result: {res}")
sentences = []
for hits in res:
for hit in hits:
sentences.append(hit.get("entity").get("title"))
return sentences
try:

res = self.client.search(
collection_name=collection_name,
data=data,
limit=n_results,
search_params=search_params,
output_fields=["title"],
)

loguru.logger.info(f"Vector Database --- Result: {res}")
sentences = []
for hits in res:
for hit in hits:
sentences.append(hit.get("entity").get("title"))
return sentences
except Exception as e:
loguru.logger.error(e)
return None



def create_index(self, index_name, index_params, collection_name=DEFAULT_COLLECTION):
self.client.create_index(collection_name, index_name, index_params)
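With this change, `search` logs and swallows exceptions and returns None instead of raising, so callers have to handle both outcomes. A hypothetical usage sketch follows; the query vector, its dimension, and the collection name are placeholders, and `vector_db` is assumed to be the client instance exposed by this module.

```python
# Hypothetical caller; a 384-dimensional zero vector stands in for a real embedding.
query_vectors = [[0.0] * 384]  # MilvusClient.search expects a list of query vectors
titles = vector_db.search(query_vectors, 1, collection_name="aisuko_squad01")
if titles is None:
    # search() already logged the underlying error and returned None; fall back gracefully.
    print("vector search failed")
else:
    print(titles)  # e.g. ["Some matching document title"]
```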
17 changes: 17 additions & 0 deletions docker-compose.yaml
@@ -87,6 +87,7 @@ services:
- ETCD_AUTO_COMPACTION_RETENTION=${ETCD_AUTO_COMPACTION_RETENTION}
- ETCD_QUOTA_BACKEND_BYTES=${ETCD_QUOTA_BACKEND_BYTES}
- NUM_CPU_CORES=${NUM_CPU_CORES}
- NUM_CPU_CORES_EMBEDDING=${NUM_CPU_CORES_EMBEDDING}
volumes:
- ./backend/:/app/
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/models:/models
@@ -163,6 +164,22 @@ services:
- 8080:8080
command: ["-m", "models/${LANGUAGE_MODEL_NAME}","-c","8192"]

embedding_eng:
container_name: embedding_eng
image: gclub/llama.cpp:${INFERENCE_ENG_VERSION}
restart: always
deploy: # https://github.com/compose-spec/compose-spec/blob/master/deploy.md
resources:
reservations:
cpus: "${NUM_CPU_CORES_EMBEDDING}"
volumes:
- "${DOCKER_VOLUME_DIRECTORY:-.}/volumes/models:/models"
expose:
- 8080
ports:
- 8082:8080
command: ["-m", "models/${LANGUAGE_MODEL_NAME}","--embeddings","--pooling","mean","-c","512"]

rebel:
container_name: rebel
image: ghcr.io/skywardai/rebel:v0.1.6
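Once the stack is up, the new embedding_eng service can be smoke-tested from the host through the published port (8082 on the host maps to 8080 in the container). A sketch assuming the llama.cpp server's /embedding route returns an `embedding` field, as the backend code in this commit expects.

```python
# Host-side smoke test for the embedding_eng container; the endpoint shape is
# assumed from the backend changes in this commit.
import httpx

res = httpx.post(
    "http://localhost:8082/embedding",
    headers={"Content-Type": "application/json"},
    json={"content": "hello world"},
    timeout=None,
)
res.raise_for_status()
embedding = res.json().get("embedding")
print(len(embedding) if embedding else "no embedding field in response")
```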
