diff --git a/src/rerankers/bge_base/local/Dockerfile b/src/rerankers/bge_base/local/Dockerfile
new file mode 100644
index 0000000..97897b3
--- /dev/null
+++ b/src/rerankers/bge_base/local/Dockerfile
@@ -0,0 +1,16 @@
+# Use an official Python runtime as a parent image
+FROM python:3.9-slim
+
+WORKDIR /app
+
+# Install requirements first to take advantage of Docker layer caching
+COPY requirements.txt requirements.txt
+RUN pip3 install -r requirements.txt
+
+# Copy the rest of the application code to the working directory
+COPY . /app/
+
+EXPOSE 8000
+
+# Start the ASGI server
+CMD ["hypercorn", "--bind", "0.0.0.0:8000", "api:app"]
diff --git a/src/rerankers/bge_base/local/README.md b/src/rerankers/bge_base/local/README.md
new file mode 100644
index 0000000..6a2b982
--- /dev/null
+++ b/src/rerankers/bge_base/local/README.md
@@ -0,0 +1,20 @@
+## BGE base reranker
+
+### Purpose
+Re-ranks a list of content chunks by relevance to a given question, using the `BAAI/bge-reranker-base` cross-encoder.
+
+### Testing the model deployment
+To test just the Hugging Face model deployment locally, follow these steps:
+
+- Clone the repo
+- Go to this folder: `cd src/rerankers/bge_base/local`
+- Build the Docker image and test the API:
+```
+docker build -t testmodel .
+docker run -p 8000:8000 testmodel
+curl -X POST -H "Content-Type: application/json" \
+    -d '{"question": "What is agriculture ?", "content_chunks": ["Farming is a practice of growing crops to sell them to generate money", "LLM are the present day hype machine but will they be useful until you can truly reason with them? ", "Things are generally better than what people deep into it feel"]}' \
+    http://localhost:8000/
+```
+
+The endpoint returns a JSON array of relevance scores, one per content chunk (higher means more relevant).
diff --git a/src/rerankers/bge_base/local/__init__.py b/src/rerankers/bge_base/local/__init__.py
new file mode 100644
index 0000000..7faa07a
--- /dev/null
+++ b/src/rerankers/bge_base/local/__init__.py
@@ -0,0 +1,2 @@
+from .request import ModelRequest
+from .model import Model
diff --git a/src/rerankers/bge_base/local/api.py b/src/rerankers/bge_base/local/api.py
new file mode 100644
index 0000000..09f4cd7
--- /dev/null
+++ b/src/rerankers/bge_base/local/api.py
@@ -0,0 +1,30 @@
+from model import Model
+from request import ModelRequest
+from quart import Quart, request, jsonify
+import aiohttp
+
+app = Quart(__name__)
+
+model = None
+
+@app.before_serving
+async def startup():
+    app.client = aiohttp.ClientSession()
+    global model
+    model = Model(app)
+
+@app.route('/', methods=['POST'])
+async def rerank():
+    global model
+    data = await request.get_json()
+    req = ModelRequest(**data)
+    prediction = await model.inference(req)
+    if prediction is not None:
+        # Convert the NumPy array of scores to a list so it is JSON serializable
+        return jsonify(prediction.tolist())
+    else:
+        # Return a meaningful error message if prediction is None
+        return jsonify({'error': 'Prediction failed'}), 500
+
+if __name__ == "__main__":
+    app.run()
diff --git a/src/rerankers/bge_base/local/model.py b/src/rerankers/bge_base/local/model.py
new file mode 100644
index 0000000..2f50c7a
--- /dev/null
+++ b/src/rerankers/bge_base/local/model.py
@@ -0,0 +1,18 @@
+from request import ModelRequest
+from sentence_transformers.cross_encoder import CrossEncoder
+
+class Model:
+    def __new__(cls, context):
+        cls.context = context
+        if not hasattr(cls, 'instance'):
+            # Load the cross-encoder once and reuse it across requests (singleton)
+            cls.instance = super(Model, cls).__new__(cls)
+            model_name = "BAAI/bge-reranker-base"
+            cls.model = CrossEncoder(model_name)
+        return cls.instance
+
+    async def inference(self, request: ModelRequest):
+        # Score each (question, chunk) pair with the cross-encoder
+        predict_array = request.predict_array
+        predictions = self.model.predict(predict_array)
+        return predictions
diff --git a/src/rerankers/bge_base/local/request.py b/src/rerankers/bge_base/local/request.py
new file mode 100644
index 0000000..8b0bd66
--- /dev/null
+++ b/src/rerankers/bge_base/local/request.py
@@ -0,0 +1,12 @@
+import json
+
+class ModelRequest:
+    def __init__(self, question, content_chunks):
+        self.question = question
+        self.content_chunks = content_chunks
+        # Pair the question with each chunk for cross-encoder scoring
+        self.predict_array = [[question, content] for content in content_chunks]
+
+    def to_json(self):
+        return json.dumps(self, default=lambda o: o.__dict__,
+                          sort_keys=True, indent=4)
diff --git a/src/rerankers/bge_base/local/requirements.txt b/src/rerankers/bge_base/local/requirements.txt
new file mode 100644
index 0000000..cdaa932
--- /dev/null
+++ b/src/rerankers/bge_base/local/requirements.txt
@@ -0,0 +1,5 @@
+torch
+sentence_transformers
+quart
+hypercorn
+aiohttp
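
For a quick end-to-end check, here is a minimal Python client sketch, equivalent to the `curl` call in the README. It assumes the container is running on `localhost:8000` and that the endpoint returns a JSON array with one relevance score per content chunk, as `api.py` does; the `requests` dependency is only for this client, not part of the service.

```python
# Minimal client sketch for the reranker API above (illustrative, not part of the service).
import requests

payload = {
    "question": "What is agriculture ?",
    "content_chunks": [
        "Farming is a practice of growing crops to sell them to generate money",
        "LLM are the present day hype machine but will they be useful until you can truly reason with them? ",
        "Things are generally better than what people deep into it feel",
    ],
}

resp = requests.post("http://localhost:8000/", json=payload)
resp.raise_for_status()
scores = resp.json()  # one relevance score per content chunk

# Sort chunks by score, most relevant first
ranked = sorted(zip(payload["content_chunks"], scores), key=lambda p: p[1], reverse=True)
for chunk, score in ranked:
    print(f"{score:+.2f}  {chunk}")
```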