diff --git a/.doctrees/api_spec/autorag.deploy.doctree b/.doctrees/api_spec/autorag.deploy.doctree index e2ee5cff1..c504440db 100644 Binary files a/.doctrees/api_spec/autorag.deploy.doctree and b/.doctrees/api_spec/autorag.deploy.doctree differ diff --git a/.doctrees/api_spec/autorag.utils.doctree b/.doctrees/api_spec/autorag.utils.doctree index 4afb8bca3..48e81cf77 100644 Binary files a/.doctrees/api_spec/autorag.utils.doctree and b/.doctrees/api_spec/autorag.utils.doctree differ diff --git a/.doctrees/deploy/api_endpoint.doctree b/.doctrees/deploy/api_endpoint.doctree index 933ca3d7b..3c743b1f1 100644 Binary files a/.doctrees/deploy/api_endpoint.doctree and b/.doctrees/deploy/api_endpoint.doctree differ diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle index ab8d9400c..283562bc4 100644 Binary files a/.doctrees/environment.pickle and b/.doctrees/environment.pickle differ diff --git a/_modules/autorag/deploy/api.html b/_modules/autorag/deploy/api.html index bf072455f..e3243e472 100644 --- a/_modules/autorag/deploy/api.html +++ b/_modules/autorag/deploy/api.html @@ -428,9 +428,10 @@
import os
import pathlib
import uuid
-from typing import Dict, Optional, List, Union
+from typing import Dict, Optional, List, Union, Literal
import pandas as pd
+from pyngrok import ngrok
from quart import Quart, request, jsonify
from quart.helpers import stream_with_context
from pydantic import BaseModel, ValidationError
@@ -475,6 +476,21 @@ Source code for autorag.deploy.api
+
+[docs]
class StreamResponse(BaseModel):
    """
    A single chunk emitted while streaming a result.

    The ``type`` field selects which of the other fields carry data:

    - ``"generated_text"``: only ``generated_text`` is set.
      The other fields are None.
    - ``"retrieved_passage"``: only ``retrieved_passage`` and ``passage_index`` are set.
      The other fields are None.
    """

    type: Literal["generated_text", "retrieved_passage"]
    # The payload fields default to None so that a caller only has to pass the
    # fields that belong to the chosen ``type``. Passing None explicitly still works.
    generated_text: Optional[str] = None
    retrieved_passage: Optional[RetrievedPassage] = None
    passage_index: Optional[int] = None
+
+
+
[docs]
class VersionResponse(BaseModel):
@@ -482,11 +498,6 @@ Source code for autorag.deploy.api
-empty_retrieved_passage = RetrievedPassage(
- content="", doc_id="", filepath=None, file_page=None, start_idx=None, end_idx=None
-)
-
-
[docs]
class ApiRunner(BaseRunner):
@@ -578,19 +589,28 @@ Source code for autorag.deploy.api
retrieved_passages = self.extract_retrieve_passage(
previous_result
)
- response = RunResponse(
- result="", retrieved_passage=retrieved_passages
- )
- yield response.model_dump_json().encode("utf-8")
+ for i, retrieved_passage in enumerate(retrieved_passages):
+ yield (
+ StreamResponse(
+ type="retrieved_passage",
+ generated_text=None,
+ retrieved_passage=retrieved_passage,
+ passage_index=i,
+ )
+ .model_dump_json()
+ .encode("utf-8")
+ )
# Start streaming of the result
assert len(previous_result) == 1
prompt: str = previous_result["prompts"].tolist()[0]
async for delta in module_instance.astream(
prompt=prompt, **module_param
):
- response = RunResponse(
- result=delta,
- retrieved_passage=[empty_retrieved_passage],
+ response = StreamResponse(
+ type="generated_text",
+ generated_text=delta,
+ retrieved_passage=None,
+ passage_index=None,
)
yield response.model_dump_json().encode("utf-8")
@@ -605,31 +625,23 @@ Source code for autorag.deploy.api
[docs]
- def run_api_server(self, host: str = "0.0.0.0", port: int = 8000, **kwargs):
+ def run_api_server(
+ self, host: str = "0.0.0.0", port: int = 8000, remote: bool = True, **kwargs
+ ):
"""
- Run the pipeline as api server.
- You can send POST request to `http://host:port/run` with json body like below:
-
- .. Code:: json
-
- {
- "query": "your query",
- "result_column": "generated_texts"
- }
-
- And it returns json response like below:
-
- .. Code:: json
-
- {
- "answer": "your answer"
- }
+ Run the pipeline as an API server.
+ The API endpoint documentation is available at https://docs.auto-rag.com/deploy/api_endpoint.html
:param host: The host of the api server.
:param port: The port of the api server.
+ :param remote: Whether to expose the api server to the public internet using ngrok.
+ When True, an ngrok HTTP tunnel to ``port`` is opened before the server starts.
:param kwargs: Other keyword arguments forwarded to ``self.app.run``.
"""
logger.info(f"Run api server at {host}:{port}")
+ # Open an ngrok HTTP tunnel to the local port and log the public URL it was assigned.
+ # NOTE(review): the tunnel is not closed in this method after the server stops - confirm
+ # whether cleanup is handled elsewhere.
+ if remote:
+ http_tunnel = ngrok.connect(str(port), "http")
+ public_url = http_tunnel.public_url
+ logger.info(f"Public API URL: {public_url}")
self.app.run(host=host, port=port, **kwargs)
diff --git a/_modules/autorag/utils/util.html b/_modules/autorag/utils/util.html
index 256b64679..2225e2c02 100644
--- a/_modules/autorag/utils/util.html
+++ b/_modules/autorag/utils/util.html
@@ -431,11 +431,13 @@ Source code for autorag.utils.util
import glob
import inspect
import itertools
+import json
import logging
import os
import re
import string
from copy import deepcopy
+from json import JSONDecoder
from typing import List, Callable, Dict, Optional, Any, Collection, Iterable
from asyncio import AbstractEventLoop
@@ -1227,6 +1229,53 @@ Source code for autorag.utils.util
yaml_dict = convert_env_in_dict(yaml_dict)
return yaml_dict
+
+
+
+[docs]
def decode_multiple_json_from_bytes(byte_data: bytes) -> list:
    """
    Decode multiple JSON objects from bytes received from SSE server.

    The JSON values may be concatenated back to back or separated by whitespace.
    Any character that does not start a valid JSON value is skipped, so the
    decoding is best-effort and never raises on malformed JSON.

    Args:
        byte_data: Bytes containing one or more JSON objects

    Returns:
        List of decoded JSON objects, in the order they appear

    Raises:
        ValueError: If ``byte_data`` is not valid UTF-8
    """
    try:
        text_data = byte_data.decode("utf-8").strip()
    except UnicodeDecodeError as err:
        # Keep the original decoding error attached as the cause.
        raise ValueError("Invalid byte data: Unable to decode as UTF-8") from err

    decoder = JSONDecoder()
    result = []
    pos = 0
    length = len(text_data)

    while pos < length:
        try:
            # Pass the offset rather than slicing the string: slicing copies the
            # whole remainder for every object. The returned end index is
            # absolute (relative to the start of ``text_data``).
            json_obj, json_end = decoder.raw_decode(text_data, pos)
        except json.JSONDecodeError:
            # Nothing decodable starts here; skip one character and retry.
            pos += 1
            continue

        result.append(json_obj)
        pos = json_end

        # raw_decode does not accept leading whitespace, so skip it here.
        while pos < length and text_data[pos].isspace():
            pos += 1

    return result
+
diff --git a/_sources/deploy/api_endpoint.md.txt b/_sources/deploy/api_endpoint.md.txt
index 51196bd04..437edc272 100644
--- a/_sources/deploy/api_endpoint.md.txt
+++ b/_sources/deploy/api_endpoint.md.txt
@@ -37,11 +37,21 @@ runner.run_api_server()
autorag run_api --trial_dir /trial/dir/0 --host 0.0.0.0 --port 8000
```
-## API Endpoint
+## Use NGrok Tunnel for public access
-Certainly! To generate API endpoint documentation in Markdown format from the provided OpenAPI specification, we need to break down each endpoint and describe its purpose, request parameters, and response structure. Here's how you can document the API:
+To make the API server reachable from the public internet, you can use the ngrok tunnel service.
+It automatically creates an ngrok tunnel to your local server.
+
+The public URL is printed in the logs, like below:
+
+```
+INFO [api.py:199] >> Public API URL: api.py:199
+ https://8a31-14-52-132-205.ngrok-free.app
+```
+This URL points to your local server, so use it as the host when sending requests.
----
+
+## API Endpoint
## Example API Documentation
@@ -92,8 +102,9 @@ Certainly! To generate API endpoint documentation in Markdown format from the pr
- **Content Type**: `text/event-stream`
- **Schema**:
- **Properties**:
- - `result` (string or array of strings): The result text or list of texts (streamed line by line).
- - `retrieved_passage` (array of objects): List of retrieved passages.
+ - `type` (`generated_text` or `retrieved_passage`): The kind of data in this chunk. When it is `generated_text`, only `generated_text` is set. When it is `retrieved_passage`, only `retrieved_passage` and `passage_index` are set.
+ - `generated_text` (string, nullable): A piece of the text generated by the generator (LLM), i.e. the streamed result of the RAG system.
+ - `retrieved_passage` (object, nullable): A single retrieved passage.
- **Properties**:
- `content` (string): The content of the passage.
- `doc_id` (string): Document ID.
@@ -101,6 +112,7 @@ Certainly! To generate API endpoint documentation in Markdown format from the pr
- `file_page` (integer, nullable): File page number.
- `start_idx` (integer, nullable): Start index.
- `end_idx` (integer, nullable): End index.
+ - `passage_index` (integer, nullable): Zero-based index of the retrieved passage.
---
@@ -133,7 +145,7 @@ Here's the Python client code for each endpoint:
```python
import requests
-import json
+from autorag.utils.util import decode_multiple_json_from_bytes
# Base URL of the API
BASE_URL = "http://example.com:8000" # Replace with the actual base URL of the API
@@ -156,17 +168,24 @@ def stream_query(query, result_column="generated_texts"):
"query": query,
"result_column": result_column
}
- response = requests.post(url, json=payload, stream=True)
- if response.status_code == 200:
- for i, chunk in enumerate(response.iter_content(chunk_size=None)):
- if chunk:
- # Decode the chunk and print it
- data = json.loads(chunk.decode("utf-8"))
- if i == 0:
- retrieved_passages = data["retrieved_passage"] # The retrieved passages
- print(data["result"], end="")
- else:
- response.raise_for_status()
+ with requests.Session() as session:
+ response = session.post(url, json=payload, stream=True)
+ retrieved_passages = [] # This will store retrieved passages
+
+ # Check if the request was successful
+ if response.status_code == 200:
+ # Process the streaming response
+ for i, chunk in enumerate(response.iter_content(chunk_size=None)):
+ if chunk:
+ data_list = decode_multiple_json_from_bytes(chunk)
+ for data in data_list:
+ if data["type"] == "retrieved_passage":
+ retrieved_passages.append(data["retrieved_passage"])
+ else:
+ print(data["generated_text"], end="") # Stream the generated texts
+ else:
+ print(f"Request failed with status code: {response.status_code}")
+ print(f"Response content: {response.text}")
def get_version():
url = f"{BASE_URL}/version"
diff --git a/api_spec/autorag.deploy.html b/api_spec/autorag.deploy.html
index 91a9b3345..d28e1e10c 100644
--- a/api_spec/autorag.deploy.html
+++ b/api_spec/autorag.deploy.html
@@ -448,26 +448,15 @@ Submodules
-run_api_server(host: str = '0.0.0.0', port: int = 8000, **kwargs)[source]¶
-Run the pipeline as api server.
-You can send POST request to http://host:port/run with json body like below:
-{
- "query": "your query",
- "result_column": "generated_texts"
-}
-
-
-And it returns json response like below:
-{
- "answer": "your answer"
-}
-
-
+run_api_server(host: str = '0.0.0.0', port: int = 8000, remote: bool = True, **kwargs)[source]¶
+Run the pipeline as an api server.
+Here is api endpoint documentation => https://docs.auto-rag.com/deploy/api_endpoint.html
- Parameters:
host – The host of the api server.
port – The port of the api server.
+remote – Whether to expose the api server to the public internet using ngrok.
kwargs – Other arguments for Flask app.run.
@@ -604,6 +593,54 @@ Submodules
+-
+class autorag.deploy.api.StreamResponse(*, type: Literal['generated_text', 'retrieved_passage'], generated_text: str | None, retrieved_passage: RetrievedPassage | None, passage_index: int | None)[source]¶
+Bases: BaseModel
+When the type is generated_text, only generated_text is returned. The other fields are None.
+When the type is retrieved_passage, only retrieved_passage and passage_index are returned. The other fields are None.
+
+-
+generated_text: str | None¶
+
+
+
+-
+model_computed_fields: ClassVar[Dict[str, ComputedFieldInfo]] = {}¶
+A dictionary of computed field names and their corresponding ComputedFieldInfo objects.
+
+
+
+-
+model_config: ClassVar[ConfigDict] = {}¶
+Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].
+
+
+
+-
+model_fields: ClassVar[Dict[str, FieldInfo]] = {'generated_text': FieldInfo(annotation=Union[str, NoneType], required=True), 'passage_index': FieldInfo(annotation=Union[int, NoneType], required=True), 'retrieved_passage': FieldInfo(annotation=Union[RetrievedPassage, NoneType], required=True), 'type': FieldInfo(annotation=Literal['generated_text', 'retrieved_passage'], required=True)}¶
+Metadata about the fields defined on the model,
+mapping of field names to [FieldInfo][pydantic.fields.FieldInfo] objects.
+This replaces Model.__fields__ from Pydantic V1.
+
+
+
+-
+passage_index: int | None¶
+
+
+
+-
+retrieved_passage: RetrievedPassage | None¶
+
+
+
+-
+type: Literal['generated_text', 'retrieved_passage']¶
+
+
+
+
-
class autorag.deploy.api.VersionResponse(*, version: str)[source]¶
@@ -925,6 +962,16 @@ SubmodulesRunResponse.retrieved_passage
+
StreamResponse
+
VersionResponse
VersionResponse.model_computed_fields
VersionResponse.model_config
diff --git a/api_spec/autorag.html b/api_spec/autorag.html
index c4538cb61..9d0d919fb 100644
--- a/api_spec/autorag.html
+++ b/api_spec/autorag.html
@@ -522,6 +522,16 @@ SubpackagesRunResponse.retrieved_passage
+StreamResponse
+
VersionResponse
VersionResponse.model_computed_fields
VersionResponse.model_config
@@ -782,6 +792,7 @@ Subpackagesconvert_env_in_dict()
convert_inputs_to_list()
convert_string_to_tuple_in_dict()
+decode_multiple_json_from_bytes()
dict_to_markdown()
dict_to_markdown_table()
embedding_query_content()
diff --git a/api_spec/autorag.utils.html b/api_spec/autorag.utils.html
index ea950f4b2..45894ef42 100644
--- a/api_spec/autorag.utils.html
+++ b/api_spec/autorag.utils.html
@@ -532,6 +532,18 @@ Submodules
+
+autorag.utils.util.decode_multiple_json_from_bytes(byte_data: bytes) list [source]¶
+Decode multiple JSON objects from bytes received from SSE server.
+
+- Args:
byte_data: Bytes containing one or more JSON objects
+
+- Returns:
List of decoded JSON objects
+
+
+
+
-
autorag.utils.util.dict_to_markdown(d, level=1)[source]¶
@@ -918,6 +930,7 @@ Submodulesconvert_env_in_dict()
convert_inputs_to_list()
convert_string_to_tuple_in_dict()
+decode_multiple_json_from_bytes()
dict_to_markdown()
dict_to_markdown_table()
embedding_query_content()
diff --git a/deploy/api_endpoint.html b/deploy/api_endpoint.html
index 06d5316ca..870057c88 100644
--- a/deploy/api_endpoint.html
+++ b/deploy/api_endpoint.html
@@ -461,11 +461,20 @@ Running API server
+Use NGrok Tunnel for public access¶
+To make the API server reachable from the public internet, you can use the ngrok tunnel service.
+It automatically creates an ngrok tunnel to your local server.
+The public URL is printed in the logs, like below:
+INFO [api.py:199] >> Public API URL: api.py:199
+ https://8a31-14-52-132-205.ngrok-free.app
+
+
+This URL points to your local server, so use it as the host when sending requests.
+
API Endpoint¶
-Certainly! To generate API endpoint documentation in Markdown format from the provided OpenAPI specification, we need to break down each endpoint and describe its purpose, request parameters, and response structure. Here’s how you can document the API:
-
Example API Documentation¶
@@ -557,8 +566,9 @@ 2. /v1/stream
Properties:
-result
(string or array of strings): The result text or list of texts (streamed line by line).
-retrieved_passage
(array of objects): List of retrieved passages.
+type
(generated_text or retrieved_passage): If it is generated_text, you can see only the generated text. If it is retrieved_passage, you can see the retrieved passage and passage_index.
+generated_text
(string): The generated text from the generator (LLM). The result of the RAG system.
+retrieved_passage
(object): Retrieved passage.
Properties:
@@ -572,6 +582,7 @@ 2. /v1/stream
+passage_index
(integer): Index of the retrieved passage.
@@ -622,7 +633,7 @@ Python Sample Codeimport requests
-import json
+from autorag.utils.util import decode_multiple_json_from_bytes
# Base URL of the API
BASE_URL = "http://example.com:8000" # Replace with the actual base URL of the API
@@ -645,17 +656,24 @@ Python Sample Code"query": query,
"result_column": result_column
}
- response = requests.post(url, json=payload, stream=True)
- if response.status_code == 200:
- for i, chunk in enumerate(response.iter_content(chunk_size=None)):
- if chunk:
- # Decode the chunk and print it
- data = json.loads(chunk.decode("utf-8"))
- if i == 0:
- retrieved_passages = data["retrieved_passage"] # The retrieved passages
- print(data["result"], end="")
- else:
- response.raise_for_status()
+ with requests.Session() as session:
+ response = session.post(url, json=payload, stream=True)
+ retrieved_passages = [] # This will store retrieved passages
+
+ # Check if the request was successful
+ if response.status_code == 200:
+ # Process the streaming response
+ for i, chunk in enumerate(response.iter_content(chunk_size=None)):
+ if chunk:
+ data_list = decode_multiple_json_from_bytes(chunk)
+ for data in data_list:
+ if data["type"] == "retrieved_passage":
+ retrieved_passages.append(data["retrieved_passage"])
+ else:
+ print(data["generated_text"], end="") # Stream the generated texts
+ else:
+ print(f"Request failed with status code: {response.status_code}")
+ print(f"Response content: {response.text}")
def get_version():
url = f"{BASE_URL}/version"
@@ -768,6 +786,7 @@ /version
- API endpoint
- Running API server
+- Use NGrok Tunnel for public access
- API Endpoint
- Example API Documentation
- Version: 1.0.0
diff --git a/genindex.html b/genindex.html
index b6c7f51f7..ce3b8d243 100644
--- a/genindex.html
+++ b/genindex.html
@@ -1692,6 +1692,8 @@ C
D