Skip to content

Commit

Permalink
Add endpoint for batch uploading document metadata (#404)
Browse files Browse the repository at this point in the history
  • Loading branch information
nenb authored May 7, 2024
1 parent e397acb commit d7e4783
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 0 deletions.
27 changes: 27 additions & 0 deletions ragna/deploy/_api/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,33 @@ async def create_document_upload_info(
)
return schemas.DocumentUpload(parameters=parameters, document=document)

# TODO: Add UI support and documentation for this endpoint (#406)
@app.post("/documents")
async def create_documents_upload_info(
    user: UserDependency,
    names: Annotated[list[str], Body(..., embed=True)],
) -> list[schemas.DocumentUpload]:
    """Register a batch of documents and return upload info for each of them.

    For every entry in ``names`` a new ``schemas.Document`` is created and its
    upload info is requested from ``config.document.get_upload_info``, one
    name after the other. Afterwards all documents are handed to
    ``database.add_documents`` in a single call.

    The returned list has one ``schemas.DocumentUpload`` per name, in the same
    order as ``names``.
    """
    with get_session() as session:
        uploads = []
        documents_with_metadata = []

        for name in names:
            document = schemas.Document(name=name)
            upload_info = await config.document.get_upload_info(
                config=config, user=user, id=document.id, name=document.name
            )
            metadata, parameters = upload_info

            documents_with_metadata.append((document, metadata))
            uploads.append(
                schemas.DocumentUpload(parameters=parameters, document=document)
            )

        # One database call for the whole batch instead of one per document.
        database.add_documents(
            session,
            user=user,
            document_metadata_collection=documents_with_metadata,
        )
        return uploads

# TODO: Add new endpoint for batch uploading documents (#407)
@app.put("/document")
async def upload_document(
token: Annotated[str, Form()], file: UploadFile
Expand Down
26 changes: 26 additions & 0 deletions ragna/deploy/_api/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,32 @@ def add_document(
session.commit()


def add_documents(
    session: Session,
    *,
    user: str,
    document_metadata_collection: list[tuple[schemas.Document, dict[str, Any]]],
) -> None:
    """
    Add multiple documents to the database.

    Every ``(document, metadata)`` pair is turned into an `orm.Document` owned by
    ``user``. All of them are registered with a single `add_all` call followed by
    one commit, which matters when each database operation carries
    non-negligible latency.
    """
    owner_id = _get_user_id(session, user)

    def as_row(document: schemas.Document, metadata: dict[str, Any]) -> orm.Document:
        # Map one schema document plus its metadata onto an ORM row.
        return orm.Document(
            id=document.id,
            user_id=owner_id,
            name=document.name,
            metadata_=metadata,
        )

    rows = [
        as_row(document, metadata)
        for document, metadata in document_metadata_collection
    ]
    session.add_all(rows)
    session.commit()


def _orm_to_schema_document(document: orm.Document) -> schemas.Document:
    """Build a `schemas.Document` from the ``id`` and ``name`` of an ORM document."""
    fields = {"id": document.id, "name": document.name}
    return schemas.Document(**fields)

Expand Down
86 changes: 86 additions & 0 deletions tests/deploy/api/test_batch_endpoints.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from fastapi import status
from fastapi.testclient import TestClient

from ragna.deploy import Config
from ragna.deploy._api import app

from .utils import authenticate


def test_batch_sequential_upload_equivalence(tmp_local_root):
    """Check that uploading documents sequentially and in batch gives the same result.

    Two small text files are registered once through ``POST /document`` (one
    request per file) and once through ``POST /documents`` (a single request).
    The returned upload URLs and document names must match pairwise, and
    uploading the first file with either set of parameters must succeed and
    report the same document name.
    """
    config = Config(local_root=tmp_local_root)

    document_root = config.local_root / "documents"
    document_root.mkdir()
    document_path1 = document_root / "test1.txt"
    document_path1.write_text("!\n")
    document_path2 = document_root / "test2.txt"
    document_path2.write_text("?\n")

    # Build the app from the config that points at the temporary local root.
    # Passing a fresh `Config()` here would leave `config` unused by the app
    # under test and defeat the purpose of the `tmp_local_root` fixture.
    with TestClient(
        app(config=config, ignore_unavailable_components=False)
    ) as client:
        authenticate(client)

        document1_upload = (
            client.post("/document", json={"name": document_path1.name})
            .raise_for_status()
            .json()
        )
        document2_upload = (
            client.post("/document", json={"name": document_path2.name})
            .raise_for_status()
            .json()
        )

        documents_upload = (
            client.post(
                "/documents", json={"names": [document_path1.name, document_path2.name]}
            )
            .raise_for_status()
            .json()
        )

        sequential_uploads = [document1_upload, document2_upload]
        for sequential, batch in zip(sequential_uploads, documents_upload):
            assert sequential["parameters"]["url"] == batch["parameters"]["url"]
        for sequential, batch in zip(sequential_uploads, documents_upload):
            assert sequential["document"]["name"] == batch["document"]["name"]

        # assuming that if test passes for first document it will also pass for the other
        with open(document_path1, "rb") as file:
            response_sequential_upload1 = client.request(
                document1_upload["parameters"]["method"],
                document1_upload["parameters"]["url"],
                data=document1_upload["parameters"]["data"],
                files={"file": file},
            )
            # The same handle is reused for the second upload. Rewind it
            # explicitly so the payload does not depend on the HTTP client
            # resetting the read position for us.
            file.seek(0)
            response_batch_upload1 = client.request(
                documents_upload[0]["parameters"]["method"],
                documents_upload[0]["parameters"]["url"],
                data=documents_upload[0]["parameters"]["data"],
                files={"file": file},
            )

        assert response_sequential_upload1.status_code == status.HTTP_200_OK
        assert response_batch_upload1.status_code == status.HTTP_200_OK

        assert (
            response_sequential_upload1.json()["name"]
            == response_batch_upload1.json()["name"]
        )

0 comments on commit d7e4783

Please sign in to comment.