Skip to content

Commit

Permalink
Merge
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkbrnd committed Feb 6, 2025
2 parents 000d864 + d9dbcbd commit 66de326
Show file tree
Hide file tree
Showing 5 changed files with 219 additions and 8 deletions.
27 changes: 27 additions & 0 deletions cookbook/agent_concepts/knowledge/readers/json_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import json
from pathlib import Path

from agno.document.reader.json_reader import JSONReader

reader = JSONReader()

# Write a small sample document for the reader to consume.
# Create the parent directory first: Path.write_text does not create
# missing directories and would raise FileNotFoundError if "tmp/" is absent.
json_path = Path("tmp/test.json")
json_path.parent.mkdir(parents=True, exist_ok=True)
test_data = {"key": "value"}
json_path.write_text(json.dumps(test_data))

try:
    print("Starting read...")
    documents = reader.read(json_path)

    if documents:
        for doc in documents:
            print(doc.name)
            print(doc.content)
            print(f"Content length: {len(doc.content)}")
            print("-" * 80)
    else:
        print("No documents were returned")

except Exception as e:
    # Demo script: report any failure instead of crashing.
    print(f"Error type: {type(e)}")
    print(f"Error occurred: {str(e)}")
26 changes: 18 additions & 8 deletions libs/agno/agno/document/reader/json_reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
from io import BytesIO
from pathlib import Path
from typing import List
from typing import IO, Any, List, Union

from agno.document.base import Document
from agno.document.reader.base import Reader
Expand All @@ -12,14 +13,23 @@ class JSONReader(Reader):

chunk: bool = False

def read(self, path: Path) -> List[Document]:
if not path.exists():
raise FileNotFoundError(f"Could not find file: {path}")

def read(self, path: Union[Path, IO[Any]]) -> List[Document]:
try:
logger.info(f"Reading: {path}")
json_name = path.name.split(".")[0]
json_contents = json.loads(path.read_text("utf-8"))
if isinstance(path, Path):
if not path.exists():
raise FileNotFoundError(f"Could not find file: {path}")
logger.info(f"Reading: {path}")
json_name = path.name.split(".")[0]
json_contents = json.loads(path.read_text("utf-8"))

elif isinstance(path, BytesIO):
logger.info(f"Reading uploaded file: {path.name}")
json_name = path.name.split(".")[0]
path.seek(0)
json_contents = json.load(path)

else:
raise ValueError("Unsupported file type. Must be Path or BytesIO.")

if isinstance(json_contents, dict):
json_contents = [json_contents]
Expand Down
10 changes: 10 additions & 0 deletions libs/agno/agno/playground/async_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,16 @@ async def create_agent_run(
file_content = TextReader().read(text_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)

elif file.content_type == "application/json":
from agno.document.reader.json_reader import JSONReader

contents = await file.read()
json_file = BytesIO(contents)
json_file.name = file.filename
file_content = JSONReader().read(json_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)
else:
raise HTTPException(status_code=400, detail="Unsupported file type")

Expand Down
9 changes: 9 additions & 0 deletions libs/agno/agno/playground/sync_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,15 @@ def create_agent_run(
file_content = TextReader().read(text_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)
elif file.content_type == "application/json":
from agno.document.reader.json_reader import JSONReader

contents = file.file.read()
json_file = BytesIO(contents)
json_file.name = file.filename
file_content = JSONReader().read(json_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)
else:
raise HTTPException(status_code=400, detail="Unsupported file type")

Expand Down
155 changes: 155 additions & 0 deletions libs/agno/tests/unit/reader/test_json_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import json
from io import BytesIO
from pathlib import Path

import pytest

from agno.document.base import Document
from agno.document.reader.json_reader import JSONReader


def test_read_json_file_path(tmp_path):
    """Reading a JSON object from a Path yields one named document.

    NOTE(review): this function was decorated with ``@pytest.fixture``,
    which registers it as a fixture so pytest never collects it as a
    test and its assertions never run. The decorator is removed so the
    test actually executes.
    """
    # Create a temporary JSON file holding a single object payload.
    json_path = tmp_path / "test.json"
    test_data = {"key": "value"}
    json_path.write_text(json.dumps(test_data))

    reader = JSONReader()
    documents = reader.read(json_path)

    assert len(documents) == 1
    assert documents[0].name == "test"  # stem of "test.json"
    assert json.loads(documents[0].content) == test_data


def test_read_json_bytesio():
    """An in-memory BytesIO upload is parsed the same way as a file."""
    payload = {"key": "value"}
    buffer = BytesIO(json.dumps(payload).encode())
    buffer.name = "test.json"

    documents = JSONReader().read(buffer)

    assert len(documents) == 1
    assert documents[0].name == "test"
    assert json.loads(documents[0].content) == payload


def test_read_json_list():
    """A top-level JSON array produces one document per element."""
    payload = [{"key1": "value1"}, {"key2": "value2"}]
    buffer = BytesIO(json.dumps(payload).encode())
    buffer.name = "test.json"

    documents = JSONReader().read(buffer)

    assert len(documents) == 2
    for doc in documents:
        assert doc.name == "test"
    assert [json.loads(doc.content) for doc in documents] == payload


def test_chunking():
    """With chunk=True each document is routed through chunk_document."""
    payload = {"key": "value"}
    buffer = BytesIO(json.dumps(payload).encode())
    buffer.name = "test.json"

    reader = JSONReader()
    reader.chunk = True

    # Stub chunk_document to split every document into two fixed chunks.
    def fake_chunks(doc):
        return [
            Document(
                name=f"{doc.name}_chunk_{i}",
                id=f"{doc.id}_chunk_{i}",
                content=f"chunk_{i}",
                meta_data={"chunk": i},
            )
            for i in range(2)
        ]

    reader.chunk_document = fake_chunks

    documents = reader.read(buffer)

    assert len(documents) == 2
    for doc in documents:
        assert doc.name.startswith("test_chunk_")
        assert doc.id.startswith("test_1_chunk_")
        assert "chunk" in doc.meta_data


def test_file_not_found():
    """A Path that does not exist raises FileNotFoundError."""
    missing = Path("nonexistent.json")
    with pytest.raises(FileNotFoundError):
        JSONReader().read(missing)


def test_invalid_json():
    """Malformed JSON input surfaces as json.JSONDecodeError."""
    buffer = BytesIO(b"{invalid_json")
    buffer.name = "invalid.json"

    with pytest.raises(json.JSONDecodeError):
        JSONReader().read(buffer)


def test_unsupported_file_type():
    """Anything other than a Path or BytesIO is rejected with ValueError."""
    with pytest.raises(ValueError, match="Unsupported file type"):
        JSONReader().read("not_a_path_or_bytesio")


def test_empty_json_file(tmp_path):
    """A zero-byte file is not valid JSON and raises JSONDecodeError."""
    empty_path = tmp_path / "empty.json"
    empty_path.write_text("")

    with pytest.raises(json.JSONDecodeError):
        JSONReader().read(empty_path)


def test_empty_json_array(tmp_path):
    """An empty top-level array yields no documents at all."""
    array_path = tmp_path / "empty_array.json"
    array_path.write_text("[]")

    documents = JSONReader().read(array_path)
    assert len(documents) == 0


def test_unicode_content(tmp_path):
    """Non-ASCII content survives a write/read round trip.

    The file is written with ``ensure_ascii=False`` and an explicit
    UTF-8 encoding so real multi-byte characters land on disk. With the
    ``json.dumps`` default (``ensure_ascii=True``) the file would contain
    only ASCII escape sequences, so the reader's UTF-8 decoding path
    would never actually be exercised.
    """
    test_data = {"key": "值"}
    json_path = tmp_path / "unicode.json"
    json_path.write_text(
        json.dumps(test_data, ensure_ascii=False), encoding="utf-8"
    )

    reader = JSONReader()
    documents = reader.read(json_path)

    assert len(documents) == 1
    assert json.loads(documents[0].content) == test_data


def test_nested_json():
    """Deeply nested structures round-trip intact as one document."""
    payload = {"level1": {"level2": {"level3": "value"}}}
    buffer = BytesIO(json.dumps(payload).encode())
    buffer.name = "nested.json"

    documents = JSONReader().read(buffer)

    assert len(documents) == 1
    assert json.loads(documents[0].content) == payload


def test_large_json():
    """A 1000-element array yields 1000 documents with stable naming."""
    payload = [{"key": f"value_{i}"} for i in range(1000)]
    buffer = BytesIO(json.dumps(payload).encode())
    buffer.name = "large.json"

    documents = JSONReader().read(buffer)

    assert len(documents) == 1000
    for doc in documents:
        assert doc.name == "large"
        assert doc.id.startswith("large_")

0 comments on commit 66de326

Please sign in to comment.