Skip to content

Commit

Permalink
Merge
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkbrnd committed Feb 6, 2025
2 parents 000d864 + d9dbcbd commit 66de326
Show file tree
Hide file tree
Showing 5 changed files with 219 additions and 8 deletions.
27 changes: 27 additions & 0 deletions cookbook/agent_concepts/knowledge/readers/json_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import json
from pathlib import Path

from agno.document.reader.json_reader import JSONReader

reader = JSONReader()

# Write a small sample document for the reader to consume.
# Create the parent directory first: Path.write_text does not create
# missing directories and would raise FileNotFoundError if "tmp/" is absent.
json_path = Path("tmp/test.json")
json_path.parent.mkdir(parents=True, exist_ok=True)
test_data = {"key": "value"}
json_path.write_text(json.dumps(test_data))

try:
    print("Starting read...")
    documents = reader.read(json_path)

    if documents:
        for doc in documents:
            print(doc.name)
            print(doc.content)
            print(f"Content length: {len(doc.content)}")
            print("-" * 80)
    else:
        print("No documents were returned")

except Exception as e:
    # Demo script: report any failure instead of crashing.
    print(f"Error type: {type(e)}")
    print(f"Error occurred: {str(e)}")
26 changes: 18 additions & 8 deletions libs/agno/agno/document/reader/json_reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
from io import BytesIO
from pathlib import Path
from typing import List
from typing import IO, Any, List, Union

from agno.document.base import Document
from agno.document.reader.base import Reader
Expand All @@ -12,14 +13,23 @@ class JSONReader(Reader):

chunk: bool = False

def read(self, path: Path) -> List[Document]:
if not path.exists():
raise FileNotFoundError(f"Could not find file: {path}")

def read(self, path: Union[Path, IO[Any]]) -> List[Document]:
try:
logger.info(f"Reading: {path}")
json_name = path.name.split(".")[0]
json_contents = json.loads(path.read_text("utf-8"))
if isinstance(path, Path):
if not path.exists():
raise FileNotFoundError(f"Could not find file: {path}")
logger.info(f"Reading: {path}")
json_name = path.name.split(".")[0]
json_contents = json.loads(path.read_text("utf-8"))

elif isinstance(path, BytesIO):
logger.info(f"Reading uploaded file: {path.name}")
json_name = path.name.split(".")[0]
path.seek(0)
json_contents = json.load(path)

else:
raise ValueError("Unsupported file type. Must be Path or BytesIO.")

if isinstance(json_contents, dict):
json_contents = [json_contents]
Expand Down
10 changes: 10 additions & 0 deletions libs/agno/agno/playground/async_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,16 @@ async def create_agent_run(
file_content = TextReader().read(text_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)

elif file.content_type == "application/json":
from agno.document.reader.json_reader import JSONReader

contents = await file.read()
json_file = BytesIO(contents)
json_file.name = file.filename
file_content = JSONReader().read(json_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)
else:
raise HTTPException(status_code=400, detail="Unsupported file type")

Expand Down
9 changes: 9 additions & 0 deletions libs/agno/agno/playground/sync_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,15 @@ def create_agent_run(
file_content = TextReader().read(text_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)
elif file.content_type == "application/json":
from agno.document.reader.json_reader import JSONReader

contents = file.file.read()
json_file = BytesIO(contents)
json_file.name = file.filename
file_content = JSONReader().read(json_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)
else:
raise HTTPException(status_code=400, detail="Unsupported file type")

Expand Down
155 changes: 155 additions & 0 deletions libs/agno/tests/unit/reader/test_json_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import json
from io import BytesIO
from pathlib import Path

import pytest

from agno.document.base import Document
from agno.document.reader.json_reader import JSONReader


def test_read_json_file_path(tmp_path):
    """Reading a JSON object from a Path yields one named document.

    NOTE(review): this function was decorated with ``@pytest.fixture``,
    which registers it as a fixture so pytest never collects it as a
    test and its assertions never run. The decorator is removed so the
    test actually executes.
    """
    # Create a temporary JSON file holding a single object payload.
    json_path = tmp_path / "test.json"
    test_data = {"key": "value"}
    json_path.write_text(json.dumps(test_data))

    reader = JSONReader()
    documents = reader.read(json_path)

    assert len(documents) == 1
    assert documents[0].name == "test"  # stem of "test.json"
    assert json.loads(documents[0].content) == test_data


def test_read_json_bytesio():
    """An in-memory BytesIO upload is parsed the same way as a file."""
    payload = {"key": "value"}
    buffer = BytesIO(json.dumps(payload).encode())
    buffer.name = "test.json"

    documents = JSONReader().read(buffer)

    assert len(documents) == 1
    assert documents[0].name == "test"
    assert json.loads(documents[0].content) == payload


def test_read_json_list():
    """A top-level JSON array produces one document per element."""
    payload = [{"key1": "value1"}, {"key2": "value2"}]
    buffer = BytesIO(json.dumps(payload).encode())
    buffer.name = "test.json"

    documents = JSONReader().read(buffer)

    assert len(documents) == 2
    for doc in documents:
        assert doc.name == "test"
    assert [json.loads(doc.content) for doc in documents] == payload


def test_chunking():
    """With chunk=True each document is routed through chunk_document."""
    payload = {"key": "value"}
    buffer = BytesIO(json.dumps(payload).encode())
    buffer.name = "test.json"

    reader = JSONReader()
    reader.chunk = True

    # Stub chunk_document to split every document into two fixed chunks.
    def fake_chunks(doc):
        return [
            Document(
                name=f"{doc.name}_chunk_{i}",
                id=f"{doc.id}_chunk_{i}",
                content=f"chunk_{i}",
                meta_data={"chunk": i},
            )
            for i in range(2)
        ]

    reader.chunk_document = fake_chunks

    documents = reader.read(buffer)

    assert len(documents) == 2
    for doc in documents:
        assert doc.name.startswith("test_chunk_")
        assert doc.id.startswith("test_1_chunk_")
        assert "chunk" in doc.meta_data


def test_file_not_found():
    """A Path that does not exist raises FileNotFoundError."""
    missing = Path("nonexistent.json")
    with pytest.raises(FileNotFoundError):
        JSONReader().read(missing)


def test_invalid_json():
    """Malformed JSON input surfaces as json.JSONDecodeError."""
    buffer = BytesIO(b"{invalid_json")
    buffer.name = "invalid.json"

    with pytest.raises(json.JSONDecodeError):
        JSONReader().read(buffer)


def test_unsupported_file_type():
    """Anything other than a Path or BytesIO is rejected with ValueError."""
    with pytest.raises(ValueError, match="Unsupported file type"):
        JSONReader().read("not_a_path_or_bytesio")


def test_empty_json_file(tmp_path):
    """A zero-byte file is not valid JSON and raises JSONDecodeError."""
    empty_path = tmp_path / "empty.json"
    empty_path.write_text("")

    with pytest.raises(json.JSONDecodeError):
        JSONReader().read(empty_path)


def test_empty_json_array(tmp_path):
    """An empty top-level array yields no documents at all."""
    array_path = tmp_path / "empty_array.json"
    array_path.write_text("[]")

    documents = JSONReader().read(array_path)
    assert len(documents) == 0


def test_unicode_content(tmp_path):
    """Non-ASCII content survives a write/read round trip.

    The file is written with ``ensure_ascii=False`` and an explicit
    UTF-8 encoding so real multi-byte characters land on disk. With the
    ``json.dumps`` default (``ensure_ascii=True``) the file would contain
    only ASCII escape sequences, so the reader's UTF-8 decoding path
    would never actually be exercised.
    """
    test_data = {"key": "值"}
    json_path = tmp_path / "unicode.json"
    json_path.write_text(
        json.dumps(test_data, ensure_ascii=False), encoding="utf-8"
    )

    reader = JSONReader()
    documents = reader.read(json_path)

    assert len(documents) == 1
    assert json.loads(documents[0].content) == test_data


def test_nested_json():
    """Deeply nested structures round-trip intact as one document."""
    payload = {"level1": {"level2": {"level3": "value"}}}
    buffer = BytesIO(json.dumps(payload).encode())
    buffer.name = "nested.json"

    documents = JSONReader().read(buffer)

    assert len(documents) == 1
    assert json.loads(documents[0].content) == payload


def test_large_json():
    """A 1000-element array yields 1000 documents with stable naming."""
    payload = [{"key": f"value_{i}"} for i in range(1000)]
    buffer = BytesIO(json.dumps(payload).encode())
    buffer.name = "large.json"

    documents = JSONReader().read(buffer)

    assert len(documents) == 1000
    for doc in documents:
        assert doc.name == "large"
        assert doc.id.startswith("large_")

0 comments on commit 66de326

Please sign in to comment.