diff --git a/src/strands/types/media.py b/src/strands/types/media.py index 29b89e5c..b778337a 100644 --- a/src/strands/types/media.py +++ b/src/strands/types/media.py @@ -13,14 +13,33 @@ """Supported document formats.""" -class DocumentSource(TypedDict): +class S3Location(TypedDict, total=False): + """Contains the S3 location information for a document. + + Attributes: + bucket: The S3 bucket name. + key: The S3 object key. + + Note: + Both bucket and key are required for a valid S3 location, + but they are marked as optional in the type definition to allow + for runtime validation in the code. + """ + + bucket: str + key: str + + +class DocumentSource(TypedDict, total=False): """Contains the content of a document. Attributes: bytes: The binary content of the document. + s3Location: The S3 location of the document (for Bedrock Nova models). """ bytes: bytes + s3Location: S3Location class DocumentContent(TypedDict): @@ -29,7 +48,7 @@ class DocumentContent(TypedDict): Attributes: format: The format of the document (e.g., "pdf", "txt"). name: The name of the document. - source: The source containing the document's binary content. + source: The source containing the document's binary content or S3 location. """ format: Literal["pdf", "csv", "doc", "docx", "xls", "xlsx", "html", "txt", "md"] diff --git a/tests/strands/models/test_bedrock.py b/tests/strands/models/test_bedrock.py index 1d045f3b..04914f8f 100644 --- a/tests/strands/models/test_bedrock.py +++ b/tests/strands/models/test_bedrock.py @@ -364,6 +364,51 @@ def test_format_request_cache(model, messages, model_id, tool_spec, cache_type): assert tru_request == exp_request +def test_message_with_both_document_sources(bedrock_client, model): + """Test that messages with both S3 and bytes document sources are properly formatted.""" + # Setup mock response + bedrock_client.converse_stream.return_value = { + "stream": [ + {"messageStart": {"role": "assistant"}}, + {"contentBlockDelta": {"delta": {"text": "I've analyzed both documents."}}}, + {"contentBlockStop": {}}, + {"messageStop": {"stopReason": "end_turn"}}, + ] + } + + # Create document with both source types (this is allowed by the type but might not be valid for the API) + document = { + "format": "pdf", + "name": "test.pdf", + "source": { + "bytes": b"test document content", + "s3Location": {"bucket": "test-bucket", "key": "documents/test.pdf"}, + }, + } + + # Create message with the mixed document + messages = [{"role": "user", "content": [{"text": "Please analyze this document"}, document]}] + + # Call converse and collect events + list(model.converse(messages)) + + # Verify the request was made with both document sources + args, kwargs = bedrock_client.converse_stream.call_args + request = kwargs + + # Extract the document content from the request + request_messages = request["messages"] + user_message = request_messages[0] + document_content = user_message["content"][1] + + # Verify both sources were included in the request + assert "bytes" in document_content["source"] + assert "s3Location" in document_content["source"] + assert document_content["source"]["bytes"] == b"test document content" + assert document_content["source"]["s3Location"]["bucket"] == "test-bucket" + assert document_content["source"]["s3Location"]["key"] == "documents/test.pdf" + + def test_format_chunk(model): tru_chunk = model.format_chunk("event") exp_chunk = "event"