Python: improved content inits, added ndarray support for binary cont…

…ent and small fixes to defaults (#10469) ### Motivation and Context  This PR adds support for ndarrays as the content carrier for all binary content types (binary, image, audio), as that is better optimized for larger content. It also fixes the initialization of those content types and of the underlying data URI type. In addition, it fixes unspecified default parameters in pydantic `Field` calls: language servers do not recognize `Field("default value")` as having a default, so those occurrences were changed to `Field(default="default value")`. ### Description  ### Contribution Checklist  - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone 😄
microsoft · Feb 12, 2025 · cbfd7e9 · cbfd7e9
1 parent dc6ca1a
commit cbfd7e9
Show file tree

Hide file tree

Showing 24 changed files with 346 additions and 153 deletions.
diff --git a/python/samples/learn_resources/plugins/GithubPlugin/github.py b/python/samples/learn_resources/plugins/GithubPlugin/github.py
@@ -12,22 +12,22 @@
 class Repo(BaseModel):
     id: int = Field(..., alias="id")
     name: str = Field(..., alias="full_name")
-    description: str | None = Field(None, alias="description")
+    description: str | None = Field(default=None, alias="description")
     url: str = Field(..., alias="html_url")
 
 
 class User(BaseModel):
     id: int = Field(..., alias="id")
     login: str = Field(..., alias="login")
-    name: str | None = Field(None, alias="name")
-    company: str | None = Field(None, alias="company")
+    name: str | None = Field(default=None, alias="name")
+    company: str | None = Field(default=None, alias="company")
     url: str = Field(..., alias="html_url")
 
 
 class Label(BaseModel):
     id: int = Field(..., alias="id")
     name: str = Field(..., alias="name")
-    description: str | None = Field(None, alias="description")
+    description: str | None = Field(default=None, alias="description")
 
 
 class Issue(BaseModel):
@@ -37,12 +37,12 @@ class Issue(BaseModel):
     title: str = Field(..., alias="title")
     state: str = Field(..., alias="state")
     labels: list[Label] = Field(..., alias="labels")
-    when_created: str | None = Field(None, alias="created_at")
-    when_closed: str | None = Field(None, alias="closed_at")
+    when_created: str | None = Field(default=None, alias="created_at")
+    when_closed: str | None = Field(default=None, alias="closed_at")
 
 
 class IssueDetail(Issue):
-    body: str | None = Field(None, alias="body")
+    body: str | None = Field(default=None, alias="body")
 
 
 # endregion

diff --git a/python/semantic_kernel/agents/bedrock/models/bedrock_agent_model.py b/python/semantic_kernel/agents/bedrock/models/bedrock_agent_model.py
@@ -17,8 +17,8 @@ class BedrockAgentModel(KernelBaseModel):
     # This model_config will merge with the KernelBaseModel.model_config
     model_config = ConfigDict(extra="allow")
 
-    agent_id: str | None = Field(None, alias="agentId", description="The unique identifier of the agent.")
-    agent_name: str | None = Field(None, alias="agentName", description="The name of the agent.")
-    agent_version: str | None = Field(None, alias="agentVersion", description="The version of the agent.")
-    foundation_model: str | None = Field(None, alias="foundationModel", description="The foundation model.")
-    agent_status: str | None = Field(None, alias="agentStatus", description="The status of the agent.")
+    agent_id: str | None = Field(default=None, alias="agentId", description="The unique identifier of the agent.")
+    agent_name: str | None = Field(default=None, alias="agentName", description="The name of the agent.")
+    agent_version: str | None = Field(default=None, alias="agentVersion", description="The version of the agent.")
+    foundation_model: str | None = Field(default=None, alias="foundationModel", description="The foundation model.")
+    agent_status: str | None = Field(default=None, alias="agentStatus", description="The status of the agent.")
diff --git a/...nnectors/ai/open_ai/prompt_execution_settings/open_ai_audio_to_text_execution_settings.py b/...nnectors/ai/open_ai/prompt_execution_settings/open_ai_audio_to_text_execution_settings.py
@@ -13,9 +13,10 @@
 class OpenAIAudioToTextExecutionSettings(PromptExecutionSettings):
     """Request settings for OpenAI audio to text services."""
 
-    ai_model_id: str | None = Field(None, serialization_alias="model")
+    ai_model_id: str | None = Field(default=None, serialization_alias="model")
     filename: str | None = Field(
-        None, description="Do not set this manually. It is set by the service based on the audio content."
+        default=None,
+        description="Do not set this manually. It is set by the service based on the audio content.",
     )
     language: str | None = None
     prompt: str | None = None

diff --git a/...nnectors/ai/open_ai/prompt_execution_settings/open_ai_text_to_image_execution_settings.py b/...nnectors/ai/open_ai/prompt_execution_settings/open_ai_text_to_image_execution_settings.py
@@ -36,7 +36,7 @@ class OpenAITextToImageExecutionSettings(PromptExecutionSettings):
     """Request settings for OpenAI text to image services."""
 
     prompt: str | None = None
-    ai_model_id: str | None = Field(None, serialization_alias="model")
+    ai_model_id: str | None = Field(default=None, serialization_alias="model")
     size: ImageSize | None = None
     quality: str | None = None
     style: str | None = None

diff --git a/python/semantic_kernel/connectors/memory/azure_cosmosdb/azure_cosmosdb_settings.py b/python/semantic_kernel/connectors/memory/azure_cosmosdb/azure_cosmosdb_settings.py
@@ -21,7 +21,7 @@ class AzureCosmosDBSettings(KernelBaseSettings):
     env_prefix: ClassVar[str] = "COSMOSDB_"
 
     api: str | None = None
-    connection_string: SecretStr | None = Field(None, alias="AZCOSMOS_CONNSTR")
+    connection_string: SecretStr | None = Field(default=None, alias="AZCOSMOS_CONNSTR")
 
     model_config = ConfigDict(
         populate_by_name=True,

diff --git a/python/semantic_kernel/connectors/memory/postgres/postgres_settings.py b/python/semantic_kernel/connectors/memory/postgres/postgres_settings.py
@@ -59,12 +59,12 @@ class PostgresSettings(KernelBaseSettings):
     env_prefix: ClassVar[str] = "POSTGRES_"
 
     connection_string: SecretStr | None = None
-    host: str | None = Field(None, alias=PGHOST_ENV_VAR)
-    port: int | None = Field(5432, alias=PGPORT_ENV_VAR)
-    dbname: str | None = Field(None, alias=PGDATABASE_ENV_VAR)
-    user: str | None = Field(None, alias=PGUSER_ENV_VAR)
-    password: SecretStr | None = Field(None, alias=PGPASSWORD_ENV_VAR)
-    sslmode: str | None = Field(None, alias=PGSSL_MODE_ENV_VAR)
+    host: str | None = Field(default=None, alias=PGHOST_ENV_VAR)
+    port: int | None = Field(default=5432, alias=PGPORT_ENV_VAR)
+    dbname: str | None = Field(default=None, alias=PGDATABASE_ENV_VAR)
+    user: str | None = Field(default=None, alias=PGUSER_ENV_VAR)
+    password: SecretStr | None = Field(default=None, alias=PGPASSWORD_ENV_VAR)
+    sslmode: str | None = Field(default=None, alias=PGSSL_MODE_ENV_VAR)
 
     min_pool: int = 1
     max_pool: int = 5

diff --git a/python/semantic_kernel/connectors/search/bing/bing_search_response.py b/python/semantic_kernel/connectors/search/bing/bing_search_response.py
@@ -14,16 +14,16 @@ class BingWebPages(KernelBaseModel):
     """The web pages from a Bing search."""
 
     id: str | None = None
-    some_results_removed: bool | None = Field(None, alias="someResultsRemoved")
-    total_estimated_matches: int | None = Field(None, alias="totalEstimatedMatches")
-    web_search_url: str | None = Field(None, alias="webSearchUrl")
+    some_results_removed: bool | None = Field(default=None, alias="someResultsRemoved")
+    total_estimated_matches: int | None = Field(default=None, alias="totalEstimatedMatches")
+    web_search_url: str | None = Field(default=None, alias="webSearchUrl")
     value: list[BingWebPage] = Field(default_factory=list)
 
 
 @experimental_class
 class BingSearchResponse(KernelBaseModel):
     """The response from a Bing search."""
 
-    type_: str = Field("", alias="_type")
+    type_: str = Field(default="", alias="_type")
     query_context: dict[str, Any] = Field(default_factory=dict, validation_alias="queryContext")
-    web_pages: BingWebPages | None = Field(None, alias="webPages")
+    web_pages: BingWebPages | None = Field(default=None, alias="webPages")
diff --git a/python/semantic_kernel/connectors/search/google/google_search_result.py b/python/semantic_kernel/connectors/search/google/google_search_result.py
@@ -14,16 +14,16 @@ class GoogleSearchResult(KernelBaseModel):
 
     kind: str = ""
     title: str = ""
-    html_title: str = Field("", alias="htmlTitle")
+    html_title: str = Field(default="", alias="htmlTitle")
     link: str = ""
-    display_link: str = Field("", alias="displayLink")
+    display_link: str = Field(default="", alias="displayLink")
     snippet: str = ""
-    html_snippet: str = Field("", alias="htmlSnippet")
-    cache_id: str = Field("", alias="cacheId")
-    formatted_url: str = Field("", alias="formattedUrl")
-    html_formatted_url: str = Field("", alias="htmlFormattedUrl")
+    html_snippet: str = Field(default="", alias="htmlSnippet")
+    cache_id: str = Field(default="", alias="cacheId")
+    formatted_url: str = Field(default="", alias="formattedUrl")
+    html_formatted_url: str = Field(default="", alias="htmlFormattedUrl")
     pagemap: dict[str, Any] = Field(default_factory=dict)
     mime: str = ""
-    file_format: str = Field("", alias="fileFormat")
+    file_format: str = Field(default="", alias="fileFormat")
     image: dict[str, Any] = Field(default_factory=dict)
     labels: list[dict[str, Any]] = Field(default_factory=list)
diff --git a/python/semantic_kernel/contents/audio_content.py b/python/semantic_kernel/contents/audio_content.py
@@ -3,6 +3,7 @@
 import mimetypes
 from typing import Any, ClassVar, Literal, TypeVar
 
+from numpy import ndarray
 from pydantic import Field
 
 from semantic_kernel.contents.binary_content import BinaryContent
@@ -38,9 +39,43 @@ class AudioContent(BinaryContent):
             metadata (dict[str, Any]): Any metadata that should be attached to the response.
     """
 
-    content_type: Literal[ContentTypes.AUDIO_CONTENT] = Field(AUDIO_CONTENT_TAG, init=False)  # type: ignore
+    content_type: Literal[ContentTypes.AUDIO_CONTENT] = Field(default=AUDIO_CONTENT_TAG, init=False)  # type: ignore
     tag: ClassVar[str] = AUDIO_CONTENT_TAG
 
+    def __init__(
+        self,
+        uri: str | None = None,
+        data_uri: str | None = None,
+        data: str | bytes | ndarray | None = None,
+        data_format: str | None = None,
+        mime_type: str | None = None,
+        **kwargs: Any,
+    ):
+        """Create an Audio Content object, either from a data_uri or data.
+
+        Args:
+            uri: The reference uri of the content.
+            data_uri: The data uri of the content.
+            data: The data of the content.
+            data_format: The format of the data (e.g. base64).
+            mime_type: The mime type of the audio, only used with data.
+            kwargs: Any additional arguments:
+                inner_content: The inner content of the response,
+                    this should hold all the information from the response so even
+                    when not creating a subclass a developer
+                    can leverage the full thing.
+                ai_model_id: The id of the AI model that generated this response.
+                metadata: Any metadata that should be attached to the response.
+        """
+        super().__init__(
+            uri=uri,
+            data_uri=data_uri,
+            data=data,
+            data_format=data_format,
+            mime_type=mime_type,
+            **kwargs,
+        )
+
     @classmethod
     def from_audio_file(cls: type[_T], path: str) -> "AudioContent":
         """Create an instance from an audio file."""

diff --git a/python/semantic_kernel/contents/binary_content.py b/python/semantic_kernel/contents/binary_content.py
@@ -2,16 +2,18 @@
 
 import logging
 import os
+from pathlib import Path
 from typing import Annotated, Any, ClassVar, Literal, TypeVar
 from xml.etree.ElementTree import Element  # nosec
 
-from pydantic import Field, FilePath, UrlConstraints, computed_field
+from numpy import ndarray
+from pydantic import Field, FilePath, PrivateAttr, UrlConstraints, computed_field
 from pydantic_core import Url
 
 from semantic_kernel.contents.const import BINARY_CONTENT_TAG, ContentTypes
 from semantic_kernel.contents.kernel_content import KernelContent
 from semantic_kernel.contents.utils.data_uri import DataUri
-from semantic_kernel.exceptions.content_exceptions import ContentInitializationError
+from semantic_kernel.exceptions.content_exceptions import ContentException, ContentInitializationError
 from semantic_kernel.utils.experimental_decorator import experimental_class
 
 logger = logging.getLogger(__name__)
@@ -38,56 +40,63 @@ class BinaryContent(KernelContent):
 
     """
 
-    content_type: Literal[ContentTypes.BINARY_CONTENT] = Field(BINARY_CONTENT_TAG, init=False)  # type: ignore
+    content_type: Literal[ContentTypes.BINARY_CONTENT] = Field(default=BINARY_CONTENT_TAG, init=False)  # type: ignore
     uri: Url | str | None = None
+
     default_mime_type: ClassVar[str] = "text/plain"
     tag: ClassVar[str] = BINARY_CONTENT_TAG
-    _data_uri: DataUri | None = None
+    _data_uri: DataUri | None = PrivateAttr(default=None)
 
     def __init__(
         self,
         uri: Url | str | None = None,
         data_uri: DataUrl | str | None = None,
-        data: str | bytes | None = None,
+        data: str | bytes | ndarray | None = None,
         data_format: str | None = None,
         mime_type: str | None = None,
         **kwargs: Any,
     ):
         """Create a Binary Content object, either from a data_uri or data.
 
         Args:
-            uri (Url | str | None): The reference uri of the content.
-            data_uri (DataUrl | None): The data uri of the content.
-            data (str | bytes | None): The data of the content.
-            data_format (str | None): The format of the data (e.g. base64).
-            mime_type (str | None): The mime type of the image, only used with data.
-            kwargs (Any): Any additional arguments:
-                inner_content (Any): The inner content of the response,
+            uri: The reference uri of the content.
+            data_uri: The data uri of the content.
+            data: The data of the content.
+            data_format: The format of the data (e.g. base64).
+            mime_type: The mime type of the content, not always relevant.
+            kwargs: Any additional arguments:
+                inner_content: The inner content of the response,
                     this should hold all the information from the response so even
                     when not creating a subclass a developer can leverage the full thing.
-                ai_model_id (str | None): The id of the AI model that generated this response.
-                metadata (dict[str, Any]): Any metadata that should be attached to the response.
+                ai_model_id: The id of the AI model that generated this response.
+                metadata: Any metadata that should be attached to the response.
         """
-        temp_data_uri = None
+        temp_data_uri: DataUri | None = None
         if data_uri:
             temp_data_uri = DataUri.from_data_uri(data_uri, self.default_mime_type)
-            if "metadata" in kwargs:
-                kwargs["metadata"].update(temp_data_uri.parameters)
-            else:
-                kwargs["metadata"] = temp_data_uri.parameters
-        elif data:
-            if isinstance(data, str):
-                temp_data_uri = DataUri(
-                    data_str=data, data_format=data_format, mime_type=mime_type or self.default_mime_type
-                )
-            else:
-                temp_data_uri = DataUri(
-                    data_bytes=data, data_format=data_format, mime_type=mime_type or self.default_mime_type
-                )
+            kwargs.setdefault("metadata", {})
+            kwargs["metadata"].update(temp_data_uri.parameters)
+        elif data is not None:
+            match data:
+                case bytes():
+                    temp_data_uri = DataUri(
+                        data_bytes=data, data_format=data_format, mime_type=mime_type or self.default_mime_type
+                    )
+                case ndarray():
+                    temp_data_uri = DataUri(
+                        data_array=data, data_format=data_format, mime_type=mime_type or self.default_mime_type
+                    )
+                case str():
+                    temp_data_uri = DataUri(
+                        data_str=data, data_format=data_format, mime_type=mime_type or self.default_mime_type
+                    )
 
         if uri is not None:
             if isinstance(uri, str) and os.path.exists(uri):
-                uri = str(FilePath(uri))
+                if os.path.isfile(uri):
+                    uri = str(Path(uri))
+                else:
+                    raise ContentInitializationError("URI must be a file path, not a directory.")
             elif isinstance(uri, str):
                 uri = Url(uri)
 
@@ -105,28 +114,36 @@ def data_uri(self) -> str:
     @data_uri.setter
     def data_uri(self, value: str):
         """Set the data uri."""
-        self._data_uri = DataUri.from_data_uri(value)
+        if not self._data_uri:
+            self._data_uri = DataUri.from_data_uri(value, self.default_mime_type)
+        else:
+            self._data_uri.update_data(value)
         self.metadata.update(self._data_uri.parameters)
 
     @property
     def data(self) -> bytes:
         """Get the data."""
+        if self._data_uri and self._data_uri.data_array is not None:
+            return self._data_uri.data_array.tobytes()
         if self._data_uri and self._data_uri.data_bytes:
             return self._data_uri.data_bytes
-        if self._data_uri and self._data_uri.data_str:
-            return self._data_uri.data_str.encode("utf-8")
         return b""
 
     @data.setter
-    def data(self, value: str | bytes):
+    def data(self, value: str | bytes | ndarray):
         """Set the data."""
         if self._data_uri:
             self._data_uri.update_data(value)
-        else:
-            if isinstance(value, str):
+            return
+        match value:
+            case ndarray():
+                self._data_uri = DataUri(data_array=value, mime_type=self.mime_type)
+            case str():
                 self._data_uri = DataUri(data_str=value, mime_type=self.mime_type)
-            else:
+            case bytes():
                 self._data_uri = DataUri(data_bytes=value, mime_type=self.mime_type)
+            case _:
+                raise ContentException("Data must be a string, bytes, or numpy array.")
 
     @property
     def mime_type(self) -> str:
@@ -167,6 +184,9 @@ def from_element(cls: type[_T], element: Element) -> _T:
 
     def write_to_file(self, path: str | FilePath) -> None:
         """Write the data to a file."""
+        if self._data_uri and self._data_uri.data_array is not None:
+            self._data_uri.data_array.tofile(path)
+            return
         with open(path, "wb") as file:
             file.write(self.data)
 

diff --git a/python/semantic_kernel/contents/chat_message_content.py b/python/semantic_kernel/contents/chat_message_content.py
@@ -83,7 +83,7 @@ class ChatMessageContent(KernelContent):
         __str__: Returns the content of the response.
     """
 
-    content_type: Literal[ContentTypes.CHAT_MESSAGE_CONTENT] = Field(CHAT_MESSAGE_CONTENT_TAG, init=False)  # type: ignore
+    content_type: Literal[ContentTypes.CHAT_MESSAGE_CONTENT] = Field(default=CHAT_MESSAGE_CONTENT_TAG, init=False)  # type: ignore
     tag: ClassVar[str] = CHAT_MESSAGE_CONTENT_TAG
     role: AuthorRole
     name: str | None = None