Skip to content

Commit

Permalink
Python: improved content inits, added ndarray support for binary cont…
Browse files Browse the repository at this point in the history
…ent and small fixes to defaults (#10469)

### Motivation and Context

<!-- Thank you for your contribution to the semantic-kernel repo!
Please help reviewers and future users, providing the following
information:
  1. Why is this change required?
  2. What problem does it solve?
  3. What scenario does it contribute to?
  4. If it fixes an open issue, please link to the issue here.
-->
This PR adds support for ndarrays as the content carrier for all binary
content types (binary, image, audio), since that is better optimized for
larger content.

It also fixes the initialization of those content types and of the
underlying data URI type.

It also fixes unspecified default parameters in pydantic `Field` calls:
language servers do not recognize `Field("default value")` as having a
default, so those occurrences were changed to `Field(default="default value")`.

### Description

<!-- Describe your changes, the overall approach, the underlying design.
These notes will help understanding how your code works. Thanks! -->

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄
  • Loading branch information
eavanvalkenburg authored Feb 12, 2025
1 parent dc6ca1a commit cbfd7e9
Show file tree
Hide file tree
Showing 24 changed files with 346 additions and 153 deletions.
14 changes: 7 additions & 7 deletions python/samples/learn_resources/plugins/GithubPlugin/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@
class Repo(BaseModel):
id: int = Field(..., alias="id")
name: str = Field(..., alias="full_name")
description: str | None = Field(None, alias="description")
description: str | None = Field(default=None, alias="description")
url: str = Field(..., alias="html_url")


class User(BaseModel):
id: int = Field(..., alias="id")
login: str = Field(..., alias="login")
name: str | None = Field(None, alias="name")
company: str | None = Field(None, alias="company")
name: str | None = Field(default=None, alias="name")
company: str | None = Field(default=None, alias="company")
url: str = Field(..., alias="html_url")


class Label(BaseModel):
id: int = Field(..., alias="id")
name: str = Field(..., alias="name")
description: str | None = Field(None, alias="description")
description: str | None = Field(default=None, alias="description")


class Issue(BaseModel):
Expand All @@ -37,12 +37,12 @@ class Issue(BaseModel):
title: str = Field(..., alias="title")
state: str = Field(..., alias="state")
labels: list[Label] = Field(..., alias="labels")
when_created: str | None = Field(None, alias="created_at")
when_closed: str | None = Field(None, alias="closed_at")
when_created: str | None = Field(default=None, alias="created_at")
when_closed: str | None = Field(default=None, alias="closed_at")


class IssueDetail(Issue):
body: str | None = Field(None, alias="body")
body: str | None = Field(default=None, alias="body")


# endregion
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ class BedrockAgentModel(KernelBaseModel):
# This model_config will merge with the KernelBaseModel.model_config
model_config = ConfigDict(extra="allow")

agent_id: str | None = Field(None, alias="agentId", description="The unique identifier of the agent.")
agent_name: str | None = Field(None, alias="agentName", description="The name of the agent.")
agent_version: str | None = Field(None, alias="agentVersion", description="The version of the agent.")
foundation_model: str | None = Field(None, alias="foundationModel", description="The foundation model.")
agent_status: str | None = Field(None, alias="agentStatus", description="The status of the agent.")
agent_id: str | None = Field(default=None, alias="agentId", description="The unique identifier of the agent.")
agent_name: str | None = Field(default=None, alias="agentName", description="The name of the agent.")
agent_version: str | None = Field(default=None, alias="agentVersion", description="The version of the agent.")
foundation_model: str | None = Field(default=None, alias="foundationModel", description="The foundation model.")
agent_status: str | None = Field(default=None, alias="agentStatus", description="The status of the agent.")
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
class OpenAIAudioToTextExecutionSettings(PromptExecutionSettings):
"""Request settings for OpenAI audio to text services."""

ai_model_id: str | None = Field(None, serialization_alias="model")
ai_model_id: str | None = Field(default=None, serialization_alias="model")
filename: str | None = Field(
None, description="Do not set this manually. It is set by the service based on the audio content."
default=None,
description="Do not set this manually. It is set by the service based on the audio content.",
)
language: str | None = None
prompt: str | None = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class OpenAITextToImageExecutionSettings(PromptExecutionSettings):
"""Request settings for OpenAI text to image services."""

prompt: str | None = None
ai_model_id: str | None = Field(None, serialization_alias="model")
ai_model_id: str | None = Field(default=None, serialization_alias="model")
size: ImageSize | None = None
quality: str | None = None
style: str | None = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class AzureCosmosDBSettings(KernelBaseSettings):
env_prefix: ClassVar[str] = "COSMOSDB_"

api: str | None = None
connection_string: SecretStr | None = Field(None, alias="AZCOSMOS_CONNSTR")
connection_string: SecretStr | None = Field(default=None, alias="AZCOSMOS_CONNSTR")

model_config = ConfigDict(
populate_by_name=True,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ class PostgresSettings(KernelBaseSettings):
env_prefix: ClassVar[str] = "POSTGRES_"

connection_string: SecretStr | None = None
host: str | None = Field(None, alias=PGHOST_ENV_VAR)
port: int | None = Field(5432, alias=PGPORT_ENV_VAR)
dbname: str | None = Field(None, alias=PGDATABASE_ENV_VAR)
user: str | None = Field(None, alias=PGUSER_ENV_VAR)
password: SecretStr | None = Field(None, alias=PGPASSWORD_ENV_VAR)
sslmode: str | None = Field(None, alias=PGSSL_MODE_ENV_VAR)
host: str | None = Field(default=None, alias=PGHOST_ENV_VAR)
port: int | None = Field(default=5432, alias=PGPORT_ENV_VAR)
dbname: str | None = Field(default=None, alias=PGDATABASE_ENV_VAR)
user: str | None = Field(default=None, alias=PGUSER_ENV_VAR)
password: SecretStr | None = Field(default=None, alias=PGPASSWORD_ENV_VAR)
sslmode: str | None = Field(default=None, alias=PGSSL_MODE_ENV_VAR)

min_pool: int = 1
max_pool: int = 5
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@ class BingWebPages(KernelBaseModel):
"""The web pages from a Bing search."""

id: str | None = None
some_results_removed: bool | None = Field(None, alias="someResultsRemoved")
total_estimated_matches: int | None = Field(None, alias="totalEstimatedMatches")
web_search_url: str | None = Field(None, alias="webSearchUrl")
some_results_removed: bool | None = Field(default=None, alias="someResultsRemoved")
total_estimated_matches: int | None = Field(default=None, alias="totalEstimatedMatches")
web_search_url: str | None = Field(default=None, alias="webSearchUrl")
value: list[BingWebPage] = Field(default_factory=list)


@experimental_class
class BingSearchResponse(KernelBaseModel):
"""The response from a Bing search."""

type_: str = Field("", alias="_type")
type_: str = Field(default="", alias="_type")
query_context: dict[str, Any] = Field(default_factory=dict, validation_alias="queryContext")
web_pages: BingWebPages | None = Field(None, alias="webPages")
web_pages: BingWebPages | None = Field(default=None, alias="webPages")
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@ class GoogleSearchResult(KernelBaseModel):

kind: str = ""
title: str = ""
html_title: str = Field("", alias="htmlTitle")
html_title: str = Field(default="", alias="htmlTitle")
link: str = ""
display_link: str = Field("", alias="displayLink")
display_link: str = Field(default="", alias="displayLink")
snippet: str = ""
html_snippet: str = Field("", alias="htmlSnippet")
cache_id: str = Field("", alias="cacheId")
formatted_url: str = Field("", alias="formattedUrl")
html_formatted_url: str = Field("", alias="htmlFormattedUrl")
html_snippet: str = Field(default="", alias="htmlSnippet")
cache_id: str = Field(default="", alias="cacheId")
formatted_url: str = Field(default="", alias="formattedUrl")
html_formatted_url: str = Field(default="", alias="htmlFormattedUrl")
pagemap: dict[str, Any] = Field(default_factory=dict)
mime: str = ""
file_format: str = Field("", alias="fileFormat")
file_format: str = Field(default="", alias="fileFormat")
image: dict[str, Any] = Field(default_factory=dict)
labels: list[dict[str, Any]] = Field(default_factory=list)
37 changes: 36 additions & 1 deletion python/semantic_kernel/contents/audio_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import mimetypes
from typing import Any, ClassVar, Literal, TypeVar

from numpy import ndarray
from pydantic import Field

from semantic_kernel.contents.binary_content import BinaryContent
Expand Down Expand Up @@ -38,9 +39,43 @@ class AudioContent(BinaryContent):
metadata (dict[str, Any]): Any metadata that should be attached to the response.
"""

content_type: Literal[ContentTypes.AUDIO_CONTENT] = Field(AUDIO_CONTENT_TAG, init=False) # type: ignore
content_type: Literal[ContentTypes.AUDIO_CONTENT] = Field(default=AUDIO_CONTENT_TAG, init=False) # type: ignore
tag: ClassVar[str] = AUDIO_CONTENT_TAG

def __init__(
self,
uri: str | None = None,
data_uri: str | None = None,
data: str | bytes | ndarray | None = None,
data_format: str | None = None,
mime_type: str | None = None,
**kwargs: Any,
):
"""Create an Audio Content object, either from a data_uri or data.
Args:
uri: The reference uri of the content.
data_uri: The data uri of the content.
data: The data of the content.
data_format: The format of the data (e.g. base64).
mime_type: The mime type of the audio, only used with data.
kwargs: Any additional arguments:
inner_content: The inner content of the response,
this should hold all the information from the response so even
when not creating a subclass a developer
can leverage the full thing.
ai_model_id: The id of the AI model that generated this response.
metadata: Any metadata that should be attached to the response.
"""
super().__init__(
uri=uri,
data_uri=data_uri,
data=data,
data_format=data_format,
mime_type=mime_type,
**kwargs,
)

@classmethod
def from_audio_file(cls: type[_T], path: str) -> "AudioContent":
"""Create an instance from an audio file."""
Expand Down
92 changes: 56 additions & 36 deletions python/semantic_kernel/contents/binary_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@

import logging
import os
from pathlib import Path
from typing import Annotated, Any, ClassVar, Literal, TypeVar
from xml.etree.ElementTree import Element # nosec

from pydantic import Field, FilePath, UrlConstraints, computed_field
from numpy import ndarray
from pydantic import Field, FilePath, PrivateAttr, UrlConstraints, computed_field
from pydantic_core import Url

from semantic_kernel.contents.const import BINARY_CONTENT_TAG, ContentTypes
from semantic_kernel.contents.kernel_content import KernelContent
from semantic_kernel.contents.utils.data_uri import DataUri
from semantic_kernel.exceptions.content_exceptions import ContentInitializationError
from semantic_kernel.exceptions.content_exceptions import ContentException, ContentInitializationError
from semantic_kernel.utils.experimental_decorator import experimental_class

logger = logging.getLogger(__name__)
Expand All @@ -38,56 +40,63 @@ class BinaryContent(KernelContent):
"""

content_type: Literal[ContentTypes.BINARY_CONTENT] = Field(BINARY_CONTENT_TAG, init=False) # type: ignore
content_type: Literal[ContentTypes.BINARY_CONTENT] = Field(default=BINARY_CONTENT_TAG, init=False) # type: ignore
uri: Url | str | None = None

default_mime_type: ClassVar[str] = "text/plain"
tag: ClassVar[str] = BINARY_CONTENT_TAG
_data_uri: DataUri | None = None
_data_uri: DataUri | None = PrivateAttr(default=None)

def __init__(
self,
uri: Url | str | None = None,
data_uri: DataUrl | str | None = None,
data: str | bytes | None = None,
data: str | bytes | ndarray | None = None,
data_format: str | None = None,
mime_type: str | None = None,
**kwargs: Any,
):
"""Create a Binary Content object, either from a data_uri or data.
Args:
uri (Url | str | None): The reference uri of the content.
data_uri (DataUrl | None): The data uri of the content.
data (str | bytes | None): The data of the content.
data_format (str | None): The format of the data (e.g. base64).
mime_type (str | None): The mime type of the image, only used with data.
kwargs (Any): Any additional arguments:
inner_content (Any): The inner content of the response,
uri: The reference uri of the content.
data_uri: The data uri of the content.
data: The data of the content.
data_format: The format of the data (e.g. base64).
mime_type: The mime type of the content, not always relevant.
kwargs: Any additional arguments:
inner_content: The inner content of the response,
this should hold all the information from the response so even
when not creating a subclass a developer can leverage the full thing.
ai_model_id (str | None): The id of the AI model that generated this response.
metadata (dict[str, Any]): Any metadata that should be attached to the response.
ai_model_id: The id of the AI model that generated this response.
metadata: Any metadata that should be attached to the response.
"""
temp_data_uri = None
temp_data_uri: DataUri | None = None
if data_uri:
temp_data_uri = DataUri.from_data_uri(data_uri, self.default_mime_type)
if "metadata" in kwargs:
kwargs["metadata"].update(temp_data_uri.parameters)
else:
kwargs["metadata"] = temp_data_uri.parameters
elif data:
if isinstance(data, str):
temp_data_uri = DataUri(
data_str=data, data_format=data_format, mime_type=mime_type or self.default_mime_type
)
else:
temp_data_uri = DataUri(
data_bytes=data, data_format=data_format, mime_type=mime_type or self.default_mime_type
)
kwargs.setdefault("metadata", {})
kwargs["metadata"].update(temp_data_uri.parameters)
elif data is not None:
match data:
case bytes():
temp_data_uri = DataUri(
data_bytes=data, data_format=data_format, mime_type=mime_type or self.default_mime_type
)
case ndarray():
temp_data_uri = DataUri(
data_array=data, data_format=data_format, mime_type=mime_type or self.default_mime_type
)
case str():
temp_data_uri = DataUri(
data_str=data, data_format=data_format, mime_type=mime_type or self.default_mime_type
)

if uri is not None:
if isinstance(uri, str) and os.path.exists(uri):
uri = str(FilePath(uri))
if os.path.isfile(uri):
uri = str(Path(uri))
else:
raise ContentInitializationError("URI must be a file path, not a directory.")
elif isinstance(uri, str):
uri = Url(uri)

Expand All @@ -105,28 +114,36 @@ def data_uri(self) -> str:
@data_uri.setter
def data_uri(self, value: str):
"""Set the data uri."""
self._data_uri = DataUri.from_data_uri(value)
if not self._data_uri:
self._data_uri = DataUri.from_data_uri(value, self.default_mime_type)
else:
self._data_uri.update_data(value)
self.metadata.update(self._data_uri.parameters)

@property
def data(self) -> bytes:
"""Get the data."""
if self._data_uri and self._data_uri.data_array:
return self._data_uri.data_array.tobytes()
if self._data_uri and self._data_uri.data_bytes:
return self._data_uri.data_bytes
if self._data_uri and self._data_uri.data_str:
return self._data_uri.data_str.encode("utf-8")
return b""

@data.setter
def data(self, value: str | bytes):
def data(self, value: str | bytes | ndarray):
"""Set the data."""
if self._data_uri:
self._data_uri.update_data(value)
else:
if isinstance(value, str):
return
match value:
case ndarray():
self._data_uri = DataUri(data_array=value, mime_type=self.mime_type)
case str():
self._data_uri = DataUri(data_str=value, mime_type=self.mime_type)
else:
case bytes():
self._data_uri = DataUri(data_bytes=value, mime_type=self.mime_type)
case _:
raise ContentException("Data must be a string, bytes, or numpy array.")

@property
def mime_type(self) -> str:
Expand Down Expand Up @@ -167,6 +184,9 @@ def from_element(cls: type[_T], element: Element) -> _T:

def write_to_file(self, path: str | FilePath) -> None:
"""Write the data to a file."""
if self._data_uri and self._data_uri.data_array is not None:
self._data_uri.data_array.tofile(path)
return
with open(path, "wb") as file:
file.write(self.data)

Expand Down
2 changes: 1 addition & 1 deletion python/semantic_kernel/contents/chat_message_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class ChatMessageContent(KernelContent):
__str__: Returns the content of the response.
"""

content_type: Literal[ContentTypes.CHAT_MESSAGE_CONTENT] = Field(CHAT_MESSAGE_CONTENT_TAG, init=False) # type: ignore
content_type: Literal[ContentTypes.CHAT_MESSAGE_CONTENT] = Field(default=CHAT_MESSAGE_CONTENT_TAG, init=False) # type: ignore
tag: ClassVar[str] = CHAT_MESSAGE_CONTENT_TAG
role: AuthorRole
name: str | None = None
Expand Down
Loading

0 comments on commit cbfd7e9

Please sign in to comment.