Skip to content
Merged
8 changes: 8 additions & 0 deletions examples/apis.json
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,13 @@
"func": "get_point_cloud_map",
"packagePath": "viam.services",
"importName": "SLAMClient"
},
"audio_in": {
"func": "get_properties",
"packagePath": "viam.components"
},
"audio_out": {
"func": "get_properties",
"packagePath": "viam.components"
}
}
119 changes: 119 additions & 0 deletions examples/server/v1/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
from PIL import Image

from viam.components.arm import Arm
from viam.components.audio_in import AudioIn, AudioResponse
from viam.components.audio_input import AudioInput
from viam.components.audio_out import AudioOut
from viam.components.base import Base
from viam.components.board import Board, TickStream
from viam.components.camera import Camera
Expand Down Expand Up @@ -52,6 +54,8 @@
from viam.proto.component.audioinput import AudioChunk, AudioChunkInfo, SampleFormat
from viam.proto.component.encoder import PositionType
from viam.utils import SensorReading
from viam.proto.component.audioin import AudioChunk as AudioInChunk
from viam.proto.common import AudioInfo

GEOMETRIES = [
Geometry(center=Pose(x=1, y=2, z=3, o_x=2, o_y=3, o_z=4, theta=20), sphere=Sphere(radius_mm=2)),
Expand Down Expand Up @@ -173,6 +177,121 @@ async def get_geometries(self, extra: Optional[Dict[str, Any]] = None, **kwargs)
return GEOMETRIES


class ExampleAudioIn(AudioIn):
def __init__(self, name: str):
super().__init__(name)
self.sample_rate = 44100
self.num_channels = 2
self.supported_codecs = ["pcm16"]
self.chunk_count = 0
self.latency = timedelta(milliseconds=20)
self.volume_scale = 0.2
self.frequency_hz = 440

async def get_audio(self, codec: str, duration_seconds: float, previous_timestamp_ns: int,
*, timeout: Optional[float] = None, **kwargs) -> AudioIn.AudioStream:

async def read() -> AsyncIterator[AudioIn.AudioResponse]:
# Generate chunks based on duration
chunk_duration_ms = 100 # 100ms per chunk
chunks_to_generate = max(1, int((duration_seconds * 1000) / chunk_duration_ms))

for i in range(chunks_to_generate):
# Generate audio data (sine wave pattern)
chunk_data = b""
samples_per_chunk = int(self.sample_rate * (chunk_duration_ms / 1000))

for sample in range(samples_per_chunk):
# Calculate the timing in seconds of this audio sample
time_offset = (i * chunk_duration_ms / 1000) + (sample / self.sample_rate)
# Generate one 16-bit PCM audio sample for a sine wave
# 32767 scales the value from (-1,1) to full 16 bit signed range (-32768,32767)
amplitude = int(32767 * self.volume_scale * math.sin(2 * math.pi * self.frequency_hz * time_offset))

# Convert to 16-bit PCM stereo
sample_bytes = amplitude.to_bytes(2, byteorder='little', signed=True)
chunk_data += sample_bytes * self.num_channels

chunk_start_time = previous_timestamp_ns + (i * chunk_duration_ms * 1000000) # Convert ms to ns
chunk_end_time = chunk_start_time + (chunk_duration_ms * 1000000)

audio_chunk = AudioInChunk(
audio_data=bytes(chunk_data),
audio_info=AudioInfo(
codec=codec,
sample_rate_hz=int(self.sample_rate),
num_channels=self.num_channels
),
sequence=i,
start_timestamp_nanoseconds=chunk_start_time,
end_timestamp_nanoseconds=chunk_end_time
)
audio_response = AudioResponse(audio=audio_chunk)
yield audio_response

await asyncio.sleep(self.latency.total_seconds())

return StreamWithIterator(read())

async def get_properties(self, *, timeout: Optional[float] = None, **kwargs) -> AudioIn.Properties:
"""Return the audio input device properties."""
return AudioIn.Properties(
supported_codecs=self.supported_codecs,
sample_rate_hz=self.sample_rate,
num_channels=self.num_channels
)

async def get_geometries(self, extra: Optional[Dict[str, Any]] = None, **kwargs) -> List[Geometry]:
return GEOMETRIES


class ExampleAudioOut(AudioOut):
def __init__(self, name: str):
super().__init__(name)
self.sample_rate = 44100
self.num_channels = 2
self.supported_codecs = ["pcm16", "mp3", "wav"]
self.volume = 1.0
self.is_playing = False

async def play(self,
data: bytes,
info: Optional[AudioInfo] = None,
*,
extra: Optional[Dict[str, Any]] = None,
timeout: Optional[float] = None,
**kwargs) -> None:
"""Play the given audio data."""

# Simulate playing audio
self.is_playing = True
if info:
print(f"Playing audio: {len(data)} bytes, codec={info.codec}, "
f"sample_rate={info.sample_rate_hz}, channels={info.num_channels}")
else:
print(f"Playing audio: {len(data)} bytes (no audio info provided)")

await asyncio.sleep(0.1)

self.is_playing = False

async def get_properties(self,
*,
extra: Optional[Dict[str, Any]] = None,
timeout: Optional[float] = None,
**kwargs) -> AudioOut.Properties:
"""Return the audio output device properties."""

return AudioOut.Properties(
supported_codecs=self.supported_codecs,
sample_rate_hz=self.sample_rate,
num_channels=self.num_channels
)

async def get_geometries(self, extra: Optional[Dict[str, Any]] = None, **kwargs) -> List[Geometry]:
return GEOMETRIES


class ExampleBase(Base):
def __init__(self, name: str):
self.position = 0
Expand Down
6 changes: 6 additions & 0 deletions examples/server/v1/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
from .components import (
ExampleAnalog,
ExampleArm,
ExampleAudioIn,
ExampleAudioInput,
ExampleAudioOut,
ExampleBase,
ExampleBoard,
ExampleCamera,
Expand All @@ -30,6 +32,8 @@
async def run(host: str, port: int, log_level: int):
my_arm = ExampleArm("arm0")
my_audio_input = ExampleAudioInput("audio_input0")
my_audio_in = ExampleAudioIn("audio_in0")
my_audio_out = ExampleAudioOut("audio_out0")
my_base = ExampleBase("base0")
my_board = ExampleBoard(
name="board",
Expand Down Expand Up @@ -75,7 +79,9 @@ async def run(host: str, port: int, log_level: int):
server = Server(
resources=[
my_arm,
my_audio_in,
my_audio_input,
my_audio_out,
my_base,
my_board,
my_camera,
Expand Down
24 changes: 24 additions & 0 deletions src/viam/components/audio_in/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from viam.resource.registry import Registry, ResourceRegistration

from viam.proto.common import AudioInfo
from viam.media.audio import AudioCodec
from .audio_in import AudioIn
from .client import AudioInClient
from .service import AudioInRPCService

AudioResponse = AudioIn.AudioResponse

__all__ = [
"AudioIn",
"AudioResponse",
"AudioInfo",
"AudioCodec",
]

Registry.register_api(
ResourceRegistration(
AudioIn,
AudioInRPCService,
lambda name, channel: AudioInClient(name, channel),
)
)
75 changes: 75 additions & 0 deletions src/viam/components/audio_in/audio_in.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import abc
import sys
from typing import Final, Optional

from viam.streams import Stream

from viam.proto.common import GetPropertiesResponse
from viam.proto.component.audioin import GetAudioResponse
from viam.resource.types import API, RESOURCE_NAMESPACE_RDK, RESOURCE_TYPE_COMPONENT

from ..component_base import ComponentBase

if sys.version_info >= (3, 10):
from typing import TypeAlias
else:
from typing_extensions import TypeAlias


class AudioIn(ComponentBase):
"""AudioIn represents a component that can capture audio.

This acts as an abstract base class for any drivers representing specific
audio input implementations. This cannot be used on its own. If the ``__init__()`` function is
overridden, it must call the ``super().__init__()`` function.
"""

API: Final = API( # pyright: ignore [reportIncompatibleVariableOverride]
RESOURCE_NAMESPACE_RDK, RESOURCE_TYPE_COMPONENT, "audio_in"
)

Properties: "TypeAlias" = GetPropertiesResponse
AudioResponse: "TypeAlias" = GetAudioResponse
AudioStream = Stream[AudioResponse]


@abc.abstractmethod
async def get_audio(self, codec: str,
duration_seconds: float,
previous_timestamp_ns:int,
*, timeout: Optional[float] = None, **kwargs) -> AudioStream:
"""
Get a stream of audio from the device

::
my_audio_in = AudioIn.from_robot(robot=machine, name="my_audio_in")

stream = await my_audio_in.get_audio(
codec=AudioCodec.PCM16,
duration_seconds=10.0,
previous_timestamp_ns=0
)

Args:
codec (str): The desired codec of the returned audio data
duration_seconds (float): duration of the stream. 0 = indefinite stream
previous_timestamp_ns (int): starting timestamp in nanoseconds for recording continuity.
Set to 0 to begin recording from the current time.
Returns:
AudioStream: stream of audio chunks.
...
"""

@abc.abstractmethod
async def get_properties(self, *, timeout: Optional[float] = None, **kwargs) -> Properties:
"""
Get the audio device's properties

::
my_audio_in = AudioIn.from_robot(robot=machine, name="my_audio_in")
properties = await my_audio_in.get_properties()

Returns:
Properties: The properties of the audio in device.
...
"""
79 changes: 79 additions & 0 deletions src/viam/components/audio_in/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from typing import Any, Dict, List, Mapping, Optional
import uuid

from grpclib.client import Channel

from viam.proto.component.audioin import GetAudioRequest, GetAudioResponse
from viam.proto.common import (
DoCommandRequest,
DoCommandResponse,
GetPropertiesRequest,
Geometry)
from grpclib.client import Stream as ClientStream
from viam.proto.component.audioin import AudioInServiceStub
from viam.resource.rpc_client_base import ReconfigurableResourceRPCClientBase
from viam.streams import StreamWithIterator

from .audio_in import AudioIn
from viam.utils import ValueTypes, dict_to_struct, get_geometries, struct_to_dict


class AudioInClient(AudioIn, ReconfigurableResourceRPCClientBase):

def __init__(self, name: str, channel: Channel) -> None:
self.channel = channel
self.client = AudioInServiceStub(channel)
super().__init__(name)


async def get_audio(self,
codec:str,
duration_seconds: float,
previous_timestamp_ns:int,
*,
extra: Optional[Dict[str, Any]] = None,
**kwargs,
):
request = GetAudioRequest(name=self.name, codec = codec,
duration_seconds=duration_seconds,
previous_timestamp_nanoseconds = previous_timestamp_ns,
request_id = str(uuid.uuid4()),
extra=dict_to_struct(extra))
async def read():
md = kwargs.get("metadata", self.Metadata()).proto
audio_stream: ClientStream[GetAudioRequest, GetAudioResponse]
async with self.client.GetAudio.open(metadata=md) as audio_stream:
try:
await audio_stream.send_message(request, end=True)
async for response in audio_stream:
yield response
except Exception as e:
raise (e)

return StreamWithIterator(read())


async def get_properties(
self,
*,
timeout: Optional[float] = None,
**kwargs,
) -> AudioIn.Properties:
md = kwargs.get("metadata", self.Metadata()).proto
return await self.client.GetProperties(GetPropertiesRequest(name=self.name), timeout=timeout, metadata=md)

async def do_command(
self,
command: Mapping[str, ValueTypes],
*,
timeout: Optional[float] = None,
**kwargs,
) -> Mapping[str, ValueTypes]:
md = kwargs.get("metadata", self.Metadata()).proto
request = DoCommandRequest(name=self.name, command=dict_to_struct(command))
response: DoCommandResponse = await self.client.DoCommand(request, timeout=timeout, metadata=md)
return struct_to_dict(response.result)

async def get_geometries(self, *, extra: Optional[Dict[str, Any]] = None, timeout: Optional[float] = None, **kwargs) -> List[Geometry]:
md = kwargs.get("metadata", self.Metadata())
return await get_geometries(self.client, self.name, extra, timeout, md)
Loading
Loading