diff --git a/README.md b/README.md
index bd49f41..daa2dd4 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ lk app create --template agent-starter-swift --sandbox
 Then, build and run the app from Xcode by opening `VoiceAgent.xcodeproj`. You may need to adjust your app signing settings to run the app on your device.
 
 > [!NOTE]
-> To setup without the LiveKit CLI, clone the repository and then either create a `VoiceAgent/.env.xcconfig` with a `LIVEKIT_SANDBOX_ID` (if using a [Sandbox Token Server](https://cloud.livekit.io/projects/p_/sandbox/templates/token-server)), or open `TokenService.swift` and add your [manually generated](#token-generation) URL and token.
+> To set up without the LiveKit CLI, clone the repository and then either create a `VoiceAgent/.env.xcconfig` with a `LIVEKIT_SANDBOX_ID` (if using a [Sandbox Token Server](https://cloud.livekit.io/projects/p_/sandbox/templates/token-server)), or modify `VoiceAgent/VoiceAgentApp.swift` to replace the `SandboxTokenSource` with a custom token source implementation.
 
 ## Feature overview
 
@@ -32,9 +32,15 @@ This starter app has support for a number of features of the agents framework, a
 
 ### Text, video, and voice input
 
-This app supports text, video, and/or voice input according to the needs of your agent. To update the features enabled in the app, edit `VoiceAgent/VoiceAgentApp.swift` and update `AgentFeatures.current` to include or exclude the features you need.
+This app supports text, video, and/or voice input according to the needs of your agent. To change which features are enabled, edit `VoiceAgent/VoiceAgentApp.swift` and modify the `.environment()` modifiers.
 
-By default, only voice and text input are enabled.
+By default, all features (voice, video, and text input) are enabled. To disable a feature, change the value from `true` to `false`:
+
+```swift
+.environment(\.voiceEnabled, true) // Enable voice input
+.environment(\.videoEnabled, false) // Disable video input
+.environment(\.textEnabled, true) // Enable text input
+```
 
 Available input types:
 - `.voice`: Allows the user to speak to the agent using their microphone. **Requires microphone permissions.**
@@ -43,23 +49,23 @@ Available input types:
 
 If you have trouble with screensharing, refer to [the docs](https://docs.livekit.io/home/client/tracks/screenshare/) for more setup instructions.
 
-### Preconnect audio buffer
+### Session
 
-This app uses `withPreConnectAudio` to capture and buffer audio before the room connection completes. This allows the connection to appear "instant" from the user's perspective and makes your app more responsive. To disable this feature, remove the call to `withPreConnectAudio` as below:
+The app is built on top of two main observable components from the [LiveKit Swift SDK](https://github.com/livekit/client-sdk-swift):
+- A `Session` object that connects to the LiveKit infrastructure, provides access to the `Agent` and its local state, and sends and receives text messages.
+- A `LocalMedia` object that manages the local media tracks (audio, video, screen sharing) and their lifecycle.
+
+### Preconnect audio buffer
 
-- Location: `VoiceAgent/App/AppViewModel.swift` → `connectWithVoice()`
-- To disable preconnect buffering but keep voice:
-  - Replace the `withPreConnectAudio { ... }` block with a standard `room.connect` call and enable the microphone after connect, for example:
-    - Connect with `connectOptions: .init(enableMicrophone: true)` without wrapping in `withPreConnectAudio`, or
-    - Connect with microphone disabled and call `room.localParticipant.setMicrophone(enabled: true)` after connection.
+This app enables `preConnectAudio` by default to capture and buffer audio before the room connection completes. This allows the connection to appear "instant" from the user's perspective and makes your app more responsive. To disable this feature, set `preConnectAudio` to `false` in `SessionOptions` when creating the `Session`.
 
 ### Virtual avatar support
 
-If your agent publishes a [virtual avatar](https://docs.livekit.io/agents/integrations/avatar/), this app will automatically render the avatar’s camera feed in `AgentParticipantView` when available.
+If your agent publishes a [virtual avatar](https://docs.livekit.io/agents/integrations/avatar/), this app will automatically render the avatar's camera feed in `AgentView` when available.
 
 ## Token generation in production
 
-In a production environment, you will be responsible for developing a solution to [generate tokens for your users](https://docs.livekit.io/home/server/generating-tokens/) which is integrated with your authentication solution. You should disable your sandbox token server and modify `TokenService.swift` to use your own token server.
+In a production environment, you are responsible for developing a solution to [generate tokens for your users](https://docs.livekit.io/home/server/generating-tokens/) that integrates with your authentication system. Replace the `SandboxTokenSource` with an `EndpointTokenSource`, or with your own `TokenSourceFixed` or `TokenSourceConfigurable` implementation. You can also apply the `.cached()` extension to cache valid tokens and avoid unnecessary token requests.
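+
+As a minimal sketch (the initializer shapes below are assumptions; verify the exact signatures against your SDK version, and substitute your own token endpoint URL):
+
+```swift
+import Foundation
+import LiveKit
+import LiveKitComponents
+
+// Hypothetical endpoint URL; point this at your own token server.
+let tokenSource = EndpointTokenSource(url: URL(string: "https://example.com/api/token")!)
+    .cached() // reuse tokens that are still valid instead of re-fetching
+
+let session = Session(
+    tokenSource: tokenSource,
+    options: SessionOptions(preConnectAudio: true) // set to false to disable the preconnect buffer
+)
+```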
## Running on Simulator diff --git a/VoiceAgent.xcodeproj/project.pbxproj b/VoiceAgent.xcodeproj/project.pbxproj index 9d1998a..8704328 100644 --- a/VoiceAgent.xcodeproj/project.pbxproj +++ b/VoiceAgent.xcodeproj/project.pbxproj @@ -10,7 +10,6 @@ ACAEBA5B2DE6EE970072E93E /* ReplayKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = ACAEBA5A2DE6EE970072E93E /* ReplayKit.framework */; }; ACAEBA622DE6EE970072E93E /* BroadcastExtension.appex in Embed Foundation Extensions */ = {isa = PBXBuildFile; fileRef = ACAEBA582DE6EE970072E93E /* BroadcastExtension.appex */; platformFilters = (ios, xros, ); settings = {ATTRIBUTES = (RemoveHeadersOnCopy, ); }; }; ACAEBA692DE6EF4B0072E93E /* LiveKit in Frameworks */ = {isa = PBXBuildFile; productRef = ACAEBA682DE6EF4B0072E93E /* LiveKit */; }; - ACFBA1DB2D8D5CBE0021202B /* Collections in Frameworks */ = {isa = PBXBuildFile; productRef = ACFBA1DA2D8D5CBE0021202B /* Collections */; }; B5E1B90F2D14E9EC00A38CB6 /* LiveKitComponents in Frameworks */ = {isa = PBXBuildFile; productRef = B5E1B90E2D14E9EC00A38CB6 /* LiveKitComponents */; }; B5E1B9122D14E9F500A38CB6 /* LiveKit in Frameworks */ = {isa = PBXBuildFile; productRef = B5E1B9112D14E9F500A38CB6 /* LiveKit */; }; /* End PBXBuildFile section */ @@ -23,13 +22,6 @@ remoteGlobalIDString = ACAEBA572DE6EE970072E93E; remoteInfo = BroadcastExtension; }; - ACC2802B2DEDDA1D0023C137 /* PBXContainerItemProxy */ = { - isa = PBXContainerItemProxy; - containerPortal = B5B5E3AA2D124AE00099C9BE /* Project object */; - proxyType = 1; - remoteGlobalIDString = B5B5E3B12D124AE00099C9BE; - remoteInfo = VoiceAgent; - }; /* End PBXContainerItemProxy section */ /* Begin PBXCopyFilesBuildPhase section */ @@ -49,7 +41,6 @@ /* Begin PBXFileReference section */ ACAEBA582DE6EE970072E93E /* BroadcastExtension.appex */ = {isa = PBXFileReference; explicitFileType = "wrapper.app-extension"; includeInIndex = 0; path = BroadcastExtension.appex; sourceTree = BUILT_PRODUCTS_DIR; }; ACAEBA5A2DE6EE970072E93E /* ReplayKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = ReplayKit.framework; path = System/Library/Frameworks/ReplayKit.framework; sourceTree = SDKROOT; }; - ACC280272DEDDA1D0023C137 /* VoiceAgentTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = VoiceAgentTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; B5B5E3B22D124AE00099C9BE /* VoiceAgent.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = VoiceAgent.app; sourceTree = BUILT_PRODUCTS_DIR; }; /* End PBXFileReference section */ @@ -82,11 +73,6 @@ path = BroadcastExtension; sourceTree = ""; }; - ACC280282DEDDA1D0023C137 /* VoiceAgentTests */ = { - isa = PBXFileSystemSynchronizedRootGroup; - path = VoiceAgentTests; - sourceTree = ""; - }; B5B5E3B42D124AE00099C9BE /* VoiceAgent */ = { isa = PBXFileSystemSynchronizedRootGroup; exceptions = ( @@ -107,18 +93,10 @@ ); runOnlyForDeploymentPostprocessing = 0; }; - ACC280242DEDDA1D0023C137 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; B5B5E3AF2D124AE00099C9BE /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( - ACFBA1DB2D8D5CBE0021202B /* Collections in Frameworks */, B5E1B90F2D14E9EC00A38CB6 /* LiveKitComponents in Frameworks */, B5E1B9122D14E9F500A38CB6 /* LiveKit in Frameworks */, ); @@ -140,7 +118,6 @@ children = ( B5B5E3B42D124AE00099C9BE /* 
VoiceAgent */, ACAEBA5C2DE6EE970072E93E /* BroadcastExtension */, - ACC280282DEDDA1D0023C137 /* VoiceAgentTests */, ACAEBA592DE6EE970072E93E /* Frameworks */, B5B5E3B32D124AE00099C9BE /* Products */, ); @@ -151,7 +128,6 @@ children = ( B5B5E3B22D124AE00099C9BE /* VoiceAgent.app */, ACAEBA582DE6EE970072E93E /* BroadcastExtension.appex */, - ACC280272DEDDA1D0023C137 /* VoiceAgentTests.xctest */, ); name = Products; sourceTree = ""; @@ -182,29 +158,6 @@ productReference = ACAEBA582DE6EE970072E93E /* BroadcastExtension.appex */; productType = "com.apple.product-type.app-extension"; }; - ACC280262DEDDA1D0023C137 /* VoiceAgentTests */ = { - isa = PBXNativeTarget; - buildConfigurationList = ACC2802F2DEDDA1D0023C137 /* Build configuration list for PBXNativeTarget "VoiceAgentTests" */; - buildPhases = ( - ACC280232DEDDA1D0023C137 /* Sources */, - ACC280242DEDDA1D0023C137 /* Frameworks */, - ACC280252DEDDA1D0023C137 /* Resources */, - ); - buildRules = ( - ); - dependencies = ( - ACC2802C2DEDDA1D0023C137 /* PBXTargetDependency */, - ); - fileSystemSynchronizedGroups = ( - ACC280282DEDDA1D0023C137 /* VoiceAgentTests */, - ); - name = VoiceAgentTests; - packageProductDependencies = ( - ); - productName = VoiceAgentTests; - productReference = ACC280272DEDDA1D0023C137 /* VoiceAgentTests.xctest */; - productType = "com.apple.product-type.bundle.unit-test"; - }; B5B5E3B12D124AE00099C9BE /* VoiceAgent */ = { isa = PBXNativeTarget; buildConfigurationList = B5B5E3C12D124AE20099C9BE /* Build configuration list for PBXNativeTarget "VoiceAgent" */; @@ -226,7 +179,6 @@ packageProductDependencies = ( B5E1B90E2D14E9EC00A38CB6 /* LiveKitComponents */, B5E1B9112D14E9F500A38CB6 /* LiveKit */, - ACFBA1DA2D8D5CBE0021202B /* Collections */, ); productName = VoiceAgent; productReference = B5B5E3B22D124AE00099C9BE /* VoiceAgent.app */; @@ -245,10 +197,6 @@ ACAEBA572DE6EE970072E93E = { CreatedOnToolsVersion = 16.3; }; - ACC280262DEDDA1D0023C137 = { - CreatedOnToolsVersion = 16.3; - TestTargetID = B5B5E3B12D124AE00099C9BE; - }; B5B5E3B12D124AE00099C9BE = { CreatedOnToolsVersion = 16.2; }; @@ -266,7 +214,6 @@ packageReferences = ( B5E1B90D2D14E9EC00A38CB6 /* XCRemoteSwiftPackageReference "components-swift" */, B5E1B9102D14E9F500A38CB6 /* XCRemoteSwiftPackageReference "client-sdk-swift" */, - ACFBA1D92D8D5CBE0021202B /* XCRemoteSwiftPackageReference "swift-collections" */, ); preferredProjectObjectVersion = 77; productRefGroup = B5B5E3B32D124AE00099C9BE /* Products */; @@ -274,7 +221,6 @@ projectRoot = ""; targets = ( B5B5E3B12D124AE00099C9BE /* VoiceAgent */, - ACC280262DEDDA1D0023C137 /* VoiceAgentTests */, ACAEBA572DE6EE970072E93E /* BroadcastExtension */, ); }; @@ -288,13 +234,6 @@ ); runOnlyForDeploymentPostprocessing = 0; }; - ACC280252DEDDA1D0023C137 /* Resources */ = { - isa = PBXResourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; B5B5E3B02D124AE00099C9BE /* Resources */ = { isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; @@ -312,13 +251,6 @@ ); runOnlyForDeploymentPostprocessing = 0; }; - ACC280232DEDDA1D0023C137 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; B5B5E3AE2D124AE00099C9BE /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; @@ -338,11 +270,6 @@ target = ACAEBA572DE6EE970072E93E /* BroadcastExtension */; targetProxy = ACAEBA602DE6EE970072E93E /* PBXContainerItemProxy */; }; - ACC2802C2DEDDA1D0023C137 /* 
PBXTargetDependency */ = { - isa = PBXTargetDependency; - target = B5B5E3B12D124AE00099C9BE /* VoiceAgent */; - targetProxy = ACC2802B2DEDDA1D0023C137 /* PBXContainerItemProxy */; - }; /* End PBXTargetDependency section */ /* Begin XCBuildConfiguration section */ @@ -411,58 +338,6 @@ }; name = Release; }; - ACC2802D2DEDDA1D0023C137 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - BUNDLE_LOADER = "$(TEST_HOST)"; - CODE_SIGN_IDENTITY = "Apple Development"; - "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development"; - CODE_SIGN_STYLE = Automatic; - DEAD_CODE_STRIPPING = YES; - DEVELOPMENT_TEAM = 76TVFCUKK7; - GENERATE_INFOPLIST_FILE = YES; - IPHONEOS_DEPLOYMENT_TARGET = 18.0; - MACOSX_DEPLOYMENT_TARGET = 15.0; - MARKETING_VERSION = 1.0; - PRODUCT_BUNDLE_IDENTIFIER = com.livekit.VoiceAgentTests; - PRODUCT_NAME = "$(TARGET_NAME)"; - PROVISIONING_PROFILE_SPECIFIER = ""; - SDKROOT = auto; - SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx xros xrsimulator"; - SWIFT_EMIT_LOC_STRINGS = NO; - SWIFT_VERSION = 5.0; - TARGETED_DEVICE_FAMILY = "1,2,7"; - TEST_HOST = "$(BUILT_PRODUCTS_DIR)/VoiceAgent.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/VoiceAgent"; - XROS_DEPLOYMENT_TARGET = 2.0; - }; - name = Debug; - }; - ACC2802E2DEDDA1D0023C137 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - BUNDLE_LOADER = "$(TEST_HOST)"; - CODE_SIGN_IDENTITY = "Apple Development"; - "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development"; - CODE_SIGN_STYLE = Automatic; - DEAD_CODE_STRIPPING = YES; - DEVELOPMENT_TEAM = 76TVFCUKK7; - GENERATE_INFOPLIST_FILE = YES; - IPHONEOS_DEPLOYMENT_TARGET = 18.0; - MACOSX_DEPLOYMENT_TARGET = 15.0; - MARKETING_VERSION = 1.0; - PRODUCT_BUNDLE_IDENTIFIER = com.livekit.VoiceAgentTests; - PRODUCT_NAME = "$(TARGET_NAME)"; - PROVISIONING_PROFILE_SPECIFIER = ""; - SDKROOT = auto; - SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx xros xrsimulator"; - SWIFT_EMIT_LOC_STRINGS = NO; - SWIFT_VERSION = 5.0; - TARGETED_DEVICE_FAMILY = "1,2,7"; - TEST_HOST = "$(BUILT_PRODUCTS_DIR)/VoiceAgent.app/$(BUNDLE_EXECUTABLE_FOLDER_PATH)/VoiceAgent"; - XROS_DEPLOYMENT_TARGET = 2.0; - }; - name = Release; - }; B5B5E3BF2D124AE20099C9BE /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { @@ -689,15 +564,6 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; - ACC2802F2DEDDA1D0023C137 /* Build configuration list for PBXNativeTarget "VoiceAgentTests" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - ACC2802D2DEDDA1D0023C137 /* Debug */, - ACC2802E2DEDDA1D0023C137 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; B5B5E3AD2D124AE00099C9BE /* Build configuration list for PBXProject "VoiceAgent" */ = { isa = XCConfigurationList; buildConfigurations = ( @@ -719,28 +585,20 @@ /* End XCConfigurationList section */ /* Begin XCRemoteSwiftPackageReference section */ - ACFBA1D92D8D5CBE0021202B /* XCRemoteSwiftPackageReference "swift-collections" */ = { - isa = XCRemoteSwiftPackageReference; - repositoryURL = "https://github.com/apple/swift-collections"; - requirement = { - kind = upToNextMajorVersion; - minimumVersion = 1.1.4; - }; - }; B5E1B90D2D14E9EC00A38CB6 /* XCRemoteSwiftPackageReference "components-swift" */ = { isa = XCRemoteSwiftPackageReference; repositoryURL = "https://github.com/livekit/components-swift"; requirement = { - kind = upToNextMajorVersion; - minimumVersion = 0.1.5; + branch = "blaze/agent-conversation"; + kind = branch; }; }; B5E1B9102D14E9F500A38CB6 /* 
XCRemoteSwiftPackageReference "client-sdk-swift" */ = { isa = XCRemoteSwiftPackageReference; repositoryURL = "https://github.com/livekit/client-sdk-swift"; requirement = { - kind = upToNextMajorVersion; - minimumVersion = 2.7.1; + branch = "blaze/agent-conversation"; + kind = branch; }; }; /* End XCRemoteSwiftPackageReference section */ @@ -750,11 +608,6 @@ isa = XCSwiftPackageProductDependency; productName = LiveKit; }; - ACFBA1DA2D8D5CBE0021202B /* Collections */ = { - isa = XCSwiftPackageProductDependency; - package = ACFBA1D92D8D5CBE0021202B /* XCRemoteSwiftPackageReference "swift-collections" */; - productName = Collections; - }; B5E1B90E2D14E9EC00A38CB6 /* LiveKitComponents */ = { isa = XCSwiftPackageProductDependency; package = B5E1B90D2D14E9EC00A38CB6 /* XCRemoteSwiftPackageReference "components-swift" */; diff --git a/VoiceAgent.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/VoiceAgent.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index dde732e..3c30fbb 100644 --- a/VoiceAgent.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/VoiceAgent.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -6,8 +6,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/livekit/client-sdk-swift", "state" : { - "revision" : "77b00169920283acd795e46c659ceefc9e4a666e", - "version" : "2.7.1" + "branch" : "blaze/agent-conversation", + "revision" : "6ea1621c9651ce1eb8fc5eca18cc196987317087" } }, { @@ -15,8 +15,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/livekit/components-swift", "state" : { - "revision" : "f756f3696f4a9b208430e0e239ee7b7b337222ce", - "version" : "0.1.5" + "branch" : "blaze/agent-conversation", + "revision" : "6a5061959f90890ac12f8ddd068e06e763b7b56d" } }, { @@ -51,8 +51,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/livekit/webrtc-xcframework.git", "state" : { - "revision" : "609aa5e7dd818ba85eb483153b572fd698785a40", - "version" : "137.7151.4" + "revision" : "5bda55f1f7ba0df114de60b760f5206a07e0fab7", + "version" : "137.7151.5" } } ], diff --git a/VoiceAgent.xcodeproj/xcshareddata/xcschemes/VoiceAgent.xcscheme b/VoiceAgent.xcodeproj/xcshareddata/xcschemes/VoiceAgent.xcscheme index e1b74c4..622a245 100644 --- a/VoiceAgent.xcodeproj/xcshareddata/xcschemes/VoiceAgent.xcscheme +++ b/VoiceAgent.xcodeproj/xcshareddata/xcschemes/VoiceAgent.xcscheme @@ -27,13 +27,8 @@ buildConfiguration = "Debug" selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB" selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" - shouldUseLaunchSchemeArgsEnv = "YES"> - - - - + shouldUseLaunchSchemeArgsEnv = "YES" + shouldAutocreateTestPlan = "YES"> some View { StartView() + .onAppear { + chat = false + } } @ViewBuilder private func interactions() -> some View { #if os(visionOS) - VisionInteractionView(keyboardFocus: $keyboardFocus) - .environment(chatViewModel) + VisionInteractionView(chat: chat, keyboardFocus: $keyboardFocus) .overlay(alignment: .bottom) { agentListening() .padding(16 * .grid) } #else - switch viewModel.interactionMode { - case .text: + if chat { TextInteractionView(keyboardFocus: $keyboardFocus) - .environment(chatViewModel) - case .voice: + } else { VoiceInteractionView() .overlay(alignment: .bottom) { agentListening() @@ -85,12 +89,16 @@ struct AppView: View { @ViewBuilder private func errors() -> some View { #if !os(visionOS) - if case .reconnecting = viewModel.connectionState { - WarningView(warning: 
"warning.reconnecting") + if let error = session.error { + ErrorView(error: error) { session.dismissError() } + } + + if let agentError = session.agent.error { + ErrorView(error: agentError) { Task { await session.end() }} } - if let error { - ErrorView(error: error) { self.error = nil } + if let mediaError = localMedia.error { + ErrorView(error: mediaError) { localMedia.dismissError() } } #endif } @@ -98,14 +106,17 @@ struct AppView: View { @ViewBuilder private func agentListening() -> some View { ZStack { - if chatViewModel.messages.isEmpty, - !viewModel.isCameraEnabled, - !viewModel.isScreenShareEnabled + if session.messages.isEmpty, + !localMedia.isCameraEnabled, + !localMedia.isScreenShareEnabled { - AgentListeningView() + Text("agent.listening") + .font(.system(size: 15)) + .shimmering() + .transition(.blurReplace) } } - .animation(.default, value: chatViewModel.messages.isEmpty) + .animation(.default, value: session.messages.isEmpty) } } diff --git a/VoiceAgent/App/AppViewModel.swift b/VoiceAgent/App/AppViewModel.swift deleted file mode 100644 index 205284e..0000000 --- a/VoiceAgent/App/AppViewModel.swift +++ /dev/null @@ -1,307 +0,0 @@ -@preconcurrency import AVFoundation -import Combine -import LiveKit -import Observation - -/// The main view model encapsulating root states and behaviors of the app -/// such as connection, published tracks, etc. -/// -/// It consumes `LiveKit.Room` object, observing its internal state and propagating appropriate changes. -/// It does not expose any publicly mutable state, encouraging unidirectional data flow. -@MainActor -@Observable -final class AppViewModel { - // MARK: - Constants - - private enum Constants { - static let agentConnectionTimeout: TimeInterval = 20 - } - - // MARK: - Errors - - enum Error: LocalizedError { - case agentNotConnected - - var errorDescription: String? { - switch self { - case .agentNotConnected: - "Agent did not connect to the Room" - } - } - } - - // MARK: - Modes - - enum InteractionMode { - case voice - case text - } - - let agentFeatures: AgentFeatures - - // MARK: - State - - // MARK: Connection - - private(set) var connectionState: ConnectionState = .disconnected - private(set) var isListening = false - var isInteractive: Bool { - switch connectionState { - case .disconnected where isListening, - .connecting where isListening, - .connected, - .reconnecting: - true - default: - false - } - } - - private(set) var agent: Participant? - - private(set) var interactionMode: InteractionMode = .voice - - // MARK: Tracks - - private(set) var isMicrophoneEnabled = false - private(set) var audioTrack: (any AudioTrack)? - private(set) var isCameraEnabled = false - private(set) var cameraTrack: (any VideoTrack)? - private(set) var isScreenShareEnabled = false - private(set) var screenShareTrack: (any VideoTrack)? - - private(set) var agentAudioTrack: (any AudioTrack)? - private(set) var avatarCameraTrack: (any VideoTrack)? - - // MARK: Devices - - private(set) var audioDevices: [AudioDevice] = AudioManager.shared.inputDevices - private(set) var selectedAudioDeviceID: String = AudioManager.shared.inputDevice.deviceId - - private(set) var videoDevices: [AVCaptureDevice] = [] - private(set) var selectedVideoDeviceID: String? 
- - private(set) var canSwitchCamera = false - - // MARK: - Dependencies - - @ObservationIgnored - @Dependency(\.room) private var room - @ObservationIgnored - @Dependency(\.tokenService) private var tokenService - @ObservationIgnored - @Dependency(\.errorHandler) private var errorHandler - - // MARK: - Initialization - - init(agentFeatures: AgentFeatures = .current) { - self.agentFeatures = agentFeatures - - observeRoom() - observeDevices() - } - - private func observeRoom() { - Task { [weak self] in - guard let changes = self?.room.changes else { return } - for await _ in changes { - guard let self else { return } - - connectionState = room.connectionState - agent = room.agentParticipant - - isMicrophoneEnabled = room.localParticipant.isMicrophoneEnabled() - audioTrack = room.localParticipant.firstAudioTrack - isCameraEnabled = room.localParticipant.isCameraEnabled() - cameraTrack = room.localParticipant.firstCameraVideoTrack - isScreenShareEnabled = room.localParticipant.isScreenShareEnabled() - screenShareTrack = room.localParticipant.firstScreenShareVideoTrack - - agentAudioTrack = room.agentParticipant?.audioTracks - .first(where: { $0.source == .microphone })?.track as? AudioTrack - avatarCameraTrack = room.agentParticipant?.avatarWorker?.firstCameraVideoTrack - } - } - } - - private func observeDevices() { - Task { - do { - try AudioManager.shared.set(microphoneMuteMode: .inputMixer) // don't play mute sound effect - try await AudioManager.shared.setRecordingAlwaysPreparedMode(true) - - AudioManager.shared.onDeviceUpdate = { [weak self] _ in - Task { @MainActor in - self?.audioDevices = AudioManager.shared.inputDevices - self?.selectedAudioDeviceID = AudioManager.shared.defaultInputDevice.deviceId - } - } - - canSwitchCamera = try await CameraCapturer.canSwitchPosition() - videoDevices = try await CameraCapturer.captureDevices() - selectedVideoDeviceID = videoDevices.first?.uniqueID - } catch { - errorHandler(error) - } - } - } - - deinit { - AudioManager.shared.onDeviceUpdate = nil - } - - private func resetState() { - isListening = false - interactionMode = .voice - } - - // MARK: - Connection - - func connect() async { - errorHandler(nil) - resetState() - do { - if agentFeatures.contains(.voice) { - try await connectWithVoice() - } else { - try await connectWithoutVoice() - } - - try await checkAgentConnected() - } catch { - errorHandler(error) - resetState() - } - } - - /// Connect and enable microphone, capture pre-connect audio - private func connectWithVoice() async throws { - try await room.withPreConnectAudio { - await MainActor.run { self.isListening = true } - - let connectionDetails = try await self.getConnection() - - try await self.room.connect( - url: connectionDetails.serverUrl, - token: connectionDetails.participantToken, - connectOptions: .init(enableMicrophone: true) - ) - } - } - - /// Connect without enabling microphone - private func connectWithoutVoice() async throws { - let connectionDetails = try await getConnection() - - try await room.connect( - url: connectionDetails.serverUrl, - token: connectionDetails.participantToken, - connectOptions: .init(enableMicrophone: false) - ) - } - - private func getConnection() async throws -> TokenService.ConnectionDetails { - let roomName = "room-\(Int.random(in: 1000 ... 9999))" - let participantName = "user-\(Int.random(in: 1000 ... 9999))" - - return try await tokenService.fetchConnectionDetails( - roomName: roomName, - participantName: participantName - )! 
- } - - func disconnect() async { - await room.disconnect() - resetState() - } - - private func checkAgentConnected() async throws { - try await Task.sleep(for: .seconds(Constants.agentConnectionTimeout)) - if connectionState == .connected, agent == nil { - await disconnect() - throw Error.agentNotConnected - } - } - - // MARK: - Actions - - func toggleTextInput() { - switch interactionMode { - case .voice: - interactionMode = .text - case .text: - interactionMode = .voice - } - } - - func toggleMicrophone() async { - do { - try await room.localParticipant.setMicrophone(enabled: !isMicrophoneEnabled) - } catch { - errorHandler(error) - } - } - - func toggleCamera() async { - let enable = !isCameraEnabled - do { - // One video track at a time - if enable, isScreenShareEnabled { - try await room.localParticipant.setScreenShare(enabled: false) - } - - let device = try await CameraCapturer.captureDevices().first(where: { $0.uniqueID == selectedVideoDeviceID }) - try await room.localParticipant.setCamera(enabled: enable, captureOptions: CameraCaptureOptions(device: device)) - } catch { - errorHandler(error) - } - } - - func toggleScreenShare() async { - let enable = !isScreenShareEnabled - do { - // One video track at a time - if enable, isCameraEnabled { - try await room.localParticipant.setCamera(enabled: false) - } - try await room.localParticipant.setScreenShare(enabled: enable) - } catch { - errorHandler(error) - } - } - - #if os(macOS) - func select(audioDevice: AudioDevice) { - selectedAudioDeviceID = audioDevice.deviceId - - let device = AudioManager.shared.inputDevices.first(where: { $0.deviceId == selectedAudioDeviceID }) ?? AudioManager.shared.defaultInputDevice - AudioManager.shared.inputDevice = device - } - - func select(videoDevice: AVCaptureDevice) async { - selectedVideoDeviceID = videoDevice.uniqueID - - guard let cameraCapturer = getCameraCapturer() else { return } - do { - let captureOptions = CameraCaptureOptions(device: videoDevice) - try await cameraCapturer.set(options: captureOptions) - } catch { - errorHandler(error) - } - } - #endif - - func switchCamera() async { - guard let cameraCapturer = getCameraCapturer() else { return } - do { - try await cameraCapturer.switchCameraPosition() - } catch { - errorHandler(error) - } - } - - private func getCameraCapturer() -> CameraCapturer? { - guard let cameraTrack = cameraTrack as? LocalVideoTrack else { return nil } - return cameraTrack.capturer as? CameraCapturer - } -} diff --git a/VoiceAgent/Auth/TokenService.swift b/VoiceAgent/Auth/TokenService.swift deleted file mode 100644 index 3880480..0000000 --- a/VoiceAgent/Auth/TokenService.swift +++ /dev/null @@ -1,94 +0,0 @@ -import Foundation - -/// An example service for fetching LiveKit authentication tokens -/// -/// To use the LiveKit Cloud sandbox (development only) -/// - Enable your sandbox here https://cloud.livekit.io/projects/p_/sandbox/templates/token-server -/// - Create .env.xcconfig with your LIVEKIT_SANDBOX_ID -/// -/// To use a hardcoded token (development only) -/// - Generate a token: https://docs.livekit.io/home/cli/cli-setup/#generate-access-token -/// - Set `hardcodedServerUrl` and `hardcodedToken` below -/// -/// To use your own server (production applications) -/// - Add a token endpoint to your server with a LiveKit Server SDK https://docs.livekit.io/home/server/generating-tokens/ -/// - Modify or replace this class as needed to connect to your new token server -/// - Rejoice in your new production-ready LiveKit application! 
-/// -/// See [docs](https://docs.livekit.io/home/get-started/authentication) for more information. -actor TokenService { - struct ConnectionDetails: Codable { - let serverUrl: String - let roomName: String - let participantName: String - let participantToken: String - } - - func fetchConnectionDetails(roomName: String, participantName: String) async throws -> ConnectionDetails? { - if let hardcodedConnectionDetails = fetchHardcodedConnectionDetails(roomName: roomName, participantName: participantName) { - return hardcodedConnectionDetails - } - - return try await fetchConnectionDetailsFromSandbox(roomName: roomName, participantName: participantName) - } - - private let hardcodedServerUrl: String? = nil - private let hardcodedToken: String? = nil - - private let sandboxId: String? = { - if let value = Bundle.main.object(forInfoDictionaryKey: "LiveKitSandboxId") as? String { - // LK CLI will add unwanted double quotes - return value.trimmingCharacters(in: CharacterSet(charactersIn: "\"")) - } - return nil - }() - - private let sandboxUrl: String = "https://cloud-api.livekit.io/api/sandbox/connection-details" - private func fetchConnectionDetailsFromSandbox(roomName: String, participantName: String) async throws -> ConnectionDetails? { - guard let sandboxId else { - return nil - } - - var urlComponents = URLComponents(string: sandboxUrl)! - urlComponents.queryItems = [ - URLQueryItem(name: "roomName", value: roomName), - URLQueryItem(name: "participantName", value: participantName), - ] - - var request = URLRequest(url: urlComponents.url!) - request.httpMethod = "POST" - request.addValue(sandboxId, forHTTPHeaderField: "X-Sandbox-ID") - - let (data, response) = try await URLSession.shared.data(for: request) - - guard let httpResponse = response as? HTTPURLResponse else { - debugPrint("Failed to connect to LiveKit Cloud sandbox") - return nil - } - - guard (200 ... 299).contains(httpResponse.statusCode) else { - debugPrint("Error from LiveKit Cloud sandbox: \(httpResponse.statusCode), response: \(httpResponse)") - return nil - } - - guard let connectionDetails = try? JSONDecoder().decode(ConnectionDetails.self, from: data) else { - debugPrint("Error parsing connection details from LiveKit Cloud sandbox, response: \(httpResponse)") - return nil - } - - return connectionDetails - } - - private func fetchHardcodedConnectionDetails(roomName: String, participantName: String) -> ConnectionDetails? { - guard let serverUrl = hardcodedServerUrl, let token = hardcodedToken else { - return nil - } - - return .init( - serverUrl: serverUrl, - roomName: roomName, - participantName: participantName, - participantToken: token - ) - } -} diff --git a/VoiceAgent/Chat/View/ChatTextInputView.swift b/VoiceAgent/Chat/ChatTextInputView.swift similarity index 91% rename from VoiceAgent/Chat/View/ChatTextInputView.swift rename to VoiceAgent/Chat/ChatTextInputView.swift index ceb58cc..17cede3 100644 --- a/VoiceAgent/Chat/View/ChatTextInputView.swift +++ b/VoiceAgent/Chat/ChatTextInputView.swift @@ -1,12 +1,12 @@ +import LiveKitComponents import SwiftUI /// A multiplatform view that shows the chat input text field and send button. 
struct ChatTextInputView: View { - @Environment(ChatViewModel.self) private var chatViewModel - @Environment(\.horizontalSizeClass) private var horizontalSizeClass + @EnvironmentObject private var session: Session + @Environment(\.horizontalSizeClass) private var horizontalSizeClass @FocusState.Binding var keyboardFocus: Bool - @State private var messageText = "" var body: some View { @@ -82,12 +82,6 @@ struct ChatTextInputView: View { let text = messageText messageText = "" keyboardFocus = false - await chatViewModel.sendMessage(text) + await session.send(text: text) } } - -#Preview { - @FocusState var focus - ChatTextInputView(keyboardFocus: $focus) - .environment(ChatViewModel()) -} diff --git a/VoiceAgent/Chat/View/ChatView.swift b/VoiceAgent/Chat/ChatView.swift similarity index 63% rename from VoiceAgent/Chat/View/ChatView.swift rename to VoiceAgent/Chat/ChatView.swift index 8a723b0..2b35e56 100644 --- a/VoiceAgent/Chat/View/ChatView.swift +++ b/VoiceAgent/Chat/ChatView.swift @@ -1,38 +1,25 @@ +import LiveKitComponents import SwiftUI -/// A multiplatform view that shows the message feed. struct ChatView: View { - @Environment(ChatViewModel.self) private var viewModel + @EnvironmentObject private var session: Session var body: some View { - ScrollViewReader { scrollView in - ScrollView { - LazyVStack { - ForEach(viewModel.messages.values.reversed(), content: message) - } - } - .onChange(of: viewModel.messages.count) { - scrollView.scrollTo(viewModel.messages.keys.last) - } - .upsideDown() + ChatScrollView(messageBuilder: message) .padding(.horizontal) - .scrollIndicators(.never) - .animation(.default, value: viewModel.messages) - } + .animation(.default, value: session.messages) } @ViewBuilder private func message(_ message: ReceivedMessage) -> some View { ZStack { switch message.content { - case let .userTranscript(text): + case let .userTranscript(text), let .userInput(text): userTranscript(text) case let .agentTranscript(text): agentTranscript(text) } } - .upsideDown() - .id(message.id) // for the ScrollViewReader to work } @ViewBuilder diff --git a/VoiceAgent/Chat/ChatViewModel.swift b/VoiceAgent/Chat/ChatViewModel.swift deleted file mode 100644 index bc32665..0000000 --- a/VoiceAgent/Chat/ChatViewModel.swift +++ /dev/null @@ -1,80 +0,0 @@ -import Collections -import Foundation -import LiveKit -import Observation - -/// A view model that aggregates messages from multiple message providers (senders and receivers) -/// and exposes a single entry point for the UI to interact with the message feed. -/// -/// It does not expose any publicly mutable state, encouraging unidirectional data flow. 
-@MainActor -@Observable -final class ChatViewModel { - // MARK: - State - - private(set) var messages: OrderedDictionary = [:] - - // MARK: - Dependencies - - @ObservationIgnored - @Dependency(\.room) private var room - @ObservationIgnored - @Dependency(\.messageReceivers) private var messageReceivers - @ObservationIgnored - @Dependency(\.messageSenders) private var messageSenders - @ObservationIgnored - @Dependency(\.errorHandler) private var errorHandler - - // MARK: - Initialization - - init() { - observeMessages() - observeRoom() - } - - // MARK: - Private - - private func observeMessages() { - for messageReceiver in messageReceivers { - Task { [weak self] in - do { - for await message in try await messageReceiver.messages() { - guard let self else { return } - messages.updateValue(message, forKey: message.id) - } - } catch { - self?.errorHandler(error) - } - } - } - } - - private func observeRoom() { - Task { [weak self] in - guard let changes = self?.room.changes else { return } - for await _ in changes { - guard let self else { return } - if room.connectionState == .disconnected { - clearHistory() - } - } - } - } - - private func clearHistory() { - messages.removeAll() - } - - // MARK: - Actions - - func sendMessage(_ text: String) async { - let message = SentMessage(id: UUID().uuidString, timestamp: Date(), content: .userText(text)) - do { - for sender in messageSenders { - try await sender.send(message) - } - } catch { - errorHandler(error) - } - } -} diff --git a/VoiceAgent/Chat/Message.swift b/VoiceAgent/Chat/Message.swift deleted file mode 100644 index 163df3b..0000000 --- a/VoiceAgent/Chat/Message.swift +++ /dev/null @@ -1,24 +0,0 @@ -import Foundation - -/// A message received from the agent. -struct ReceivedMessage: Identifiable, Equatable, Sendable { - let id: String - let timestamp: Date - let content: Content - - enum Content: Equatable, Sendable { - case agentTranscript(String) - case userTranscript(String) - } -} - -/// A message sent to the agent. -struct SentMessage: Identifiable, Equatable, Sendable { - let id: String - let timestamp: Date - let content: Content - - enum Content: Equatable, Sendable { - case userText(String) - } -} diff --git a/VoiceAgent/Chat/Receive/MessageReceiver.swift b/VoiceAgent/Chat/Receive/MessageReceiver.swift deleted file mode 100644 index 6394d6d..0000000 --- a/VoiceAgent/Chat/Receive/MessageReceiver.swift +++ /dev/null @@ -1,11 +0,0 @@ -import Foundation - -/// A protocol that defines a message receiver. -/// -/// A message receiver is responsible for creating a stream of messages from the agent. -/// It is used to receive messages from the agent and update the message feed. -/// -/// - SeeAlso: ``ReceivedMessage`` -protocol MessageReceiver: Sendable { - func messages() async throws -> AsyncStream -} diff --git a/VoiceAgent/Chat/Receive/TranscriptionDelegateReceiver.swift b/VoiceAgent/Chat/Receive/TranscriptionDelegateReceiver.swift deleted file mode 100644 index 8c9d5d8..0000000 --- a/VoiceAgent/Chat/Receive/TranscriptionDelegateReceiver.swift +++ /dev/null @@ -1,53 +0,0 @@ -import Foundation -import LiveKit - -/// An actor that receives transcription messages from the room and yields them as messages. -/// -/// Room delegate methods are called multiple times for each message, with a stable message ID -/// that can be direcly used for diffing. -/// -/// Example: -/// ``` -/// { id: "1", content: "Hello" } -/// { id: "1", content: "Hello world!" 
} -/// ``` -@available(*, deprecated, message: "Use TranscriptionStreamReceiver compatible with livekit-agents 1.0") -actor TranscriptionDelegateReceiver: MessageReceiver, RoomDelegate { - private let room: Room - private var continuation: AsyncStream.Continuation? - - init(room: Room) { - self.room = room - room.add(delegate: self) - } - - deinit { - room.remove(delegate: self) - } - - /// Creates a new message stream for the transcription delegate receiver. - func messages() -> AsyncStream { - let (stream, continuation) = AsyncStream.makeStream(of: ReceivedMessage.self) - self.continuation = continuation - return stream - } - - nonisolated func room(_: Room, participant: Participant, trackPublication _: TrackPublication, didReceiveTranscriptionSegments segments: [TranscriptionSegment]) { - segments - .filter { !$0.text.isEmpty } - .forEach { segment in - let message = ReceivedMessage( - id: segment.id, - timestamp: segment.lastReceivedTime, - content: participant.isAgent ? .agentTranscript(segment.text) : .userTranscript(segment.text) - ) - Task { - await yield(message) - } - } - } - - private func yield(_ message: ReceivedMessage) { - continuation?.yield(message) - } -} diff --git a/VoiceAgent/Chat/Receive/TranscriptionStreamReceiver.swift b/VoiceAgent/Chat/Receive/TranscriptionStreamReceiver.swift deleted file mode 100644 index f0bb51a..0000000 --- a/VoiceAgent/Chat/Receive/TranscriptionStreamReceiver.swift +++ /dev/null @@ -1,157 +0,0 @@ -import Foundation -import LiveKit - -/// An actor that converts raw text streams from the LiveKit `Room` into `Message` objects. -/// - Note: Streams are supported by `livekit-agents` >= 1.0.0. -/// - SeeAlso: ``TranscriptionDelegateReceiver`` -/// -/// For agent messages, new text stream is emitted for each message, and the stream is closed when the message is finalized. -/// Each agent message is delivered in chunks, that are accumulated and published into the message stream. -/// -/// For user messages, the full transcription is sent each time, but may be updated until finalized. -/// -/// The ID of the segment is stable and unique across the lifetime of the message. -/// This ID can be used directly for `Identifiable` conformance. -/// -/// Example text stream for agent messages: -/// ``` -/// { segment_id: "1", content: "Hello" } -/// { segment_id: "1", content: " world" } -/// { segment_id: "1", content: "!" } -/// { segment_id: "2", content: "Hello" } -/// { segment_id: "2", content: " Apple" } -/// { segment_id: "2", content: "!" } -/// ``` -/// -/// Example text stream for user messages: -/// ``` -/// { segment_id: "3", content: "Hello" } -/// { segment_id: "3", content: "Hello world!" } -/// { segment_id: "4", content: "Hello" } -/// { segment_id: "4", content: "Hello Apple!" 
} -/// ``` -/// -/// Example output: -/// ``` -/// Message(id: "1", timestamp: 2025-01-01 12:00:00 +0000, content: .agentTranscript("Hello world!")) -/// Message(id: "2", timestamp: 2025-01-01 12:00:10 +0000, content: .agentTranscript("Hello Apple!")) -/// Message(id: "3", timestamp: 2025-01-01 12:00:20 +0000, content: .userTranscript("Hello world!")) -/// Message(id: "4", timestamp: 2025-01-01 12:00:30 +0000, content: .userTranscript("Hello Apple!")) -/// ``` -/// -actor TranscriptionStreamReceiver: MessageReceiver { - private struct PartialMessageID: Hashable { - let segmentID: String - let participantID: Participant.Identity - } - - private struct PartialMessage { - var content: String - let timestamp: Date - var streamID: String - - mutating func appendContent(_ newContent: String) { - content += newContent - } - - mutating func replaceContent(_ newContent: String, streamID: String) { - content = newContent - self.streamID = streamID - } - } - - private let transcriptionTopic = "lk.transcription" - private enum TranscriptionAttributes: String { - case final = "lk.transcription_final" - case segment = "lk.segment_id" - } - - private let room: Room - - private lazy var partialMessages: [PartialMessageID: PartialMessage] = [:] - - init(room: Room) { - self.room = room - } - - /// Creates a new message stream for the chat topic. - func messages() async throws -> AsyncStream { - let (stream, continuation) = AsyncStream.makeStream(of: ReceivedMessage.self) - - try await room.registerTextStreamHandler(for: transcriptionTopic) { [weak self] reader, participantIdentity in - guard let self else { return } - for try await message in reader where !message.isEmpty { - await continuation.yield(processIncoming(partialMessage: message, reader: reader, participantIdentity: participantIdentity)) - } - } - - continuation.onTermination = { [weak self] _ in - Task { - guard let self else { return } - await self.room.unregisterTextStreamHandler(for: self.transcriptionTopic) - } - } - - return stream - } - - /// Aggregates the incoming text into a message, storing the partial content in the `partialMessages` dictionary. - /// - Note: When the message is finalized, or a new message is started, the dictionary is purged to limit memory usage. - private func processIncoming(partialMessage message: String, reader: TextStreamReader, participantIdentity: Participant.Identity) -> ReceivedMessage { - let segmentID = reader.info.attributes[TranscriptionAttributes.segment.rawValue] ?? 
reader.info.id - let participantID = participantIdentity - let partialID = PartialMessageID(segmentID: segmentID, participantID: participantID) - - let currentStreamID = reader.info.id - - let timestamp: Date - let updatedContent: String - - if var existingMessage = partialMessages[partialID] { - // Update existing message - if existingMessage.streamID == currentStreamID { - // Same stream, append content - existingMessage.appendContent(message) - } else { - // Different stream for same segment, replace content - existingMessage.replaceContent(message, streamID: currentStreamID) - } - updatedContent = existingMessage.content - timestamp = existingMessage.timestamp - partialMessages[partialID] = existingMessage - } else { - // This is a new message - updatedContent = message - timestamp = reader.info.timestamp - partialMessages[partialID] = PartialMessage( - content: updatedContent, - timestamp: timestamp, - streamID: currentStreamID - ) - cleanupPreviousTurn(participantIdentity, exceptSegmentID: segmentID) - } - - let isFinal = reader.info.attributes[TranscriptionAttributes.final.rawValue] == "true" - if isFinal { - partialMessages[partialID] = nil - } - - let newOrUpdatedMessage = ReceivedMessage( - id: segmentID, - timestamp: timestamp, - content: participantIdentity == room.localParticipant.identity ? .userTranscript(updatedContent) : .agentTranscript(updatedContent) - ) - - return newOrUpdatedMessage - } - - private func cleanupPreviousTurn(_ participantID: Participant.Identity, exceptSegmentID: String) { - let keysToRemove = partialMessages.keys.filter { - $0.participantID == participantID && $0.segmentID != exceptSegmentID - } - - for key in keysToRemove { - partialMessages[key] = nil - } - } -} diff --git a/VoiceAgent/Chat/Send/LocalMessageSender.swift b/VoiceAgent/Chat/Send/LocalMessageSender.swift deleted file mode 100644 index cfad208..0000000 --- a/VoiceAgent/Chat/Send/LocalMessageSender.swift +++ /dev/null @@ -1,40 +0,0 @@ -import Foundation -import LiveKit - -/// An actor that sends local messages to the agent. -/// Currently, it only supports sending text messages. -/// -/// It also serves as the loopback for the local messages, -/// so that they can be displayed in the message feed -/// without relying on the agent-side transcription. -actor LocalMessageSender: MessageSender, MessageReceiver { - private let room: Room - private let topic: String - - private var messageContinuation: AsyncStream.Continuation? - - init(room: Room, topic: String = "lk.chat") { - self.room = room - self.topic = topic - } - - func send(_ message: SentMessage) async throws { - guard case let .userText(text) = message.content else { return } - - try await room.localParticipant.sendText(text, for: topic) - - let loopbackMessage = ReceivedMessage( - id: message.id, - timestamp: message.timestamp, - content: .userTranscript(text) - ) - - messageContinuation?.yield(loopbackMessage) - } - - func messages() async throws -> AsyncStream { - let (stream, continuation) = AsyncStream.makeStream() - messageContinuation = continuation - return stream - } -} diff --git a/VoiceAgent/Chat/Send/MessageSender.swift b/VoiceAgent/Chat/Send/MessageSender.swift deleted file mode 100644 index 9cd39e2..0000000 --- a/VoiceAgent/Chat/Send/MessageSender.swift +++ /dev/null @@ -1,11 +0,0 @@ -import Foundation - -/// A protocol that defines a message sender. -/// -/// A message sender is responsible for sending messages to the agent. -/// It is used to send messages to the agent and update the message feed. 
-/// -/// - SeeAlso: ``SentMessage`` -protocol MessageSender: Sendable { - func send(_ message: SentMessage) async throws -} diff --git a/VoiceAgent/ControlBar/Devices/AudioDeviceSelector.swift b/VoiceAgent/ControlBar/AudioDeviceSelector.swift similarity index 70% rename from VoiceAgent/ControlBar/Devices/AudioDeviceSelector.swift rename to VoiceAgent/ControlBar/AudioDeviceSelector.swift index c18e9f8..025c05f 100644 --- a/VoiceAgent/ControlBar/Devices/AudioDeviceSelector.swift +++ b/VoiceAgent/ControlBar/AudioDeviceSelector.swift @@ -1,19 +1,20 @@ +import LiveKit import SwiftUI #if os(macOS) /// A platform-specific view that shows a list of available audio devices. struct AudioDeviceSelector: View { - @Environment(AppViewModel.self) private var viewModel + @EnvironmentObject private var localMedia: LocalMedia var body: some View { Menu { - ForEach(viewModel.audioDevices, id: \.deviceId) { device in + ForEach(localMedia.audioDevices, id: \.deviceId) { device in Button { - viewModel.select(audioDevice: device) + localMedia.select(audioDevice: device) } label: { HStack { Text(device.name) - if device.deviceId == viewModel.selectedAudioDeviceID { + if device.deviceId == localMedia.selectedAudioDeviceID { Image(systemName: "checkmark") } } diff --git a/VoiceAgent/ControlBar/ControlBar.swift b/VoiceAgent/ControlBar/ControlBar.swift index 52322e8..34c140b 100644 --- a/VoiceAgent/ControlBar/ControlBar.swift +++ b/VoiceAgent/ControlBar/ControlBar.swift @@ -4,8 +4,14 @@ import LiveKitComponents /// Available controls depend on the agent features and the track availability. /// - SeeAlso: ``AgentFeatures`` struct ControlBar: View { - @Environment(AppViewModel.self) private var viewModel + @EnvironmentObject private var session: Session + @EnvironmentObject private var localMedia: LocalMedia + + @Binding var chat: Bool @Environment(\.horizontalSizeClass) private var horizontalSizeClass + @Environment(\.voiceEnabled) private var voiceEnabled + @Environment(\.videoEnabled) private var videoEnabled + @Environment(\.textEnabled) private var textEnabled private enum Constants { static let buttonWidth: CGFloat = 16 * .grid @@ -15,17 +21,17 @@ struct ControlBar: View { var body: some View { HStack(spacing: .zero) { biggerSpacer() - if viewModel.agentFeatures.contains(.voice) { + if voiceEnabled { audioControls() flexibleSpacer() } - if viewModel.agentFeatures.contains(.video) { + if videoEnabled { videoControls() flexibleSpacer() screenShareButton() flexibleSpacer() } - if viewModel.agentFeatures.contains(.text) { + if textEnabled { textInputButton() flexibleSpacer() } @@ -79,14 +85,14 @@ struct ControlBar: View { private func audioControls() -> some View { HStack(spacing: .zero) { Spacer() - AsyncButton(action: viewModel.toggleMicrophone) { + AsyncButton(action: localMedia.toggleMicrophone) { HStack(spacing: .grid) { - Image(systemName: viewModel.isMicrophoneEnabled ? "microphone.fill" : "microphone.slash.fill") + Image(systemName: localMedia.isMicrophoneEnabled ? 
"microphone.fill" : "microphone.slash.fill") .transition(.symbolEffect) - BarAudioVisualizer(audioTrack: viewModel.audioTrack, barColor: .fg1, barCount: 3, barSpacingFactor: 0.1) + BarAudioVisualizer(audioTrack: localMedia.microphoneTrack, barColor: .fg1, barCount: 3, barSpacingFactor: 0.1) .frame(width: 2 * .grid, height: 0.5 * Constants.buttonHeight) .frame(maxHeight: .infinity) - .id(viewModel.audioTrack?.id) + .id(localMedia.microphoneTrack?.id) } .frame(height: Constants.buttonHeight) .padding(.horizontal, 2 * .grid) @@ -106,8 +112,10 @@ struct ControlBar: View { private func videoControls() -> some View { HStack(spacing: .zero) { Spacer() - AsyncButton(action: viewModel.toggleCamera) { - Image(systemName: viewModel.isCameraEnabled ? "video.fill" : "video.slash.fill") + AsyncButton { + await localMedia.toggleCamera(disableScreenShare: true) + } label: { + Image(systemName: localMedia.isCameraEnabled ? "video.fill" : "video.slash.fill") .transition(.symbolEffect) .frame(height: Constants.buttonHeight) .padding(.horizontal, 2 * .grid) @@ -121,48 +129,55 @@ struct ControlBar: View { Spacer() } .frame(width: Constants.buttonWidth) - .disabled(viewModel.agent == nil) + .disabled(!session.agent.isConnected) } @ViewBuilder private func screenShareButton() -> some View { - AsyncButton(action: viewModel.toggleScreenShare) { + AsyncButton { + await localMedia.toggleScreenShare(disableCamera: true) + } label: { Image(systemName: "arrow.up.square.fill") .frame(width: Constants.buttonWidth, height: Constants.buttonHeight) .contentShape(Rectangle()) } .buttonStyle( ControlBarButtonStyle( - isToggled: viewModel.isScreenShareEnabled, + isToggled: localMedia.isScreenShareEnabled, foregroundColor: .fg1, backgroundColor: .bg2, borderColor: .separator1 ) ) - .disabled(viewModel.agent == nil) + .disabled(!session.agent.isConnected) } @ViewBuilder private func textInputButton() -> some View { - AsyncButton(action: viewModel.toggleTextInput) { + Button { + chat.toggle() + } label: { Image(systemName: "ellipsis.message.fill") .frame(width: Constants.buttonWidth, height: Constants.buttonHeight) .contentShape(Rectangle()) } .buttonStyle( ControlBarButtonStyle( - isToggled: viewModel.interactionMode == .text, + isToggled: chat, foregroundColor: .fg1, backgroundColor: .bg2, borderColor: .separator1 ) ) - .disabled(viewModel.agent == nil) + .disabled(!session.agent.isConnected) } @ViewBuilder private func disconnectButton() -> some View { - AsyncButton(action: viewModel.disconnect) { + AsyncButton { + await session.end() + session.restoreMessageHistory([]) + } label: { Image(systemName: "phone.down.fill") .frame(width: Constants.buttonWidth, height: Constants.buttonHeight) .contentShape(Rectangle()) @@ -174,11 +189,10 @@ struct ControlBar: View { borderColor: .separatorSerious ) ) - .disabled(viewModel.connectionState == .disconnected) + .disabled(!session.isConnected) } } #Preview { - ControlBar() - .environment(AppViewModel()) + ControlBar(chat: .constant(false)) } diff --git a/VoiceAgent/ControlBar/Devices/VideoDeviceSelector.swift b/VoiceAgent/ControlBar/VideoDeviceSelector.swift similarity index 70% rename from VoiceAgent/ControlBar/Devices/VideoDeviceSelector.swift rename to VoiceAgent/ControlBar/VideoDeviceSelector.swift index 2f78852..e7eff4d 100644 --- a/VoiceAgent/ControlBar/Devices/VideoDeviceSelector.swift +++ b/VoiceAgent/ControlBar/VideoDeviceSelector.swift @@ -1,20 +1,21 @@ import AVFoundation +import LiveKitComponents import SwiftUI #if os(macOS) /// A platform-specific view that shows a list of 
available video devices. struct VideoDeviceSelector: View { - @Environment(AppViewModel.self) private var viewModel + @EnvironmentObject private var localMedia: LocalMedia var body: some View { Menu { - ForEach(viewModel.videoDevices, id: \.uniqueID) { device in + ForEach(localMedia.videoDevices, id: \.uniqueID) { device in AsyncButton { - await viewModel.select(videoDevice: device) + await localMedia.select(videoDevice: device) } label: { HStack { Text(device.localizedName) - if device.uniqueID == viewModel.selectedVideoDeviceID { + if device.uniqueID == localMedia.selectedVideoDeviceID { Image(systemName: "checkmark") } } diff --git a/VoiceAgent/DI/Dependencies.swift b/VoiceAgent/DI/Dependencies.swift deleted file mode 100644 index dbc7ab9..0000000 --- a/VoiceAgent/DI/Dependencies.swift +++ /dev/null @@ -1,50 +0,0 @@ -import LiveKit - -/// A minimalistic dependency injection container. -/// It allows sharing common dependencies e.g. `Room` between view models and services. -/// - Note: For production apps, consider using a more flexible approach offered by e.g.: -/// - [Factory](https://github.com/hmlongco/Factory) -/// - [swift-dependencies](https://github.com/pointfreeco/swift-dependencies) -/// - [Needle](https://github.com/uber/needle) -@MainActor -final class Dependencies { - static let shared = Dependencies() - - private init() {} - - // MARK: LiveKit - - lazy var room = Room(roomOptions: RoomOptions(defaultScreenShareCaptureOptions: ScreenShareCaptureOptions(useBroadcastExtension: true))) - - // MARK: Services - - lazy var tokenService = TokenService() - - private lazy var localMessageSender = LocalMessageSender(room: room) - lazy var messageSenders: [any MessageSender] = [ - localMessageSender, - ] - lazy var messageReceivers: [any MessageReceiver] = [ - TranscriptionStreamReceiver(room: room), - localMessageSender, - ] - - // MARK: Error - - lazy var errorHandler: (Error?) -> Void = { _ in } -} - -/// A property wrapper that injects a dependency from the ``Dependencies`` container. -@MainActor -@propertyWrapper -struct Dependency { - let keyPath: KeyPath - - init(_ keyPath: KeyPath) { - self.keyPath = keyPath - } - - var wrappedValue: T { - Dependencies.shared[keyPath: keyPath] - } -} diff --git a/VoiceAgent/Error/WarningView.swift b/VoiceAgent/Error/WarningView.swift deleted file mode 100644 index 80e24bc..0000000 --- a/VoiceAgent/Error/WarningView.swift +++ /dev/null @@ -1,34 +0,0 @@ -import SwiftUI - -/// A view that shows a warning snackbar. 
-struct WarningView: View { - let warning: LocalizedStringKey - - var body: some View { - VStack(spacing: 2 * .grid) { - HStack(spacing: 2 * .grid) { - Image(systemName: "exclamationmark.triangle") - Text("warning.title") - Spacer() - } - .font(.system(size: 15, weight: .semibold)) - - Text(warning) - .font(.system(size: 15)) - .frame(maxWidth: .infinity, alignment: .leading) - } - .padding(3 * .grid) - .foregroundStyle(.fgModerate) - .background(.bgModerate) - .clipShape(RoundedRectangle(cornerRadius: .cornerRadiusSmall)) - .overlay( - RoundedRectangle(cornerRadius: .cornerRadiusSmall) - .stroke(.separatorModerate, lineWidth: 1) - ) - .safeAreaPadding(4 * .grid) - } -} - -#Preview { - WarningView(warning: "Sample warning message") -} diff --git a/VoiceAgent/Helpers/AsyncButton.swift b/VoiceAgent/Helpers/AsyncButton.swift deleted file mode 100644 index 5c26eaa..0000000 --- a/VoiceAgent/Helpers/AsyncButton.swift +++ /dev/null @@ -1,47 +0,0 @@ -import SwiftUI - -/// A drop-in replacement `Button` that executes an async action and shows a busy label when in progress. -/// -/// - Parameters: -/// - action: The async action to execute. -/// - label: The label to show when not busy. -/// - busyLabel: The label to show when busy. Defaults to an empty view. -struct AsyncButton: View { - private let action: () async -> Void - - @ViewBuilder private let label: Label - @ViewBuilder private let busyLabel: BusyLabel - - @State private var isBusy = false - - init( - action: @escaping () async -> Void, - @ViewBuilder label: () -> Label, - @ViewBuilder busyLabel: () -> BusyLabel = EmptyView.init - ) { - self.action = action - self.label = label() - self.busyLabel = busyLabel() - } - - var body: some View { - Button { - isBusy = true - Task { - await action() - isBusy = false - } - } label: { - if isBusy { - if busyLabel is EmptyView { - label - } else { - busyLabel - } - } else { - label - } - } - .disabled(isBusy) - } -} diff --git a/VoiceAgent/Helpers/Environment.swift b/VoiceAgent/Helpers/Environment.swift index 144539d..164a9d9 100644 --- a/VoiceAgent/Helpers/Environment.swift +++ b/VoiceAgent/Helpers/Environment.swift @@ -1,5 +1,8 @@ import SwiftUI extension EnvironmentValues { + @Entry var voiceEnabled: Bool = true + @Entry var videoEnabled: Bool = true + @Entry var textEnabled: Bool = true @Entry var namespace: Namespace.ID? // don't initialize outside View } diff --git a/VoiceAgent/Helpers/ObservableObject+.swift b/VoiceAgent/Helpers/ObservableObject+.swift deleted file mode 100644 index 7b60e10..0000000 --- a/VoiceAgent/Helpers/ObservableObject+.swift +++ /dev/null @@ -1,17 +0,0 @@ -import Combine - -extension ObservableObject { - typealias BufferedObjectWillChangePublisher = Publishers.Buffer - - // This is necessary due to ObservableObjectPublisher not respecting the demand. - // See: https://forums.swift.org/t/asyncpublisher-causes-crash-in-rather-simple-situation - private var bufferedObjectWillChange: BufferedObjectWillChangePublisher { - objectWillChange - .buffer(size: 1, prefetch: .byRequest, whenFull: .dropOldest) - } - - /// A publisher that emits the `objectWillChange` events. 
-    var changes: AsyncPublisher<BufferedObjectWillChangePublisher> {
-        bufferedObjectWillChange.values
-    }
-}
diff --git a/VoiceAgent/Helpers/VideoTrack+.swift b/VoiceAgent/Helpers/VideoTrack+.swift
deleted file mode 100644
index 6c30576..0000000
--- a/VoiceAgent/Helpers/VideoTrack+.swift
+++ /dev/null
@@ -1,10 +0,0 @@
-import Foundation
-import LiveKit
-
-extension VideoTrack {
-    /// The aspect ratio of the video track or 1 if the dimensions are not available.
-    var aspectRatio: CGFloat {
-        guard let dimensions else { return 1 }
-        return CGFloat(dimensions.width) / CGFloat(dimensions.height)
-    }
-}
diff --git a/VoiceAgent/Helpers/View+.swift b/VoiceAgent/Helpers/ViewModifiers.swift
similarity index 76%
rename from VoiceAgent/Helpers/View+.swift
rename to VoiceAgent/Helpers/ViewModifiers.swift
index c2cc263..7767d41 100644
--- a/VoiceAgent/Helpers/View+.swift
+++ b/VoiceAgent/Helpers/ViewModifiers.swift
@@ -1,16 +1,5 @@
 import SwiftUI

-/// A view modifier that flips the view upside down.
-/// It may be used to create e.g. an inverted List.
-/// - SeeAlso: ``ChatView``
-struct UpsideDown: ViewModifier {
-    func body(content: Content) -> some View {
-        content
-            .rotationEffect(.radians(Double.pi))
-            .scaleEffect(x: -1, y: 1, anchor: .center)
-    }
-}
-
 /// A view modifier that slightly blurs the top of the view.
 struct BlurredTop: ViewModifier {
     func body(content: Content) -> some View {
@@ -50,11 +39,6 @@ struct Shimerring: ViewModifier {
 }

 extension View {
-    /// Flips the view upside down.
-    func upsideDown() -> some View {
-        modifier(UpsideDown())
-    }
-
     /// Blurs the top of the view.
     func blurredTop() -> some View {
         modifier(BlurredTop())
diff --git a/VoiceAgent/Interactions/TextInteractionView.swift b/VoiceAgent/Interactions/TextInteractionView.swift
index a60f193..3d29b9a 100644
--- a/VoiceAgent/Interactions/TextInteractionView.swift
+++ b/VoiceAgent/Interactions/TextInteractionView.swift
@@ -1,3 +1,4 @@
+import LiveKit
 import SwiftUI

 /// A multiplatform view that shows text-specific interaction controls.
@@ -9,7 +10,9 @@ import SwiftUI
 ///
 /// Additionally, the view shows a complete chat view with text input capabilities.
 struct TextInteractionView: View {
-    @Environment(AppViewModel.self) private var viewModel
+    @EnvironmentObject private var session: Session
+    @EnvironmentObject private var localMedia: LocalMedia
+
     @FocusState.Binding var keyboardFocus: Bool

     var body: some View {
@@ -36,13 +39,13 @@ struct TextInteractionView: View {
     private func participants() -> some View {
         HStack {
             Spacer()
-            AgentParticipantView()
-                .frame(maxWidth: viewModel.avatarCameraTrack != nil ? 50 * .grid : 25 * .grid)
+            AgentView()
+                .frame(maxWidth: session.agent.avatarVideoTrack != nil ? 50 * .grid : 25 * .grid)
             ScreenShareView()
             LocalParticipantView()
             Spacer()
         }
-        .frame(height: viewModel.isCameraEnabled || viewModel.isScreenShareEnabled || viewModel.avatarCameraTrack != nil ? 50 * .grid : 25 * .grid)
+        .frame(height: localMedia.isCameraEnabled || localMedia.isScreenShareEnabled || session.agent.avatarVideoTrack != nil ? 50 * .grid : 25 * .grid)
         .safeAreaPadding()
     }
 }
diff --git a/VoiceAgent/Interactions/VisionInteractionView.swift b/VoiceAgent/Interactions/VisionInteractionView.swift
index b06159c..2661c57 100644
--- a/VoiceAgent/Interactions/VisionInteractionView.swift
+++ b/VoiceAgent/Interactions/VisionInteractionView.swift
@@ -3,14 +3,14 @@ import SwiftUI
 #if os(visionOS)
 /// A platform-specific view that shows all interaction controls with optional chat.
 struct VisionInteractionView: View {
-    @Environment(AppViewModel.self) private var viewModel
+    var chat: Bool
     @FocusState.Binding var keyboardFocus: Bool

     var body: some View {
         HStack {
             participants().rotation3DEffect(.degrees(30), axis: .y, anchor: .trailing)
             agent()
-            chat().rotation3DEffect(.degrees(-30), axis: .y, anchor: .leading)
+            chatView().rotation3DEffect(.degrees(-30), axis: .y, anchor: .leading)
         }
     }

@@ -27,16 +27,16 @@ struct VisionInteractionView: View {

     @ViewBuilder
     private func agent() -> some View {
-        AgentParticipantView()
+        AgentView()
             .frame(width: 175 * .grid)
             .frame(maxHeight: .infinity)
             .glassBackgroundEffect()
     }

     @ViewBuilder
-    private func chat() -> some View {
+    private func chatView() -> some View {
         VStack {
-            if case .text = viewModel.interactionMode {
+            if chat {
                 ChatView()
                 ChatTextInputView(keyboardFocus: _keyboardFocus)
             }
diff --git a/VoiceAgent/Interactions/VoiceInteractionView.swift b/VoiceAgent/Interactions/VoiceInteractionView.swift
index 942d050..e36f6c6 100644
--- a/VoiceAgent/Interactions/VoiceInteractionView.swift
+++ b/VoiceAgent/Interactions/VoiceInteractionView.swift
@@ -24,7 +24,7 @@ struct VoiceInteractionView: View {
         HStack {
             Spacer()
                 .frame(width: 50 * .grid)
-            AgentParticipantView()
+            AgentView()
             VStack {
                 Spacer()
                 ScreenShareView()
@@ -39,7 +39,7 @@ struct VoiceInteractionView: View {
     @ViewBuilder
     private func compact() -> some View {
         ZStack(alignment: .bottom) {
-            AgentParticipantView()
+            AgentView()
                 .frame(maxWidth: .infinity, maxHeight: .infinity)
                 .ignoresSafeArea()
             HStack {
diff --git a/VoiceAgent/Participant/AgentParticipantView.swift b/VoiceAgent/Media/AgentView.swift
similarity index 72%
rename from VoiceAgent/Participant/AgentParticipantView.swift
rename to VoiceAgent/Media/AgentView.swift
index 1cf12ff..a052a91 100644
--- a/VoiceAgent/Participant/AgentParticipantView.swift
+++ b/VoiceAgent/Media/AgentView.swift
@@ -3,20 +3,20 @@ import LiveKitComponents
 /// A view that combines the avatar camera view (if available)
 /// or the audio visualizer (if available).
 /// - Note: If both are unavailable, the view will show a placeholder visualizer.
-struct AgentParticipantView: View {
-    @Environment(AppViewModel.self) private var viewModel
-    @Environment(\.namespace) private var namespace
+struct AgentView: View {
+    @EnvironmentObject private var session: Session
+    @Environment(\.namespace) private var namespace

     /// Reveals the avatar camera view when true.
     @SceneStorage("videoTransition") private var videoTransition = false

     var body: some View {
         ZStack {
-            if let avatarCameraTrack = viewModel.avatarCameraTrack {
-                SwiftUIVideoView(avatarCameraTrack)
+            if let avatarVideoTrack = session.agent.avatarVideoTrack {
+                SwiftUIVideoView(avatarVideoTrack)
                     .clipShape(RoundedRectangle(cornerRadius: .cornerRadiusPerPlatform))
-                    .aspectRatio(avatarCameraTrack.aspectRatio, contentMode: .fit)
-                    .padding(.horizontal, avatarCameraTrack.aspectRatio == 1 ? 4 * .grid : .zero)
+                    .aspectRatio(avatarVideoTrack.aspectRatio, contentMode: .fit)
+                    .padding(.horizontal, avatarVideoTrack.aspectRatio == 1 ? 4 * .grid : .zero)
                     .shadow(radius: 20, y: 10)
                     .mask(
                         GeometryReader { proxy in
                 .onAppear {
                     videoTransition = true
                 }
-            } else if let agentAudioTrack = viewModel.agentAudioTrack {
-                BarAudioVisualizer(audioTrack: agentAudioTrack,
-                                   agentState: viewModel.agent?.agentState ?? .listening,
+            } else if let audioTrack = session.agent.audioTrack {
+                BarAudioVisualizer(audioTrack: audioTrack,
+                                   agentState: session.agent.agentState ?? .listening,
                                    barCount: 5,
                                    barSpacingFactor: 0.05,
                                    barMinOpacity: 0.1)
                     .frame(maxWidth: 75 * .grid, maxHeight: 48 * .grid)
                     .transition(.opacity)
-            } else if viewModel.isInteractive {
+            } else if session.isConnected {
                 BarAudioVisualizer(audioTrack: nil,
                                    agentState: .listening,
                                    barCount: 1,
@@ -48,7 +48,7 @@
                 .transition(.opacity)
             }
         }
-        .animation(.snappy, value: viewModel.agentAudioTrack?.id)
+        .animation(.snappy, value: session.agent.audioTrack?.id)
         .matchedGeometryEffect(id: "agent", in: namespace!)
     }
 }
diff --git a/VoiceAgent/Participant/LocalParticipantView.swift b/VoiceAgent/Media/LocalParticipantView.swift
similarity index 82%
rename from VoiceAgent/Participant/LocalParticipantView.swift
rename to VoiceAgent/Media/LocalParticipantView.swift
index 8e4c4d0..6aa524a 100644
--- a/VoiceAgent/Participant/LocalParticipantView.swift
+++ b/VoiceAgent/Media/LocalParticipantView.swift
@@ -2,19 +2,20 @@ import LiveKitComponents

 /// A view that shows the local participant's camera view with flip control.
 struct LocalParticipantView: View {
-    @Environment(AppViewModel.self) private var viewModel
+    @EnvironmentObject private var localMedia: LocalMedia
+    @Environment(\.namespace) private var namespace

     var body: some View {
-        if let cameraTrack = viewModel.cameraTrack {
+        if let cameraTrack = localMedia.cameraTrack {
             SwiftUIVideoView(cameraTrack)
                 .clipShape(RoundedRectangle(cornerRadius: .cornerRadiusPerPlatform))
                 .aspectRatio(cameraTrack.aspectRatio, contentMode: .fit)
                 .shadow(radius: 20, y: 10)
                 .transition(.scale.combined(with: .opacity))
                 .overlay(alignment: .bottomTrailing) {
-                    if viewModel.canSwitchCamera {
-                        AsyncButton(action: viewModel.switchCamera) {
+                    if localMedia.canSwitchCamera {
+                        AsyncButton(action: localMedia.switchCamera) {
                             Image(systemName: "arrow.trianglehead.2.clockwise.rotate.90")
                                 .padding(2 * .grid)
                                 .foregroundStyle(.fg0)
diff --git a/VoiceAgent/Participant/ScreenShareView.swift b/VoiceAgent/Media/ScreenShareView.swift
similarity index 82%
rename from VoiceAgent/Participant/ScreenShareView.swift
rename to VoiceAgent/Media/ScreenShareView.swift
index 9774574..43f9693 100644
--- a/VoiceAgent/Participant/ScreenShareView.swift
+++ b/VoiceAgent/Media/ScreenShareView.swift
@@ -2,11 +2,12 @@ import LiveKitComponents

 /// A view that shows the screen share preview.
 struct ScreenShareView: View {
-    @Environment(AppViewModel.self) private var viewModel
+    @EnvironmentObject private var localMedia: LocalMedia
+    @Environment(\.namespace) private var namespace

     var body: some View {
-        if let screenShareTrack = viewModel.screenShareTrack {
+        if let screenShareTrack = localMedia.screenShareTrack {
             SwiftUIVideoView(screenShareTrack)
                 .clipShape(RoundedRectangle(cornerRadius: .cornerRadiusPerPlatform))
                 .aspectRatio(screenShareTrack.aspectRatio, contentMode: .fit)
diff --git a/VoiceAgent/Participant/AgentListeningView.swift b/VoiceAgent/Participant/AgentListeningView.swift
deleted file mode 100644
index f2f9416..0000000
--- a/VoiceAgent/Participant/AgentListeningView.swift
+++ /dev/null
@@ -1,16 +0,0 @@
-import SwiftUI
-
-/// A tooltip that indicates that the audio is being recorded
-/// e.g. while using pre-connect audio feature to initiate a conversation.
-struct AgentListeningView: View {
-    var body: some View {
-        Text("agent.listening")
-            .font(.system(size: 15))
-            .shimmering()
-            .transition(.blurReplace)
-    }
-}
-
-#Preview {
-    AgentListeningView()
-}
diff --git a/VoiceAgent/Start/StartView.swift b/VoiceAgent/Start/StartView.swift
index 291c63a3..4712bc4 100644
--- a/VoiceAgent/Start/StartView.swift
+++ b/VoiceAgent/Start/StartView.swift
@@ -1,10 +1,11 @@
+import LiveKitComponents
 import SwiftUI

 /// The initial view that is shown when the app is not connected to the server.
 struct StartView: View {
-    @Environment(AppViewModel.self) private var viewModel
-    @Environment(\.horizontalSizeClass) private var horizontalSizeClass
+    @EnvironmentObject private var session: Session
+    @Environment(\.horizontalSizeClass) private var horizontalSizeClass

     @Namespace private var button

     var body: some View {
@@ -55,7 +56,9 @@ struct StartView: View {

     @ViewBuilder
     private func connectButton() -> some View {
-        AsyncButton(action: viewModel.connect) {
+        AsyncButton {
+            await session.start()
+        } label: {
             HStack {
                 Spacer()
                 Text("connect.start")
@@ -85,5 +88,4 @@ struct StartView: View {

 #Preview {
     StartView()
-        .environment(AppViewModel())
 }
diff --git a/VoiceAgent/VoiceAgentApp.swift b/VoiceAgent/VoiceAgentApp.swift
index 802c0d7..b9de9f0 100644
--- a/VoiceAgent/VoiceAgentApp.swift
+++ b/VoiceAgent/VoiceAgentApp.swift
@@ -3,13 +3,25 @@ import SwiftUI

 @main
 struct VoiceAgentApp: App {
-    // Create the root view model
-    private let viewModel = AppViewModel()
+    // To use the LiveKit Cloud sandbox (development only)
+    // - Enable your sandbox here https://cloud.livekit.io/projects/p_/sandbox/templates/token-server
+    // - Create .env.xcconfig with your LIVEKIT_SANDBOX_ID
+    private static let sandboxID = Bundle.main.object(forInfoDictionaryKey: "LiveKitSandboxId") as! String
+
+    /// For production use, replace the `SandboxTokenSource` with an `EndpointTokenSource` or your own `TokenSourceConfigurable` implementation.
+    private let session = Session(
+        tokenSource: SandboxTokenSource(id: Self.sandboxID).cached(),
+        options: SessionOptions(room: Room(roomOptions: RoomOptions(defaultScreenShareCaptureOptions: ScreenShareCaptureOptions(useBroadcastExtension: true))))
+    )

     var body: some Scene {
         WindowGroup {
             AppView()
-                .environment(viewModel)
+                .environmentObject(session)
+                .environmentObject(LocalMedia(session: session))
+                .environment(\.voiceEnabled, true)
+                .environment(\.videoEnabled, true)
+                .environment(\.textEnabled, true)
         }
         #if os(macOS)
         .defaultSize(width: 900, height: 900)
@@ -21,15 +33,3 @@ struct VoiceAgentApp: App {
         #endif
     }
 }
-
-/// A set of flags that define the features supported by the agent.
-/// Enable them based on your agent capabilities.
-struct AgentFeatures: OptionSet {
-    let rawValue: Int
-
-    static let voice = Self(rawValue: 1 << 0)
-    static let text = Self(rawValue: 1 << 1)
-    static let video = Self(rawValue: 1 << 2)
-
-    static let current: Self = [.voice, .text]
-}
diff --git a/VoiceAgentTests/ChatViewModelTests.swift b/VoiceAgentTests/ChatViewModelTests.swift
deleted file mode 100644
index a7f3ad7..0000000
--- a/VoiceAgentTests/ChatViewModelTests.swift
+++ /dev/null
@@ -1,53 +0,0 @@
-import Testing
-@testable import VoiceAgent
-
-@MainActor
-struct ChatViewModelTests {
-    @Test func multipleReceivers() async throws {
-        let receiver1 = MockMessageReceiver()
-        let receiver2 = MockMessageReceiver()
-
-        let message1 = ReceivedMessage(
-            id: "1",
-            timestamp: .init(),
-            content: .userTranscript("Hello")
-        )
-        let message2 = ReceivedMessage(
-            id: "2",
-            timestamp: .init(),
-            content: .agentTranscript("Hi there")
-        )
-
-        Dependencies.shared.messageReceivers = [receiver1, receiver2]
-        let viewModel = ChatViewModel()
-
-        try await Task.sleep(for: .milliseconds(100))
-        await receiver1.postMessage(message1)
-        try await Task.sleep(for: .milliseconds(100))
-        await receiver2.postMessage(message2)
-        try await Task.sleep(for: .milliseconds(100))
-
-        #expect(viewModel.messages.count == 2)
-        #expect(viewModel.messages["1"]?.content == .userTranscript("Hello"))
-        #expect(viewModel.messages["2"]?.content == .agentTranscript("Hi there"))
-
-        let orderedMessages = Array(viewModel.messages.values)
-        #expect(orderedMessages.count == 2)
-        #expect(orderedMessages[0].id == "1")
-        #expect(orderedMessages[1].id == "2")
-    }
-}
-
-actor MockMessageReceiver: MessageReceiver {
-    private var continuation: AsyncStream<ReceivedMessage>.Continuation?
-
-    func messages() async throws -> AsyncStream<ReceivedMessage> {
-        let (stream, continuation) = AsyncStream.makeStream(of: ReceivedMessage.self)
-        self.continuation = continuation
-        return stream
-    }
-
-    func postMessage(_ message: ReceivedMessage) {
-        continuation?.yield(message)
-    }
-}
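
As a companion to the changes above, the following is a minimal sketch of how a view consumes the new `Session` object after this migration. It only uses identifiers that appear in this diff (`session.start()`, `session.isConnected`, `session.agent.audioTrack`, `session.agent.agentState`, `BarAudioVisualizer`, `AsyncButton`); the view name is hypothetical, and the exact `Session` API surface is defined by the LiveKit Swift SDK, so treat this as an illustration rather than canonical usage:

```swift
import LiveKitComponents
import SwiftUI

/// Hypothetical minimal consumer of the new observable objects.
/// `Session` is injected via `.environmentObject(...)` in
/// `VoiceAgentApp`, as shown in the diff above.
struct MinimalAgentScreen: View {
    @EnvironmentObject private var session: Session

    var body: some View {
        if session.isConnected {
            // Mirror AgentView: visualize the agent's audio track,
            // falling back to the listening state when unknown.
            BarAudioVisualizer(audioTrack: session.agent.audioTrack,
                               agentState: session.agent.agentState ?? .listening,
                               barCount: 5,
                               barSpacingFactor: 0.05,
                               barMinOpacity: 0.1)
        } else {
            AsyncButton {
                await session.start() // Connects via the configured token source.
            } label: {
                Text("Connect")
            }
        }
    }
}
```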