diff --git a/Sources/LiveKit/Audio/DefaultMixerAudioObserver.swift b/Sources/LiveKit/Audio/DefaultMixerAudioObserver.swift
new file mode 100644
index 000000000..3d72c7451
--- /dev/null
+++ b/Sources/LiveKit/Audio/DefaultMixerAudioObserver.swift
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2025 LiveKit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+@preconcurrency import AVFoundation
+
+#if swift(>=5.9)
+internal import LiveKitWebRTC
+#else
+@_implementationOnly import LiveKitWebRTC
+#endif
+
+public final class DefaultMixerAudioObserver: AudioEngineObserver, Loggable {
+    public var next: (any AudioEngineObserver)? {
+        get { _state.next }
+        set { _state.mutate { $0.next = newValue } }
+    }
+
+    /// Adjust the volume of captured app audio. Range is 0.0 ~ 1.0.
+    public var appVolume: Float {
+        get { _state.read { $0.appMixerNode.outputVolume } }
+        set { _state.mutate { $0.appMixerNode.outputVolume = newValue } }
+    }
+
+    /// Adjust the volume of microphone audio. Range is 0.0 ~ 1.0.
+    public var micVolume: Float {
+        get { _state.read { $0.micMixerNode.outputVolume } }
+        set { _state.mutate { $0.micMixerNode.outputVolume = newValue } }
+    }
+
+    // MARK: - Internal
+
+    var appAudioNode: AVAudioPlayerNode {
+        _state.read { $0.appNode }
+    }
+
+    var micAudioNode: AVAudioPlayerNode {
+        _state.read { $0.micNode }
+    }
+
+    struct State {
+        var next: (any AudioEngineObserver)?
+
+        // AppAudio
+        public let appNode = AVAudioPlayerNode()
+        public let appMixerNode = AVAudioMixerNode()
+
+        // Not connected for device rendering mode.
+        public let micNode = AVAudioPlayerNode()
+        public let micMixerNode = AVAudioMixerNode()
+    }
+
+    let _state = StateSync(State())
+
+    public init() {}
+
+    public func setNext(_ handler: any AudioEngineObserver) {
+        next = handler
+    }
+
+    public func engineDidCreate(_ engine: AVAudioEngine) {
+        let (appNode, appMixerNode, micNode, micMixerNode) = _state.read {
+            ($0.appNode, $0.appMixerNode, $0.micNode, $0.micMixerNode)
+        }
+
+        engine.attach(appNode)
+        engine.attach(appMixerNode)
+        engine.attach(micNode)
+        engine.attach(micMixerNode)
+
+        // Invoke next
+        next?.engineDidCreate(engine)
+    }
+
+    public func engineWillRelease(_ engine: AVAudioEngine) {
+        // Invoke next
+        next?.engineWillRelease(engine)
+
+        let (appNode, appMixerNode, micNode, micMixerNode) = _state.read {
+            ($0.appNode, $0.appMixerNode, $0.micNode, $0.micMixerNode)
+        }
+
+        engine.detach(appNode)
+        engine.detach(appMixerNode)
+        engine.detach(micNode)
+        engine.detach(micMixerNode)
+    }
+
+    public func engineWillConnectInput(_ engine: AVAudioEngine, src: AVAudioNode?, dst: AVAudioNode, format: AVAudioFormat, context: [AnyHashable: Any]) {
+        // Get the main mixer
+        guard let mainMixerNode = context[kRTCAudioEngineInputMixerNodeKey] as? AVAudioMixerNode else {
+            // If failed to get main mixer, call next and return.
+            next?.engineWillConnectInput(engine, src: src, dst: dst, format: format, context: context)
+            return
+        }
+
+        // Read nodes from state lock.
+        let (appNode, appMixerNode, micNode, micMixerNode) = _state.read {
+            ($0.appNode, $0.appMixerNode, $0.micNode, $0.micMixerNode)
+        }
+
+        // TODO: Investigate if possible to get this format prior to starting screen capture.
+        //
+        let appAudioNodeFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32,
+                                               sampleRate: format.sampleRate, // Assume same sample rate
+                                               channels: 2,
+                                               interleaved: false)
+
+        log("Connecting app -> appMixer -> mainMixer")
+        // appAudio -> appAudioMixer -> mainMixer
+        engine.connect(appNode, to: appMixerNode, format: appAudioNodeFormat)
+        engine.connect(appMixerNode, to: mainMixerNode, format: format)
+
+        // src is not null if device rendering mode.
+        if let src {
+            log("Connecting src (device) to micMixer -> mainMixer")
+            // mic (device) -> micMixer -> mainMixer
+            engine.connect(src, to: micMixerNode, format: format)
+        }
+
+        // TODO: Investigate if possible to get this format prior to starting screen capture.
+        let micNodeFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32,
+                                          sampleRate: format.sampleRate, // Assume same sample rate
+                                          channels: 1, // Mono
+                                          interleaved: false)
+
+        log("Connecting micAudio (player) to micMixer -> mainMixer")
+        // mic (player) -> micMixer -> mainMixer
+        engine.connect(micNode, to: micMixerNode, format: micNodeFormat)
+        // Always connect micMixer to mainMixer
+        engine.connect(micMixerNode, to: mainMixerNode, format: format)
+
+        // Invoke next
+        next?.engineWillConnectInput(engine, src: src, dst: dst, format: format, context: context)
+    }
+}
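Note (not part of the diff): DefaultMixerAudioObserver exposes appVolume and micVolume, and AudioManager installs an instance as its `mixer` property (see the AudioManager.swift change below), so the capture mix can be adjusted at runtime. A minimal sketch, assuming the default engine observer chain is left in place; `duckAppAudio()` is a hypothetical helper:

    import LiveKit

    // Lower captured app audio relative to the microphone.
    func duckAppAudio() {
        let mixer = AudioManager.shared.mixer
        mixer.appVolume = 0.3 // captured app audio at 30%
        mixer.micVolume = 1.0 // microphone level unchanged
    }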
diff --git a/Sources/LiveKit/Convenience/AudioProcessing.swift b/Sources/LiveKit/Convenience/AudioProcessing.swift
index 8e9cfc73a..af8cd734c 100644
--- a/Sources/LiveKit/Convenience/AudioProcessing.swift
+++ b/Sources/LiveKit/Convenience/AudioProcessing.swift
@@ -56,6 +56,17 @@ public extension LKAudioBuffer {
     }
 }
 
+public extension CMSampleBuffer {
+    func toAVAudioPCMBuffer() -> AVAudioPCMBuffer? {
+        let format = AVAudioFormat(cmAudioFormatDescription: formatDescription!)
+        let numSamples = AVAudioFrameCount(numSamples)
+        let pcmBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: numSamples)!
+        pcmBuffer.frameLength = numSamples
+        CMSampleBufferCopyPCMDataIntoAudioBufferList(self, at: 0, frameCount: Int32(numSamples), into: pcmBuffer.mutableAudioBufferList)
+        return pcmBuffer
+    }
+}
+
 public extension AVAudioPCMBuffer {
     /// Computes Peak and Linear Scale RMS Value (Average) for all channels.
     func audioLevels() -> [AudioLevel] {
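Note (not part of the diff): the new CMSampleBuffer.toAVAudioPCMBuffer() pairs naturally with the existing audioLevels() helper in this file. A minimal metering sketch, assuming the sample buffer carries LPCM audio (the conversion force-unwraps the format description and the buffer allocation); `logLevels(for:)` is a hypothetical helper:

    import AVFoundation

    // Convert an SCStream audio sample buffer and print its per-channel levels.
    func logLevels(for sampleBuffer: CMSampleBuffer) {
        guard sampleBuffer.isValid, let pcm = sampleBuffer.toAVAudioPCMBuffer() else { return }
        print("audio levels:", pcm.audioLevels())
    }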
diff --git a/Sources/LiveKit/Track/AudioManager.swift b/Sources/LiveKit/Track/AudioManager.swift
index 9350474de..df88cbac6 100644
--- a/Sources/LiveKit/Track/AudioManager.swift
+++ b/Sources/LiveKit/Track/AudioManager.swift
@@ -258,6 +258,8 @@ public class AudioManager: Loggable {
         _state.mutate { $0.engineObservers = engineObservers }
     }
 
+    public let mixer = DefaultMixerAudioObserver()
+
     // MARK: - For testing
 
     var isPlayoutInitialized: Bool {
@@ -308,9 +310,9 @@ public class AudioManager: Loggable {
 
     init() {
         #if os(iOS) || os(visionOS) || os(tvOS)
-        let engineObservers: [any AudioEngineObserver] = [DefaultAudioSessionObserver()]
+        let engineObservers: [any AudioEngineObserver] = [DefaultAudioSessionObserver(), mixer]
         #else
-        let engineObservers: [any AudioEngineObserver] = []
+        let engineObservers: [any AudioEngineObserver] = [mixer]
         #endif
         _state = StateSync(State(engineObservers: engineObservers))
         _admDelegateAdapter.audioManager = self
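Note (not part of the diff): `mixer` is now part of the default engineObservers chain on every platform. If an application replaces that chain through the `set(engineObservers:)` API whose tail is visible in the first hunk above, it should include `mixer` again, otherwise app and mic mixing is disconnected. A sketch, assuming `set(engineObservers:)` accepts the full replacement list:

    import LiveKit

    // Replace the engine observer chain but keep app/mic mixing connected.
    let audioManager = AudioManager.shared
    audioManager.set(engineObservers: [audioManager.mixer])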
diff --git a/Sources/LiveKit/Track/Capturers/MacOSScreenCapturer.swift b/Sources/LiveKit/Track/Capturers/MacOSScreenCapturer.swift
index c92811ab2..b48211fb5 100644
--- a/Sources/LiveKit/Track/Capturers/MacOSScreenCapturer.swift
+++ b/Sources/LiveKit/Track/Capturers/MacOSScreenCapturer.swift
@@ -98,9 +98,16 @@ public class MacOSScreenCapturer: VideoCapturer {
         configuration.pixelFormat = kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange
         configuration.showsCursor = options.showCursor
 
+        if #available(macOS 13.0, *) {
+            configuration.capturesAudio = options.appAudio
+        }
+
         // Why does SCStream hold strong reference to delegate?
         let stream = SCStream(filter: filter, configuration: configuration, delegate: nil)
         try stream.addStreamOutput(self, type: .screen, sampleHandlerQueue: nil)
+        if #available(macOS 13.0, *) {
+            try stream.addStreamOutput(self, type: .audio, sampleHandlerQueue: nil)
+        }
         try await stream.startCapture()
 
         _screenCapturerState.mutate { $0.scStream = stream }
@@ -200,7 +207,6 @@ extension MacOSScreenCapturer {
 @available(macOS 12.3, *)
 extension MacOSScreenCapturer: SCStreamOutput {
     public func stream(_: SCStream, didOutputSampleBuffer sampleBuffer: CMSampleBuffer,
-
                        of outputType: SCStreamOutputType)
     {
         guard case .started = captureState else {
@@ -211,40 +217,50 @@ extension MacOSScreenCapturer: SCStreamOutput {
         // Return early if the sample buffer is invalid.
         guard sampleBuffer.isValid else { return }
 
-        guard case .screen = outputType else { return }
-
-        // Retrieve the array of metadata attachments from the sample buffer.
-        guard let attachmentsArray = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer,
-                                                                             createIfNecessary: false) as? [[SCStreamFrameInfo: Any]],
-            let attachments = attachmentsArray.first else { return }
-
-        // Validate the status of the frame. If it isn't `.complete`, return nil.
-        guard let statusRawValue = attachments[SCStreamFrameInfo.status] as? Int,
-              let status = SCFrameStatus(rawValue: statusRawValue),
-              status == .complete else { return }
-
-        // Retrieve the content rectangle, scale, and scale factor.
-        guard let contentRectDict = attachments[.contentRect],
-              let contentRect = CGRect(dictionaryRepresentation: contentRectDict as! CFDictionary),
-              // let contentScale = attachments[.contentScale] as? CGFloat,
-              let scaleFactor = attachments[.scaleFactor] as? CGFloat else { return }
-
-        // Schedule resend timer
-        let newTimer = Task.detached(priority: .utility) { [weak self] in
-            while true {
-                try? await Task.sleep(nanoseconds: UInt64(1 * 1_000_000_000))
-                if Task.isCancelled { break }
-                guard let self else { break }
-                try await self._capturePreviousFrame()
+        if case .audio = outputType {
+            if let pcm = sampleBuffer.toAVAudioPCMBuffer() {
+                let node = AudioManager.shared.mixer.appAudioNode
+                if let engine = node.engine, engine.isRunning {
+                    node.scheduleBuffer(pcm)
+                    if !node.isPlaying {
+                        node.play()
+                    }
+                }
+            }
+        } else if case .screen = outputType {
+            // Retrieve the array of metadata attachments from the sample buffer.
+            guard let attachmentsArray = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer,
+                                                                                 createIfNecessary: false) as? [[SCStreamFrameInfo: Any]],
+                let attachments = attachmentsArray.first else { return }
+
+            // Validate the status of the frame. If it isn't `.complete`, return nil.
+            guard let statusRawValue = attachments[SCStreamFrameInfo.status] as? Int,
+                  let status = SCFrameStatus(rawValue: statusRawValue),
+                  status == .complete else { return }
+
+            // Retrieve the content rectangle, scale, and scale factor.
+            guard let contentRectDict = attachments[.contentRect],
+                  let contentRect = CGRect(dictionaryRepresentation: contentRectDict as! CFDictionary),
+                  // let contentScale = attachments[.contentScale] as? CGFloat,
+                  let scaleFactor = attachments[.scaleFactor] as? CGFloat else { return }
+
+            // Schedule resend timer
+            let newTimer = Task.detached(priority: .utility) { [weak self] in
+                while true {
+                    try? await Task.sleep(nanoseconds: UInt64(1 * 1_000_000_000))
+                    if Task.isCancelled { break }
+                    guard let self else { break }
+                    try await self._capturePreviousFrame()
+                }
             }
-        }
 
-        _screenCapturerState.mutate {
-            $0.resendTimer?.cancel()
-            $0.resendTimer = newTimer
-        }
+            _screenCapturerState.mutate {
+                $0.resendTimer?.cancel()
+                $0.resendTimer = newTimer
+            }
 
-        capture(sampleBuffer, contentRect: contentRect, scaleFactor: scaleFactor)
+            capture(sampleBuffer, contentRect: contentRect, scaleFactor: scaleFactor)
+        }
     }
 }
diff --git a/Sources/LiveKit/Types/Options/ScreenShareCaptureOptions.swift b/Sources/LiveKit/Types/Options/ScreenShareCaptureOptions.swift
index 39c7d5e31..32a0e1f57 100644
--- a/Sources/LiveKit/Types/Options/ScreenShareCaptureOptions.swift
+++ b/Sources/LiveKit/Types/Options/ScreenShareCaptureOptions.swift
@@ -28,6 +28,9 @@ public final class ScreenShareCaptureOptions: NSObject, VideoCaptureOptions, Sen
     @objc
     public let showCursor: Bool
 
+    @objc
+    public let appAudio: Bool
+
     /// Use broadcast extension for screen capture (iOS only).
     ///
     /// If a broadcast extension has been properly configured, this defaults to `true`.
@@ -49,12 +52,14 @@
     public init(dimensions: Dimensions = .h1080_169,
                 fps: Int = 30,
                 showCursor: Bool = true,
+                appAudio: Bool = true,
                 useBroadcastExtension: Bool = defaultToBroadcastExtension,
                 includeCurrentApplication: Bool = false)
     {
         self.dimensions = dimensions
         self.fps = fps
         self.showCursor = showCursor
+        self.appAudio = appAudio
         self.useBroadcastExtension = useBroadcastExtension
         self.includeCurrentApplication = includeCurrentApplication
     }
@@ -66,6 +71,7 @@ public final class ScreenShareCaptureOptions: NSObject, VideoCaptureOptions, Sen
         return dimensions == other.dimensions &&
             fps == other.fps &&
             showCursor == other.showCursor &&
+            appAudio == other.appAudio &&
             useBroadcastExtension == other.useBroadcastExtension &&
             includeCurrentApplication == other.includeCurrentApplication
     }
@@ -75,6 +81,7 @@ public final class ScreenShareCaptureOptions: NSObject, VideoCaptureOptions, Sen
         hasher.combine(dimensions)
         hasher.combine(fps)
         hasher.combine(showCursor)
+        hasher.combine(appAudio)
         hasher.combine(useBroadcastExtension)
         hasher.combine(includeCurrentApplication)
         return hasher.finalize()
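Note (not part of the diff): end to end, app audio capture is controlled by the new `appAudio` option (default `true`, effective on macOS 13+). A usage sketch; `RoomOptions(defaultScreenShareCaptureOptions:)` and `setScreenShare(enabled:)` follow the SDK's usual options plumbing and are assumptions outside this diff:

    import LiveKit

    // Publish a macOS screen share with app audio captured, then tweak the mix.
    let captureOptions = ScreenShareCaptureOptions(appAudio: true)
    let roomOptions = RoomOptions(defaultScreenShareCaptureOptions: captureOptions)
    // ... connect a Room using `roomOptions`, then:
    // try await room.localParticipant.setScreenShare(enabled: true)
    AudioManager.shared.mixer.appVolume = 0.8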