diff --git a/starboard/android/shared/BUILD.gn b/starboard/android/shared/BUILD.gn index 52040d577ebf..5b4bb03dde04 100644 --- a/starboard/android/shared/BUILD.gn +++ b/starboard/android/shared/BUILD.gn @@ -393,6 +393,18 @@ static_library("starboard_platform") { if (sb_evergreen_compatible_use_libunwind) { deps += [ "//third_party/llvm-project/libunwind:unwind_starboard" ] } + + defines = [] + if (enable_iamf_decode) { + sources += [ + "//starboard/shared/libiamf/iamf_audio_decoder.cc", + "//starboard/shared/libiamf/iamf_audio_decoder.h", + "//starboard/shared/libiamf/iamf_buffer_parser.cc", + "//starboard/shared/libiamf/iamf_buffer_parser.h", + ] + + defines += [ "ENABLE_IAMF_DECODE" ] + } } static_library("starboard_base_symbolize") { diff --git a/starboard/android/shared/media_common.h b/starboard/android/shared/media_common.h index dcbd0b36d108..4a2b6b86731c 100644 --- a/starboard/android/shared/media_common.h +++ b/starboard/android/shared/media_common.h @@ -61,6 +61,11 @@ inline const char* SupportedAudioCodecToMimeType( if (audio_codec == kSbMediaAudioCodecOpus) { return "audio/opus"; } +#if SB_API_VERSION >= 15 + if (audio_codec == kSbMediaAudioCodecIamf) { + return "audio/iamf"; + } +#endif // SB_API_VERSION >= 15 return nullptr; } diff --git a/starboard/android/shared/media_is_audio_supported.cc b/starboard/android/shared/media_is_audio_supported.cc index 023cbfc5f8fb..364a093210da 100644 --- a/starboard/android/shared/media_is_audio_supported.cc +++ b/starboard/android/shared/media_is_audio_supported.cc @@ -60,6 +60,12 @@ bool SbMediaIsAudioSupported(SbMediaAudioCodec audio_codec, return true; } +#if SB_API_VERSION >= 15 && ENABLE_IAMF_DECODE + if (audio_codec == kSbMediaAudioCodecIamf) { + return true; + } +#endif // SB_API_VERSION >= 15 && ENABLE_IAMF_DECODE + bool media_codec_supported = MediaCapabilitiesCache::GetInstance()->HasAudioDecoderFor(mime, bitrate); diff --git a/starboard/android/shared/platform_configuration/BUILD.gn 
b/starboard/android/shared/platform_configuration/BUILD.gn index f638914483e8..2885fccd10f5 100644 --- a/starboard/android/shared/platform_configuration/BUILD.gn +++ b/starboard/android/shared/platform_configuration/BUILD.gn @@ -175,6 +175,9 @@ config("platform_configuration") { "-Wl,--wrap=readdir_r", ] } + if (enable_iamf_decode) { + configs += [ ":libiamf_config" ] + } } config("size") { @@ -217,3 +220,9 @@ config("pedantic_warnings") { "-Wno-unused-parameter", ] } + +if (enable_iamf_decode) { + config("libiamf_config") { + libs = [ "//third_party/libiamf/platforms/android/libiamf.a" ] + } +} diff --git a/starboard/android/shared/player_components_factory.h b/starboard/android/shared/player_components_factory.h index 0b8f6cb0cd2f..c1ed5b50dc9e 100644 --- a/starboard/android/shared/player_components_factory.h +++ b/starboard/android/shared/player_components_factory.h @@ -49,6 +49,10 @@ #include "starboard/shared/starboard/player/filter/video_renderer_internal_impl.h" #include "starboard/shared/starboard/player/filter/video_renderer_sink.h" +#if ENABLE_IAMF_DECODE +#include "starboard/shared/libiamf/iamf_audio_decoder.h" +#endif // ENABLE_IAMF_DECODE + namespace starboard { namespace android { namespace shared { @@ -445,6 +449,18 @@ class PlayerComponentsFactory : public starboard::shared::starboard::player:: return std::unique_ptr( std::move(audio_decoder_impl)); } +#if SB_API_VERSION >= 15 && ENABLE_IAMF_DECODE + } else if (audio_stream_info.codec == kSbMediaAudioCodecIamf) { + SB_LOG(INFO) << "Creating IAMF audio decoder"; + std::unique_ptr + audio_decoder_impl( + new starboard::shared::libiamf::IamfAudioDecoder( + audio_stream_info)); + if (audio_decoder_impl->is_valid()) { + return std::unique_ptr( + std::move(audio_decoder_impl)); + } +#endif // SB_API_VERSION >= 15 && ENABLE_IAMF_DECODE } else { SB_LOG(ERROR) << "Unsupported audio codec " << audio_stream_info.codec; diff --git a/starboard/android/shared/player_create.cc 
b/starboard/android/shared/player_create.cc index 2ead14761a61..43a6f82495ea 100644 --- a/starboard/android/shared/player_create.cc +++ b/starboard/android/shared/player_create.cc @@ -119,7 +119,11 @@ SbPlayer SbPlayerCreate(SbWindow window, audio_codec != kSbMediaAudioCodecAac && audio_codec != kSbMediaAudioCodecAc3 && audio_codec != kSbMediaAudioCodecEac3 && - audio_codec != kSbMediaAudioCodecOpus) { + audio_codec != kSbMediaAudioCodecOpus +#if SB_API_VERSION >= 15 + && audio_codec != kSbMediaAudioCodecIamf +#endif // SB_API_VERSION >= 15 + ) { SB_LOG(ERROR) << "Unsupported audio codec: " << starboard::GetMediaAudioCodecName(audio_codec) << "."; player_error_func( diff --git a/starboard/build/config/BUILDCONFIG.gn b/starboard/build/config/BUILDCONFIG.gn index 5f2c51966a9f..95b420b641b7 100644 --- a/starboard/build/config/BUILDCONFIG.gn +++ b/starboard/build/config/BUILDCONFIG.gn @@ -40,6 +40,8 @@ declare_args() { build_with_separate_cobalt_toolchain = false use_xcode_clang = false + + enable_iamf_decode = false } _is_on_pythonpath = exec_script("//starboard/build/is_on_path.py", [], "json") diff --git a/starboard/linux/shared/BUILD.gn b/starboard/linux/shared/BUILD.gn index c39d975fdd66..c3a05f3cb0c2 100644 --- a/starboard/linux/shared/BUILD.gn +++ b/starboard/linux/shared/BUILD.gn @@ -355,6 +355,17 @@ static_library("starboard_platform_sources") { if (is_debug || is_devel) { defines += [ "SB_PLAYER_ENABLE_VIDEO_DUMPER" ] } + + if (enable_iamf_decode) { + sources += [ + "//starboard/shared/libiamf/iamf_audio_decoder.cc", + "//starboard/shared/libiamf/iamf_audio_decoder.h", + "//starboard/shared/libiamf/iamf_buffer_parser.cc", + "//starboard/shared/libiamf/iamf_buffer_parser.h", + ] + + defines += [ "ENABLE_IAMF_DECODE" ] + } } if (current_toolchain == starboard_toolchain) { diff --git a/starboard/linux/shared/media_is_audio_supported.cc b/starboard/linux/shared/media_is_audio_supported.cc index ce2fbdff69a1..ea9cf3369fb9 100644 --- 
a/starboard/linux/shared/media_is_audio_supported.cc +++ b/starboard/linux/shared/media_is_audio_supported.cc @@ -32,6 +32,12 @@ bool SbMediaIsAudioSupported(SbMediaAudioCodec audio_codec, return bitrate <= kSbMediaMaxAudioBitrateInBitsPerSecond; } +#if SB_API_VERSION >= 15 && ENABLE_IAMF_DECODE + if (audio_codec == kSbMediaAudioCodecIamf) { + return bitrate <= kSbMediaMaxAudioBitrateInBitsPerSecond; + } +#endif // SB_API_VERSION >= 15 && ENABLE_IAMF_DECODE + if (audio_codec == kSbMediaAudioCodecAc3 || audio_codec == kSbMediaAudioCodecEac3) { return bitrate <= kSbMediaMaxAudioBitrateInBitsPerSecond; diff --git a/starboard/linux/shared/player_components_factory.cc b/starboard/linux/shared/player_components_factory.cc index 67d98405b87d..3b9d0b46087e 100644 --- a/starboard/linux/shared/player_components_factory.cc +++ b/starboard/linux/shared/player_components_factory.cc @@ -41,6 +41,10 @@ #include "starboard/shared/starboard/player/filter/video_render_algorithm_impl.h" #include "starboard/shared/starboard/player/filter/video_renderer_sink.h" +#if ENABLE_IAMF_DECODE +#include "starboard/shared/libiamf/iamf_audio_decoder.h" +#endif // ENABLE_IAMF_DECODE + namespace starboard { namespace shared { namespace starboard { @@ -86,6 +90,13 @@ class PlayerComponentsFactory : public PlayerComponents::Factory { libfdkaac::LibfdkaacHandle::GetHandle()->IsLoaded()) { SB_LOG(INFO) << "Playing audio using FdkAacAudioDecoder."; return std::unique_ptr(new FdkAacAudioDecoder()); +#if SB_API_VERSION >= 15 && ENABLE_IAMF_DECODE + } else if (audio_stream_info.codec == kSbMediaAudioCodecIamf) { + SB_LOG(INFO) << "Playing audio using IamfAudioDecoder."; + return std::unique_ptr( + new ::starboard::shared::libiamf::IamfAudioDecoder( + audio_stream_info)); +#endif // SB_API_VERSION >= 15 && ENABLE_IAMF_DECODE } else { std::unique_ptr audio_decoder_impl( FfmpegAudioDecoder::Create(audio_stream_info)); diff --git a/starboard/linux/x64x11/shared/platform_configuration/BUILD.gn 
b/starboard/linux/x64x11/shared/platform_configuration/BUILD.gn index 66eedfc23e3f..39fed6b2bce2 100644 --- a/starboard/linux/x64x11/shared/platform_configuration/BUILD.gn +++ b/starboard/linux/x64x11/shared/platform_configuration/BUILD.gn @@ -30,6 +30,9 @@ config("platform_configuration") { config("libraries") { configs = [ "//starboard/linux/shared/platform_configuration:libraries" ] + if (enable_iamf_decode) { + libs = [ "//third_party/libiamf/platforms/linux/libiamf.a" ] + } } config("linker_flags") { diff --git a/starboard/shared/libiamf/iamf_audio_decoder.cc b/starboard/shared/libiamf/iamf_audio_decoder.cc new file mode 100644 index 000000000000..43db908041b0 --- /dev/null +++ b/starboard/shared/libiamf/iamf_audio_decoder.cc @@ -0,0 +1,337 @@ +// Copyright 2024 The Cobalt Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "starboard/shared/libiamf/iamf_audio_decoder.h"
+
+#include <string>
+
+#include "starboard/common/string.h"
+#include "third_party/libiamf/source/code/include/IAMF_defines.h"
+
+namespace starboard {
+namespace shared {
+namespace libiamf {
+
+namespace {
+using shared::starboard::player::DecodedAudio;
+
+// Compile-time switches that force a particular output layout. When all of
+// them are false ConfigureDecoder() selects SOUND_SYSTEM_A and logs that the
+// decoder is configured for stereo output.
+constexpr bool kForceBinauralAudio = false;
+constexpr bool kForce6ChannelAudio = false;
+constexpr bool kForce8ChannelAudio = false;
+
+// Maps a libiamf status code to its symbolic name for log and error messages.
+// Codes without a dedicated case are rendered with their numeric value.
+std::string ErrorCodeToString(int code) {
+  switch (code) {
+    case IAMF_OK:
+      return "IAMF_OK";
+    case IAMF_ERR_BAD_ARG:
+      return "IAMF_ERR_BAD_ARG";
+    case IAMF_ERR_BUFFER_TOO_SMALL:
+      return "IAMF_ERR_BUFFER_TOO_SMALL";
+    case IAMF_ERR_INTERNAL:
+      return "IAMF_ERR_INTERNAL";
+    case IAMF_ERR_INVALID_PACKET:
+      return "IAMF_ERR_INVALID_PACKET";
+    case IAMF_ERR_INVALID_STATE:
+      return "IAMF_ERR_INVALID_STATE";
+    case IAMF_ERR_UNIMPLEMENTED:
+      return "IAMF_ERR_UNIMPLEMENTED";
+    case IAMF_ERR_ALLOC_FAIL:
+      return "IAMF_ERR_ALLOC_FAIL";
+    default:
+      return "Unknown IAMF error code " + std::to_string(code);
+  }
+}
+}  // namespace
+
+// Opens the libiamf decoder handle. Failure is only logged here; callers are
+// expected to check is_valid() before using the instance.
+IamfAudioDecoder::IamfAudioDecoder(const AudioStreamInfo& audio_stream_info)
+    : audio_stream_info_(audio_stream_info) {
+  decoder_ = IAMF_decoder_open();
+  if (!decoder_) {
+    SB_LOG(ERROR) << "Error creating libiamf decoder";
+  }
+}
+
+IamfAudioDecoder::~IamfAudioDecoder() {
+  TeardownDecoder();
+}
+
+// Returns true while a libiamf decoder handle is held.
+bool IamfAudioDecoder::is_valid() const {
+  return decoder_ != nullptr;
+}
+
+// Stores the output and error callbacks. Must be called exactly once, before
+// any call that DCHECKs |output_cb_|.
+void IamfAudioDecoder::Initialize(const OutputCB& output_cb,
+                                  const ErrorCB& error_cb) {
+  SB_DCHECK(BelongsToCurrentThread());
+  SB_DCHECK(output_cb);
+  SB_DCHECK(!output_cb_);
+  SB_DCHECK(error_cb);
+  SB_DCHECK(!error_cb_);
+
+  output_cb_ = output_cb;
+  error_cb_ = error_cb;
+}
+
+// Decodes every buffer in |input_buffers| in order and then schedules
+// |consumed_cb|. If any buffer fails to decode, the remaining buffers are
+// dropped and |consumed_cb| is not scheduled; DecodeInternal() has already
+// reported the failure through |error_cb_| in that case.
+void IamfAudioDecoder::Decode(const InputBuffers& input_buffers,
+                              const ConsumedCB& consumed_cb) {
+  SB_DCHECK(BelongsToCurrentThread());
+  SB_DCHECK(!input_buffers.empty());
+  SB_DCHECK(pending_audio_buffers_.empty());
+  SB_DCHECK(output_cb_);
+
+  if (stream_ended_) {
+    SB_LOG(ERROR) << "Decode() is called after WriteEndOfStream() is called.";
+    return;
+  }
+
+  for (const auto& input_buffer : input_buffers) {
+    if (!DecodeInternal(input_buffer)) {
+      return;
+    }
+  }
+  Schedule(consumed_cb);
+}
+
+// Parses the IA descriptors in |input_buffer|, configures the libiamf decoder
+// on first use, decodes one buffer and queues the resulting DecodedAudio.
+// Returns false after reporting the failure through ReportError().
+bool IamfAudioDecoder::DecodeInternal(
+    const scoped_refptr<InputBuffer>& input_buffer) {
+  SB_DCHECK(BelongsToCurrentThread());
+  SB_DCHECK(input_buffer);
+  SB_DCHECK(output_cb_);
+  SB_DCHECK(!stream_ended_ || !pending_audio_buffers_.empty());
+
+  // Reset() may leave |decoder_| null if reopening the decoder fails, so this
+  // is a runtime condition rather than a programming error.
+  if (!is_valid()) {
+    ReportError("IamfAudioDecoder has no libiamf decoder instance");
+    return false;
+  }
+
+  if (input_buffer->size() == 0) {
+    // Report through |error_cb_| like every other failure in this function;
+    // otherwise Decode() returns without invoking any callback.
+    ReportError("Empty input buffer written to IamfAudioDecoder");
+    return false;
+  }
+
+  IamfBufferParser::IamfBufferInfo info;
+  IamfBufferParser().ParseInputBuffer(
+      input_buffer, &info, kForceBinauralAudio,
+      kForce6ChannelAudio || kForce8ChannelAudio);
+  if (!info.is_valid()) {
+    ReportError("Failed to parse IA Descriptors");
+    return false;
+  }
+  if (!decoder_is_configured_) {
+    if (!ConfigureDecoder(&info, input_buffer->timestamp())) {
+      return false;
+    }
+  }
+
+  // The output buffer is sized for |info.num_samples| interleaved frames.
+  scoped_refptr<DecodedAudio> decoded_audio = new DecodedAudio(
+      audio_stream_info_.number_of_channels, GetSampleType(),
+      kSbMediaAudioFrameStorageTypeInterleaved, input_buffer->timestamp(),
+      audio_stream_info_.number_of_channels * info.num_samples *
+          starboard::media::GetBytesPerSample(GetSampleType()));
+  int samples_decoded =
+      IAMF_decoder_decode(decoder_, info.data.data(), info.data_size, nullptr,
+                          reinterpret_cast<void*>(decoded_audio->data()));
+  if (samples_decoded < 1) {
+    ReportError("Failed to decode IAMF sample, error " +
+                ErrorCodeToString(samples_decoded));
+    return false;
+  }
+
+  // NOTE(review): when |samples_decoded| is smaller than |info.num_samples|
+  // the DecodedAudio keeps its full allocated size - confirm whether it
+  // should be shrunk to the decoded frame count.
+  SB_DCHECK(samples_decoded <= info.num_samples)
+      << "Samples decoded (" << samples_decoded
+      << ") is greater than the number of samples indicated by the stream ("
+      << info.num_samples << ")";
+
+  if (samples_per_second_ == 0) {
+    samples_per_second_ = info.sample_rate;
+  }
+
+  // TODO: Enable partial audio once float32 pcm output is available.
+  const auto& sample_info = input_buffer->audio_sample_info();
+  decoded_audio->AdjustForDiscardedDurations(
+      samples_per_second_, sample_info.discarded_duration_from_front,
+      sample_info.discarded_duration_from_back);
+
+  decoded_audios_.push(decoded_audio);
+
+  output_cb_();
+
+  return true;
+}
+
+// Marks the stream as ended and, when no input is pending, queues an
+// end-of-stream DecodedAudio and schedules |output_cb_|.
+void IamfAudioDecoder::WriteEndOfStream() {
+  SB_DCHECK(BelongsToCurrentThread());
+  SB_DCHECK(output_cb_);
+
+  stream_ended_ = true;
+  if (!pending_audio_buffers_.empty()) {
+    return;
+  }
+
+  // Put EOS into the queue.
+  decoded_audios_.push(new DecodedAudio);
+
+  Schedule(output_cb_);
+}
+
+// Applies the output format, layout, timing and mix presentation settings to
+// the libiamf decoder and feeds it the descriptor OBUs from |info|. Each
+// failing libiamf call is reported through ReportError() and aborts the
+// configuration. Sets |decoder_is_configured_| on success.
+bool IamfAudioDecoder::ConfigureDecoder(IamfBufferInfo* info,
+                                        int64_t timestamp) {
+  SB_DCHECK(is_valid());
+  SB_DCHECK(!decoder_is_configured_);
+
+  // TODO: libiamf has an issue outputting 32 bit float samples, set to 16 bit
+  // for now.
+  int error = IAMF_decoder_set_bit_depth(decoder_, 16);
+  if (error != IAMF_OK) {
+    ReportError("IAMF_decoder_set_bit_depth() fails with error " +
+                ErrorCodeToString(error));
+    return false;
+  }
+
+  error = IAMF_decoder_set_sampling_rate(decoder_, info->sample_rate);
+  if (error != IAMF_OK) {
+    ReportError("IAMF_decoder_set_sampling_rate() fails with error " +
+                ErrorCodeToString(error));
+    return false;
+  }
+
+  if (kForceBinauralAudio) {
+    SB_LOG(INFO) << "Configuring IamfAudioDecoder for binaural output";
+    error = IAMF_decoder_output_layout_set_binaural(decoder_);
+    if (error != IAMF_OK) {
+      ReportError(
+          "IAMF_decoder_output_layout_set_binaural() fails with error " +
+          ErrorCodeToString(error));
+      return false;
+    }
+  } else {
+    IAMF_SoundSystem sound_system = SOUND_SYSTEM_A;
+    if (kForce6ChannelAudio) {
+      SB_LOG(INFO) << "Configuring IamfAudioDecoder for 5.1 output";
+      sound_system = SOUND_SYSTEM_B;
+    } else if (kForce8ChannelAudio) {
+      SB_LOG(INFO) << "Configuring IamfAudioDecoder for 7.1 output";
+      sound_system = SOUND_SYSTEM_C;
+    } else {
+      SB_LOG(INFO) << "Configuring IamfAudioDecoder for stereo output";
+    }
+
+    error = IAMF_decoder_output_layout_set_sound_system(decoder_, sound_system);
+    if (error != IAMF_OK) {
+      ReportError(
+          "IAMF_decoder_output_layout_set_sound_system() fails with error " +
+          ErrorCodeToString(error));
+      return false;
+    }
+  }
+
+  // Time base is set to 90000, as it is in the iamfplayer example
+  // https://github.com/AOMediaCodec/libiamf/blob/v1.0.0-errata/code/test/tools/iamfplayer/player/iamfplayer.c#L450
+  // NOTE(review): |timestamp| is passed through unchanged from
+  // InputBuffer::timestamp() - confirm its unit matches this time base.
+  error = IAMF_decoder_set_pts(decoder_, timestamp, 90000);
+  if (error != IAMF_OK) {
+    ReportError("IAMF_decoder_set_pts() fails with error " +
+                ErrorCodeToString(error));
+    return false;
+  }
+
+  error = IAMF_decoder_set_mix_presentation_id(
+      decoder_, info->mix_presentation_id.value());
+  if (error != IAMF_OK) {
+    ReportError("IAMF_decoder_set_mix_presentation_id() fails with error " +
+                ErrorCodeToString(error));
+    return false;
+  }
+
+  error = IAMF_decoder_peak_limiter_enable(decoder_, 0);
+  if (error != IAMF_OK) {
+    ReportError("IAMF_decoder_peak_limiter_enable() fails with error " +
+                ErrorCodeToString(error));
+    return false;
+  }
+
+  error = IAMF_decoder_set_normalization_loudness(decoder_, .0f);
+  if (error != IAMF_OK) {
+    ReportError("IAMF_decoder_set_normalization_loudness() fails with error " +
+                ErrorCodeToString(error));
+    return false;
+  }
+
+  error = IAMF_decoder_configure(decoder_, info->config_obus.data(),
+                                 info->config_obus_size, nullptr);
+  if (error != IAMF_OK) {
+    ReportError("IAMF_decoder_configure() fails with error " +
+                ErrorCodeToString(error));
+    return false;
+  }
+
+  decoder_is_configured_ = true;
+
+  return true;
+}
+
+// Closes the libiamf decoder handle, if any, and clears |decoder_|.
+void IamfAudioDecoder::TeardownDecoder() {
+  if (is_valid()) {
+    IAMF_decoder_close(decoder_);
+    decoder_ = nullptr;
+  }
+}
+
+// Pops the oldest queued DecodedAudio and writes the stream's sample rate to
+// |samples_per_second|. Returns a null reference if the queue is empty.
+scoped_refptr<DecodedAudio> IamfAudioDecoder::Read(int* samples_per_second) {
+  SB_DCHECK(BelongsToCurrentThread());
+  SB_DCHECK(output_cb_);
+  SB_DCHECK(!decoded_audios_.empty());
+  SB_DCHECK(samples_per_second_ > 0);
+
+  scoped_refptr<DecodedAudio> result;
+  if (!decoded_audios_.empty()) {
+    result = decoded_audios_.front();
+    decoded_audios_.pop();
+  }
+  *samples_per_second = samples_per_second_;
+  return result;
+}
+
+// Recreates the libiamf decoder and drops all queued output, pending input and
+// scheduled jobs so that decoding can restart from a fresh state.
+void IamfAudioDecoder::Reset() {
+  SB_DCHECK(BelongsToCurrentThread());
+
+  if (is_valid()) {
+    TeardownDecoder();
+    decoder_ = IAMF_decoder_open();
+    if (!decoder_) {
+      // Mirror the constructor: log the failure. DecodeInternal() reports an
+      // error on the next decode attempt.
+      SB_LOG(ERROR) << "Error creating libiamf decoder";
+    }
+  }
+
+  decoder_is_configured_ = false;
+
+  stream_ended_ = false;
+  while (!decoded_audios_.empty()) {
+    decoded_audios_.pop();
+  }
+  pending_audio_buffers_.clear();
+  consumed_cb_ = nullptr;
+
+  CancelPendingJobs();
+}
+
+// Returns the 16 bit integer sample type constant that exists for the
+// Starboard API version being built.
+SbMediaAudioSampleType IamfAudioDecoder::GetSampleType() const {
+  SB_DCHECK(BelongsToCurrentThread());
+#if SB_API_VERSION <= 15 && SB_HAS_QUIRK(SUPPORT_INT16_AUDIO_SAMPLES)
+  return kSbMediaAudioSampleTypeInt16;
+#endif  // SB_API_VERSION <= 15 && SB_HAS_QUIRK(SUPPORT_INT16_AUDIO_SAMPLES)
+  return kSbMediaAudioSampleTypeInt16Deprecated;
+}
+
+// Logs |message| and forwards it to |error_cb_| as a decode error.
+void IamfAudioDecoder::ReportError(const std::string& message) const {
+  SB_DCHECK(error_cb_);
+  SB_LOG(ERROR) << "IamfAudioDecoder error: " << message;
+  error_cb_(kSbPlayerErrorDecode, message);
+}
+
+}  // namespace libiamf
+}  // namespace shared
+}  // namespace starboard
diff --git a/starboard/shared/libiamf/iamf_audio_decoder.h b/starboard/shared/libiamf/iamf_audio_decoder.h
new file mode 100644
index 000000000000..6f457d548b90
--- /dev/null
+++ b/starboard/shared/libiamf/iamf_audio_decoder.h
@@ -0,0 +1,85 @@
+// Copyright 2024 The Cobalt Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#ifndef STARBOARD_SHARED_LIBIAMF_IAMF_AUDIO_DECODER_H_ +#define STARBOARD_SHARED_LIBIAMF_IAMF_AUDIO_DECODER_H_ + +#include +#include +#include + +#include "starboard/common/ref_counted.h" +#include "starboard/media.h" +#include "starboard/shared/internal_only.h" +#include "starboard/shared/libiamf/iamf_buffer_parser.h" +#include "starboard/shared/starboard/media/media_util.h" +#include "starboard/shared/starboard/player/decoded_audio_internal.h" +#include "starboard/shared/starboard/player/filter/audio_decoder_internal.h" +#include "starboard/shared/starboard/player/job_queue.h" +#include "third_party/libiamf/source/code/include/IAMF_decoder.h" + +namespace starboard { +namespace shared { +namespace libiamf { + +class IamfAudioDecoder + : public ::starboard::shared::starboard::player::filter::AudioDecoder, + private starboard::player::JobQueue::JobOwner { + public: + typedef starboard::media::AudioStreamInfo AudioStreamInfo; + typedef shared::libiamf::IamfBufferParser::IamfBufferInfo IamfBufferInfo; + + explicit IamfAudioDecoder(const AudioStreamInfo& audio_stream_info); + ~IamfAudioDecoder() override; + + bool is_valid() const; + + // AudioDecoder functions + void Initialize(const OutputCB& output_cb, const ErrorCB& error_cb) override; + void Decode(const InputBuffers& input_buffers, + const ConsumedCB& consumed_cb) override; + void WriteEndOfStream() override; + scoped_refptr Read(int* samples_per_second) override; + void Reset() override; + + private: + static constexpr int kMinimumBuffersToDecode = 2; + static constexpr int kDefaultSampleRate = 48000; + + bool ConfigureDecoder(IamfBufferInfo* info, int64_t timestamp); + void TeardownDecoder(); + bool DecodeInternal(const scoped_refptr& input_buffer); + + SbMediaAudioSampleType GetSampleType() const; + + void ReportError(const std::string& message) const; + + OutputCB output_cb_; + ErrorCB error_cb_; + ConsumedCB consumed_cb_; + + IAMF_Decoder* decoder_ = nullptr; + bool stream_ended_ = false; + std::queue> 
decoded_audios_; + AudioStreamInfo audio_stream_info_; + bool decoder_is_configured_ = false; + std::deque> pending_audio_buffers_; + int samples_per_second_ = 0; +}; + +} // namespace libiamf +} // namespace shared +} // namespace starboard + +#endif // STARBOARD_SHARED_LIBIAMF_IAMF_AUDIO_DECODER_H_ diff --git a/starboard/shared/libiamf/iamf_buffer_parser.cc b/starboard/shared/libiamf/iamf_buffer_parser.cc new file mode 100644 index 000000000000..02cd04bef7a6 --- /dev/null +++ b/starboard/shared/libiamf/iamf_buffer_parser.cc @@ -0,0 +1,588 @@ +// Copyright 2024 The Cobalt Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "starboard/shared/libiamf/iamf_buffer_parser.h" + +#include +#include +#include + +#include "third_party/libiamf/source/code/include/IAMF_defines.h" + +namespace starboard { +namespace shared { +namespace libiamf { + +namespace { + +// From //media/formats/mp4/rcheck.h. +#define RCHECK(condition) \ + do { \ + if (!(condition)) { \ + SB_DLOG(ERROR) << "Failure while parsing IAMF config: " #condition; \ + return false; \ + } \ + } while (0) + +// From +// https://aomediacodec.github.io/iamf/v1.0.0-errata.html#obu-header-syntax. +constexpr int kObuTypeCodecConfig = 0; +constexpr int kObuTypeAudioElement = 1; +constexpr int kObuTypeMixPresentation = 2; +constexpr int kObuTypeSequenceHeader = 31; + +// From +// https://aomediacodec.github.io/iamf/v1.0.0-errata.html#obu-codecconfig. 
+constexpr int kFourccOpus = 0x4f707573;
+constexpr int kFourccIpcm = 0x6970636d;
+
+}  // namespace
+
+// Forward-only cursor over a byte buffer that the reader does not own. Every
+// Read*/Skip* call is bounds-checked against the buffer size, returns false on
+// failure and leaves the cursor where it was in that case.
+class BufferReader {
+ public:
+  BufferReader(const uint8_t* buf, size_t size)
+      : buf_(buf), pos_(0), size_(size) {
+#if SB_IS_BIG_ENDIAN
+#error BufferReader assumes little-endianness.
+#endif  // SB_IS_BIG_ENDIAN
+  }
+
+  // Reads one byte into |ptr|.
+  bool Read1(uint8_t* ptr) {
+    if (!ptr || !HasBytes(sizeof(uint8_t))) {
+      return false;
+    }
+    *ptr = buf_[pos_++];
+    return true;
+  }
+
+  // Reads four bytes into |ptr| and byte-swaps the value.
+  bool Read4(uint32_t* ptr) {
+    if (!ptr || !HasBytes(sizeof(uint32_t))) {
+      return false;
+    }
+    std::memcpy(ptr, &buf_[pos_], sizeof(uint32_t));
+    *ptr = ByteSwap(*ptr);
+    pos_ += sizeof(uint32_t);
+    return true;
+  }
+
+  // Decodes an Leb128 value into |ptr|. At most sizeof(uint32_t) bytes are
+  // consumed; the read fails if no terminating byte (high bit clear) is found
+  // within that limit or before the end of the buffer. |ptr| is only written
+  // on success.
+  bool ReadLeb128(uint32_t* ptr) {
+    if (!ptr) {
+      return false;
+    }
+    // Bound the scan by the bytes that are actually left, so that a short
+    // value at the very end of the buffer can still be decoded and no byte
+    // past the end is touched.
+    const size_t max_bytes = std::min(size_ - pos_, sizeof(uint32_t));
+    uint32_t value = 0;
+    for (size_t i = 0; i < max_bytes; ++i) {
+      const uint8_t byte = buf_[pos_ + i];
+      value |= static_cast<uint32_t>(byte & 0x7f) << (i * 7);
+      if (!(byte & 0x80)) {
+        *ptr = value;
+        pos_ += i + 1;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Reads a null-terminated string into |str| and advances past the
+  // terminator. The terminator must be found within the next 128 bytes and
+  // before the end of the buffer, otherwise the read fails.
+  bool ReadString(std::string* str) {
+    if (!str || pos_ >= size_) {
+      return false;
+    }
+
+    // The size of the string, terminator included, is capped to 128 bytes.
+    constexpr size_t kMaxStringSize = 128;
+    const size_t window = std::min(size_ - pos_, kMaxStringSize);
+    const uint8_t* start = buf_ + pos_;
+    const void* terminator = std::memchr(start, '\0', window);
+    if (!terminator) {
+      return false;
+    }
+
+    const size_t length = static_cast<const uint8_t*>(terminator) - start;
+    str->assign(reinterpret_cast<const char*>(start), length);
+    // Account for null terminator byte.
+    pos_ += length + 1;
+    return true;
+  }
+
+  // Advances the cursor by |size| bytes.
+  bool SkipBytes(size_t size) {
+    if (!HasBytes(size)) {
+      return false;
+    }
+    pos_ += size;
+    return true;
+  }
+
+  // Consumes one Leb128 value and discards it.
+  bool SkipLeb128() {
+    uint32_t val;
+    return ReadLeb128(&val);
+  }
+
+  // Consumes one null-terminated string and discards it.
+  bool SkipString() {
+    std::string str;
+    return ReadString(&str);
+  }
+
+  size_t size() const { return size_; }
+  size_t pos() const { return pos_; }
+  const uint8_t* buf() const { return buf_; }
+
+ private:
+  // True if |size| more bytes can be consumed. |pos_| never exceeds |size_|,
+  // so the subtraction cannot wrap, whereas |size + pos_| could for very large
+  // |size| values.
+  bool HasBytes(size_t size) const { return size <= size_ - pos_; }
+
+  static inline uint32_t ByteSwap(uint32_t x) {
+#if defined(COMPILER_MSVC)
+    return _byteswap_ulong(x);
+#else
+    return __builtin_bswap32(x);
+#endif
+  }
+
+  // Declared in the order used by the constructor's initializer list.
+  const uint8_t* buf_;
+  size_t pos_ = 0;
+  const size_t size_ = 0;
+};
+
+IamfBufferParser::IamfBufferParser() {}
+
+// Parses the descriptor OBUs at the start of |input_buffer| into |info|.
+// At most one of |prefer_binaural_audio| and |prefer_surround_audio| may be
+// set. Returns false if parsing fails.
+bool IamfBufferParser::ParseInputBuffer(
+    const scoped_refptr<InputBuffer>& input_buffer,
+    IamfBufferInfo* info,
+    const bool prefer_binaural_audio,
+    const bool prefer_surround_audio) {
+  SB_DCHECK(info);
+  SB_DCHECK(input_buffer->data());
+  SB_DCHECK(!(prefer_binaural_audio && prefer_surround_audio));
+  RCHECK(ParseInputBufferInternal(input_buffer, info, prefer_binaural_audio,
+                                  prefer_surround_audio));
+  return true;
+}
+
+// Parses descriptor OBUs until |info| becomes valid or the buffer is
+// exhausted, then splits the buffer: the first |config_obus_size| bytes are
+// copied to |info->config_obus| and the remainder to |info->data|.
+bool IamfBufferParser::ParseInputBufferInternal(
+    const scoped_refptr<InputBuffer>& input_buffer,
+    IamfBufferInfo* info,
+    const bool prefer_binaural_audio,
+    const bool prefer_surround_audio) {
+  BufferReader reader(input_buffer->data(), input_buffer->size());
+
+  while (!info->is_valid() && reader.pos() < reader.size()) {
+    RCHECK(ParseDescriptorOBU(&reader, info, prefer_binaural_audio,
+                              prefer_surround_audio));
+  }
+
+  info->data_size = reader.size() - info->config_obus_size;
+  info->config_obus.assign(reader.buf(), reader.buf() + info->config_obus_size);
+  info->data.assign(reader.buf() + info->config_obus_size,
+                    reader.buf() + reader.size());
+
+  return true;
+}
+
+bool IamfBufferParser::ParseDescriptorOBU(BufferReader* reader,
+                                          IamfBufferInfo* info,
+                                          const bool prefer_binaural_audio,
+                                          const bool prefer_surround_audio) {
+  SB_DCHECK(reader);
+  uint8_t obu_type = 0;
+  uint32_t obu_size = 0;
+  if (!ParseOBUHeader(reader, &obu_type, &obu_size)) {
+    SB_DLOG(ERROR) << "Error parsing OBU header";
+    return false;
+  }
+
+  int next_obu_pos = reader->pos() + obu_size;
+
+  switch (static_cast<int>(obu_type)) {
+    case kObuTypeCodecConfig:
+      RCHECK(ParseCodecConfigOBU(reader, info));
+      break;
+    case kObuTypeAudioElement:
+      RCHECK(ParseAudioElementOBU(reader, info, prefer_binaural_audio,
+                                  prefer_surround_audio));
+      break;
+    case kObuTypeSequenceHeader:
+      break;
+    case kObuTypeMixPresentation:
+
RCHECK(ParseMixPresentationOBU(reader, info, prefer_binaural_audio, + prefer_surround_audio)); + break; + default: + // Once an OBU is read that is not a descriptor, descriptor parsing is + // assumed to be complete. + SB_DCHECK(info->is_valid()); + return true; + } + + // Skip to the next OBU. + const size_t remaining_size = next_obu_pos - reader->pos(); + RCHECK(reader->SkipBytes(remaining_size)); + info->config_obus_size = reader->pos(); + return true; +} + +bool IamfBufferParser::ParseOBUHeader(BufferReader* reader, + uint8_t* obu_type, + uint32_t* obu_size) const { + uint8_t header_flags; + RCHECK(reader->Read1(&header_flags)); + *obu_type = (header_flags >> 3) & 0x1f; + + const bool obu_redundant_copy = (header_flags >> 2) & 1; + const bool obu_trimming_status_flag = (header_flags >> 1) & 1; + const bool obu_extension_flag = header_flags & 1; + + *obu_size = 0; + + RCHECK(reader->ReadLeb128(obu_size)); + + // |obu_size| contains the size of the OBU after its own field. + // If either of the flags are set, subtract the number of bytes read + // from the flags from |obu_size| before returning to ParseDescriptorOBU(). 
+  size_t reader_pos_before_flags = reader->pos();
+
+  if (obu_trimming_status_flag) {
+    RCHECK(reader->SkipLeb128());
+    RCHECK(reader->SkipLeb128());
+  }
+
+  if (obu_extension_flag) {
+    RCHECK(reader->SkipLeb128());
+  }
+
+  size_t flag_bytes_read = reader->pos() - reader_pos_before_flags;
+  if (flag_bytes_read >= *obu_size) {
+    return false;
+  }
+  *obu_size -= flag_bytes_read;
+  return true;
+}
+
+// Parses a Codec Config OBU payload and stores the number of samples per
+// frame and the sample rate in |info|. Only the Opus and ipcm codec ids are
+// accepted; any other id makes the parse fail.
+bool IamfBufferParser::ParseCodecConfigOBU(BufferReader* reader,
+                                           IamfBufferInfo* info) {
+  // Leading Leb128 field of the OBU; its value is not needed here.
+  RCHECK(reader->SkipLeb128());
+
+  uint32_t codec_id = 0;
+  RCHECK(reader->Read4(&codec_id));
+
+  RCHECK(reader->ReadLeb128(&info->num_samples));
+
+  // audio_roll_distance
+  RCHECK(reader->SkipBytes(2));
+
+  const int kOpusSampleRate = 48000;
+  switch (codec_id) {
+    case kFourccOpus:
+      info->sample_rate = kOpusSampleRate;
+      break;
+    case kFourccIpcm: {
+      // sample_format_flags
+      RCHECK(reader->SkipBytes(1));
+
+      // sample_size
+      RCHECK(reader->SkipBytes(1));
+
+      uint32_t sample_rate_unsigned;
+      RCHECK(reader->Read4(&sample_rate_unsigned));
+      info->sample_rate = static_cast<int>(sample_rate_unsigned);
+      break;
+    }
+    default:
+      // |codec_id| comes straight from the stream, so an unexpected value is
+      // a parse failure rather than an unreachable state.
+      SB_DLOG(ERROR) << "Unsupported IAMF codec id " << codec_id;
+      return false;
+  }
+
+  return true;
+}
+
+// Parses an Audio Element OBU payload. When binaural or surround output is
+// preferred and the element is channel based, the id of the element is
+// recorded in |binaural_audio_element_ids_| or |surround_audio_element_ids_|
+// according to the loudspeaker layouts of its layers.
+bool IamfBufferParser::ParseAudioElementOBU(BufferReader* reader,
+                                            IamfBufferInfo* info,
+                                            const bool prefer_binaural_audio,
+                                            const bool prefer_surround_audio) {
+  uint32_t audio_element_id;
+  RCHECK(reader->ReadLeb128(&audio_element_id));
+
+  // The element type is stored in the top three bits of the byte.
+  uint8_t audio_element_type;
+  RCHECK(reader->Read1(&audio_element_type));
+  audio_element_type = audio_element_type >> 5;
+
+  // codec_config_id
+  RCHECK(reader->SkipLeb128());
+
+  uint32_t num_substreams;
+  RCHECK(reader->ReadLeb128(&num_substreams));
+
+  for (uint32_t i = 0; i < num_substreams; ++i) {
+    // audio_substream_id
+    RCHECK(reader->SkipLeb128());
+  }
+
+  uint32_t num_parameters;
+  RCHECK(reader->ReadLeb128(&num_parameters));
+
+  for (uint32_t i = 0; i < num_parameters; ++i) {
+    uint32_t param_definition_type;
+    RCHECK(reader->ReadLeb128(&param_definition_type));
+
+    // NOTE(review): the result of SkipParamDefinition() is not checked in
+    // either branch below - confirm whether it can fail and should be
+    // wrapped in RCHECK.
+    if (param_definition_type == IAMF_PARAMETER_TYPE_DEMIXING) {
+      SkipParamDefinition(reader);
+      // DemixingParamDefintion
+      RCHECK(reader->SkipBytes(1));
+    } else if (param_definition_type == IAMF_PARAMETER_TYPE_RECON_GAIN) {
+      SkipParamDefinition(reader);
+    } else if (param_definition_type > 2) {
+      uint32_t param_definition_size;
+      RCHECK(reader->ReadLeb128(&param_definition_size));
+      RCHECK(reader->SkipBytes(param_definition_size));
+    }
+  }
+
+  if (static_cast<int>(audio_element_type) == AUDIO_ELEMENT_CHANNEL_BASED &&
+      (prefer_binaural_audio || prefer_surround_audio)) {
+    // Parse ScalableChannelLayoutConfig for binaural and surround
+    // loudspeaker layouts
+    uint8_t num_layers;
+    RCHECK(reader->Read1(&num_layers));
+    num_layers = num_layers >> 5;
+    // Read ChannelAudioLayerConfigs
+    for (int i = 0; i < static_cast<int>(num_layers); ++i) {
+      uint8_t loudspeaker_layout;
+      bool output_gain_is_present_flag;
+      RCHECK(reader->Read1(&loudspeaker_layout));
+      // Bit 3 carries the output gain flag, the top four bits the layout.
+      output_gain_is_present_flag = (loudspeaker_layout >> 3) & 0x01;
+      loudspeaker_layout = loudspeaker_layout >> 4;
+      if (loudspeaker_layout == IA_CHANNEL_LAYOUT_BINAURAL) {
+        binaural_audio_element_ids_.insert(audio_element_id);
+      } else if (loudspeaker_layout > IA_CHANNEL_LAYOUT_STEREO &&
+                 loudspeaker_layout < IA_CHANNEL_LAYOUT_COUNT) {
+        surround_audio_element_ids_.insert(audio_element_id);
+      }
+
+      // substream_count and coupled_substream_count
+      RCHECK(reader->SkipBytes(2));
+
+      if (output_gain_is_present_flag) {
+        // output_gain_flags and output_gain
+        RCHECK(reader->SkipBytes(3));
+      }
+
+      if (i == 1 && loudspeaker_layout == static_cast<uint8_t>(15)) {
+        // expanded_loudspeaker_layout
+        RCHECK(reader->SkipBytes(1));
+      }
+    }
+  }
+  return true;
+}
+
+bool IamfBufferParser::ParseMixPresentationOBU(
+    BufferReader* reader,
+    IamfBufferInfo* info,
+    const bool prefer_binaural_audio,
+    const bool prefer_surround_audio) {
+  uint32_t mix_presentation_id;
+  RCHECK(reader->ReadLeb128(&mix_presentation_id));
+
+  uint32_t count_label;
+
RCHECK(reader->ReadLeb128(&count_label)); + for (int i = 0; i < count_label; ++i) { + // language_label; + RCHECK(reader->SkipString()); + } + + for (int i = 0; i < count_label; ++i) { + // MixPresentationAnnotations; + RCHECK(reader->SkipString()); + } + + uint32_t num_sub_mixes; + RCHECK(reader->ReadLeb128(&num_sub_mixes)); + for (int i = 0; i < num_sub_mixes; ++i) { + uint32_t num_audio_elements; + RCHECK(reader->ReadLeb128(&num_audio_elements)); + for (int j = 0; j < num_audio_elements; ++j) { + uint32_t audio_element_id; + RCHECK(reader->ReadLeb128(&audio_element_id)); + + // Set a mix presentation for binaural or surround streams. The mix + // presentation is chosen if there exists an audio element that has + // the qualities it requires - such as an audio element with a + // binaural loudspeaker layout. + if (!info->mix_presentation_id.has_value() || + (prefer_binaural_audio && + binaural_mix_selection_ > kBinauralMixSelectionLoudspeakerLayout)) { + if (prefer_binaural_audio && + binaural_audio_element_ids_.find(audio_element_id) != + binaural_audio_element_ids_.end()) { + info->mix_presentation_id = mix_presentation_id; + binaural_mix_selection_ = kBinauralMixSelectionLoudspeakerLayout; + } else if (prefer_surround_audio && + surround_audio_element_ids_.find(audio_element_id) != + surround_audio_element_ids_.end()) { + info->mix_presentation_id = mix_presentation_id; + } + } + + for (int k = 0; k < count_label; ++k) { + // MixPresentationElementAnnotatoions + RCHECK(reader->SkipString()); + } + + // The following fields are for the RenderingConfig + // headphones_rendering_mode + RCHECK(reader->SkipBytes(1)); + uint32_t rendering_config_extension_size; + RCHECK(reader->ReadLeb128(&rendering_config_extension_size)); + // rendering_config_extension_bytes + RCHECK(reader->SkipBytes(rendering_config_extension_size)); + + // The following fields are for the ElementMixConfig + SkipParamDefinition(reader); + // default_mix_gain + RCHECK(reader->SkipBytes(2)); + } 
+ + // The following fields are for the OutputMixConfig + SkipParamDefinition(reader); + // default_mix_gain + RCHECK(reader->SkipBytes(2)); + + uint32_t num_layouts; + RCHECK(reader->ReadLeb128(&num_layouts)); + for (int j = 0; j < num_layouts; ++j) { + uint8_t layout_type; + RCHECK(reader->Read1(&layout_type)); + layout_type = layout_type >> 6; + // If a binaural mix presentation is preferred and the mix + // presentation id has not yet been set, set the mix presentation id + // if the current mix presentation has a binaural loudness layout. The + // mix presentation id will change if a different mix presentation is + // found that uses an audio element with a binaural loudspeaker + // layout, as that is higher priority. + if (static_cast(layout_type) == IAMF_LAYOUT_TYPE_BINAURAL && + prefer_binaural_audio && + (!info->mix_presentation_id.has_value() || + binaural_mix_selection_ > kBinauralMixSelectionLoudnessLayout)) { + info->mix_presentation_id = mix_presentation_id; + binaural_mix_selection_ = kBinauralMixSelectionLoudnessLayout; + } + + // The following fields are for the LoudnessInfo + uint8_t info_type; + RCHECK(reader->Read1(&info_type)); + // integrated_loudness and digital_loudness + RCHECK(reader->SkipBytes(4)); + if (info_type & 1) { + // true_peak + RCHECK(reader->SkipBytes(2)); + } + if (info_type & 2) { + uint8_t num_anchored_loudness; + RCHECK(reader->Read1(&num_anchored_loudness)); + for (uint8_t k = 0; k < num_anchored_loudness; ++k) { + // anchor_element and anchored_loudness + RCHECK(reader->SkipBytes(3)); + } + } + if ((info_type & 0b11111100) > 0) { + uint32_t info_type_size; + RCHECK(reader->ReadLeb128(&info_type_size)); + // info_type_bytes + RCHECK(reader->SkipBytes(info_type_size)); + } + } + } + + // If the mix presentation id is unassigned at this point, the stream is + // stereo, or a proper mix presentation for binaural or surround preferred + // streams hasn't yet been parsed. 
Default to the first read mix + // presentation in case a preferred mix does not exist. + if (!info->mix_presentation_id.has_value()) { + info->mix_presentation_id = mix_presentation_id; + } + + return true; +} + +bool IamfBufferParser::SkipParamDefinition(BufferReader* reader) const { + // parameter_id + RCHECK(reader->SkipLeb128()); + // parameter_rate + RCHECK(reader->SkipLeb128()); + uint8_t param_definition_mode; + RCHECK(reader->Read1(¶m_definition_mode)); + param_definition_mode = param_definition_mode >> 7; + if (param_definition_mode == static_cast(0)) { + // duration + RCHECK(reader->SkipLeb128()); + uint32_t constant_subblock_duration; + RCHECK(reader->ReadLeb128(&constant_subblock_duration)); + if (constant_subblock_duration == 0) { + uint32_t num_subblocks; + RCHECK(reader->ReadLeb128(&num_subblocks)); + for (int i = 0; i < num_subblocks; ++i) { + // subblock_duration + RCHECK(reader->SkipLeb128()); + } + } + } + return true; +} + +} // namespace libiamf +} // namespace shared +} // namespace starboard diff --git a/starboard/shared/libiamf/iamf_buffer_parser.h b/starboard/shared/libiamf/iamf_buffer_parser.h new file mode 100644 index 000000000000..83154fd0825a --- /dev/null +++ b/starboard/shared/libiamf/iamf_buffer_parser.h @@ -0,0 +1,109 @@ +// Copyright 2024 The Cobalt Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#ifndef STARBOARD_SHARED_LIBIAMF_IAMF_BUFFER_PARSER_H_
+#define STARBOARD_SHARED_LIBIAMF_IAMF_BUFFER_PARSER_H_
+
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "starboard/common/log.h"
+#include "starboard/common/ref_counted.h"
+#include "starboard/shared/internal_only.h"
+#include "starboard/shared/starboard/player/input_buffer_internal.h"
+
+namespace starboard {
+namespace shared {
+namespace libiamf {
+
+class BufferReader;
+
+// Reads IAMF OBUs out of an input buffer and fills an IamfBufferInfo with
+// the values the parse produced. The parser is stateful: audio element ids
+// and the current binaural selection are remembered across OBUs so that a
+// later Mix Presentation OBU can be matched against earlier Audio Element
+// OBUs.
+//
+// TODO: Skip parsing if the OBUs are redundant
+class IamfBufferParser {
+ public:
+  typedef ::starboard::shared::starboard::player::InputBuffer InputBuffer;
+
+  // Result of parsing one input buffer.
+  struct IamfBufferInfo {
+    // True once a mix presentation id has been chosen and both the sample
+    // rate and the sample count are positive.
+    bool is_valid() const {
+      return mix_presentation_id.has_value() && sample_rate > 0 &&
+             num_samples > 0;
+    }
+
+    // The scalar members are zero-initialized so that is_valid() never reads
+    // an indeterminate value when parsing stops before they are written.
+
+    // Sample count read from the Codec Config OBU.
+    uint32_t num_samples = 0;
+    // Sample rate derived from the Codec Config OBU.
+    int sample_rate = 0;
+    // Id of the mix presentation selected by ParseMixPresentationOBU().
+    std::optional<uint32_t> mix_presentation_id;
+    // NOTE(review): the element type of |config_obus| and |data| is assumed
+    // to be uint8_t (raw bytes) - confirm against the code that fills them.
+    std::vector<uint8_t> config_obus;
+    size_t config_obus_size = 0;
+    std::vector<uint8_t> data;
+    size_t data_size = 0;
+    const scoped_refptr<InputBuffer> input_buffer;
+  };
+
+  IamfBufferParser();
+
+  // Parses |input_buffer| into |info|. |prefer_binaural_audio| and
+  // |prefer_surround_audio| steer which mix presentation is selected.
+  // Presumably returns false when parsing fails - confirm against the
+  // definition.
+  bool ParseInputBuffer(const scoped_refptr<InputBuffer>& input_buffer,
+                        IamfBufferInfo* info,
+                        const bool prefer_binaural_audio,
+                        const bool prefer_surround_audio);
+
+ private:
+  // Used in the selection of a binaural mix presentation, using the strategy
+  // defined in
+  // https://aomediacodec.github.io/iamf/#processing-mixpresentation-selection.
+  // The preferred methods of choosing a binaural mix presentation are listed
+  // from high to low. The enumerator order is significant: the parser
+  // compares these values with '>' to tell whether a better match is still
+  // possible.
+  enum BinauralMixSelection {
+    kBinauralMixSelectionLoudspeakerLayout,
+    kBinauralMixSelectionLoudnessLayout,
+    kBinauralMixSelectionNotFound
+  };
+
+  bool ParseInputBufferInternal(const scoped_refptr<InputBuffer>& input_buffer,
+                                IamfBufferInfo* info,
+                                const bool prefer_binaural_audio,
+                                const bool prefer_surround_audio);
+  // Reads a single Descriptor OBU. Returns false on error.
+  bool ParseDescriptorOBU(BufferReader* reader,
+                          IamfBufferInfo* info,
+                          const bool prefer_binaural_audio,
+                          const bool prefer_surround_audio);
+  // Reads an OBU header into |obu_type| and |obu_size|. Returns false on
+  // error.
+  bool ParseOBUHeader(BufferReader* reader,
+                      uint8_t* obu_type,
+                      uint32_t* obu_size) const;
+  // Reads the sample count and sample rate into |info|. Returns false on
+  // error or when the codec id is not handled.
+  bool ParseCodecConfigOBU(BufferReader* reader, IamfBufferInfo* info);
+  // Records which audio element ids use a binaural or surround loudspeaker
+  // layout. Returns false on error.
+  bool ParseAudioElementOBU(BufferReader* reader,
+                            IamfBufferInfo* info,
+                            const bool prefer_binaural_audio,
+                            const bool prefer_surround_audio);
+  // Selects |info->mix_presentation_id| according to the preferences and the
+  // element ids recorded by ParseAudioElementOBU(). Returns false on error.
+  bool ParseMixPresentationOBU(BufferReader* reader,
+                               IamfBufferInfo* info,
+                               const bool prefer_binaural_audio,
+                               const bool prefer_surround_audio);
+  // Helper function to skip parsing ParamDefinitions found in the config OBUs
+  // https://aomediacodec.github.io/iamf/v1.0.0-errata.html#paramdefinition
+  bool SkipParamDefinition(BufferReader* reader) const;
+
+  // Audio element ids seen with a binaural / surround loudspeaker layout.
+  std::unordered_set<uint32_t> binaural_audio_element_ids_;
+  std::unordered_set<uint32_t> surround_audio_element_ids_;
+
+  // Best binaural match made so far; starts at "not found".
+  BinauralMixSelection binaural_mix_selection_ = kBinauralMixSelectionNotFound;
+};
+
+}  // namespace libiamf
+}  // namespace shared
+}  // namespace starboard
+
+#endif  // STARBOARD_SHARED_LIBIAMF_IAMF_BUFFER_PARSER_H_